1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
40};
41use datasynth_core::models::sourcing::{
42 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
43 SupplierBid, SupplierQualification, SupplierScorecard,
44};
45use datasynth_core::models::subledger::ap::APInvoice;
46use datasynth_core::models::subledger::ar::ARInvoice;
47use datasynth_core::models::*;
48use datasynth_core::traits::Generator;
49use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
50use datasynth_fingerprint::{
51 io::FingerprintReader,
52 models::Fingerprint,
53 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
54};
55use datasynth_generators::{
56 AnomalyInjector,
58 AnomalyInjectorConfig,
59 AssetGenerator,
60 AuditEngagementGenerator,
62 BalanceTrackerConfig,
63 BankReconciliationGenerator,
65 BidEvaluationGenerator,
67 BidGenerator,
68 CatalogGenerator,
69 ChartOfAccountsGenerator,
71 ContractGenerator,
72 ControlGenerator,
74 ControlGeneratorConfig,
75 CustomerGenerator,
76 DataQualityConfig,
77 DataQualityInjector,
79 DataQualityStats,
80 DocumentFlowJeConfig,
82 DocumentFlowJeGenerator,
83 DocumentFlowLinker,
85 EmployeeGenerator,
86 EsgAnomalyLabel,
88 EvidenceGenerator,
89 FinancialStatementGenerator,
91 FindingGenerator,
92 JournalEntryGenerator,
93 JudgmentGenerator,
94 LatePaymentDistribution,
95 MaterialGenerator,
96 O2CDocumentChain,
97 O2CGenerator,
98 O2CGeneratorConfig,
99 O2CPaymentBehavior,
100 P2PDocumentChain,
101 P2PGenerator,
103 P2PGeneratorConfig,
104 P2PPaymentBehavior,
105 PaymentReference,
106 QualificationGenerator,
107 RfxGenerator,
108 RiskAssessmentGenerator,
109 RunningBalanceTracker,
111 ScorecardGenerator,
112 SourcingProjectGenerator,
113 SpendAnalysisGenerator,
114 ValidationError,
115 VendorGenerator,
117 WorkpaperGenerator,
118};
119use datasynth_graph::{
120 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
121 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
122 TransactionGraphConfig,
123};
124use datasynth_ocpm::{
125 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
126 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
127 OcpmUuidFactory, P2pDocuments, S2cDocuments,
128};
129
130use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
131use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
132use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
133use datasynth_core::llm::MockLlmProvider;
134use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
135use datasynth_core::models::documents::PaymentMethod;
136use datasynth_core::models::IndustrySector;
137use datasynth_generators::coa_generator::CoAFramework;
138use datasynth_generators::llm_enrichment::VendorLlmEnricher;
139use rayon::prelude::*;
140
141fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
147 let payment_behavior = &schema_config.payment_behavior;
148 let late_dist = &payment_behavior.late_payment_days_distribution;
149
150 P2PGeneratorConfig {
151 three_way_match_rate: schema_config.three_way_match_rate,
152 partial_delivery_rate: schema_config.partial_delivery_rate,
153 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
154 price_variance_rate: schema_config.price_variance_rate,
155 max_price_variance_percent: schema_config.max_price_variance_percent,
156 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
157 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
158 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
159 payment_method_distribution: vec![
160 (PaymentMethod::BankTransfer, 0.60),
161 (PaymentMethod::Check, 0.25),
162 (PaymentMethod::Wire, 0.10),
163 (PaymentMethod::CreditCard, 0.05),
164 ],
165 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
166 payment_behavior: P2PPaymentBehavior {
167 late_payment_rate: payment_behavior.late_payment_rate,
168 late_payment_distribution: LatePaymentDistribution {
169 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
170 late_8_to_14: late_dist.late_8_to_14,
171 very_late_15_to_30: late_dist.very_late_15_to_30,
172 severely_late_31_to_60: late_dist.severely_late_31_to_60,
173 extremely_late_over_60: late_dist.extremely_late_over_60,
174 },
175 partial_payment_rate: payment_behavior.partial_payment_rate,
176 payment_correction_rate: payment_behavior.payment_correction_rate,
177 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
178 },
179 }
180}
181
182fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
184 let payment_behavior = &schema_config.payment_behavior;
185
186 O2CGeneratorConfig {
187 credit_check_failure_rate: schema_config.credit_check_failure_rate,
188 partial_shipment_rate: schema_config.partial_shipment_rate,
189 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
190 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
191 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
192 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
193 bad_debt_rate: schema_config.bad_debt_rate,
194 returns_rate: schema_config.return_rate,
195 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
196 payment_method_distribution: vec![
197 (PaymentMethod::BankTransfer, 0.50),
198 (PaymentMethod::Check, 0.30),
199 (PaymentMethod::Wire, 0.15),
200 (PaymentMethod::CreditCard, 0.05),
201 ],
202 payment_behavior: O2CPaymentBehavior {
203 partial_payment_rate: payment_behavior.partial_payments.rate,
204 short_payment_rate: payment_behavior.short_payments.rate,
205 max_short_percent: payment_behavior.short_payments.max_short_percent,
206 on_account_rate: payment_behavior.on_account_payments.rate,
207 payment_correction_rate: payment_behavior.payment_corrections.rate,
208 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
209 },
210 }
211}
212
/// Switches and sizing knobs for the orchestrator's generation phases.
///
/// NOTE(review): the code that consumes these fields is outside this view, so
/// the per-field notes below follow the field names and types; confirm the
/// details at the use sites.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    // --- Core phase toggles ---
    /// Toggle for master-data generation.
    pub generate_master_data: bool,
    /// Toggle for document-flow generation.
    pub generate_document_flows: bool,
    /// Toggle for OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Toggle for journal-entry generation.
    pub generate_journal_entries: bool,
    /// Toggle for anomaly injection.
    pub inject_anomalies: bool,
    /// Toggle for data-quality injection.
    pub inject_data_quality: bool,
    /// Toggle for balance validation.
    pub validate_balances: bool,
    /// Progress display flag; `with_progress` on the orchestrator overwrites it.
    pub show_progress: bool,

    // --- Master-data volumes (per company, going by the names) ---
    pub vendors_per_company: usize,
    pub customers_per_company: usize,
    pub materials_per_company: usize,
    pub assets_per_company: usize,
    pub employees_per_company: usize,

    // --- Document-flow volumes ---
    /// Number of P2P chains.
    pub p2p_chains: usize,
    /// Number of O2C chains.
    pub o2c_chains: usize,

    // --- Audit phase ---
    /// Toggle for audit data generation.
    pub generate_audit: bool,
    pub audit_engagements: usize,
    pub workpapers_per_engagement: usize,
    pub evidence_per_workpaper: usize,
    pub risks_per_engagement: usize,
    pub findings_per_engagement: usize,
    pub judgments_per_engagement: usize,

    // --- Optional domain phases ---
    pub generate_banking: bool,
    pub generate_graph_export: bool,
    pub generate_sourcing: bool,
    pub generate_bank_reconciliation: bool,
    pub generate_financial_statements: bool,
    pub generate_accounting_standards: bool,
    pub generate_manufacturing: bool,
    pub generate_sales_kpi_budgets: bool,
    pub generate_tax: bool,
    pub generate_esg: bool,
    pub generate_intercompany: bool,
    pub generate_evolution_events: bool,
    pub generate_counterfactuals: bool,
}
287
288impl Default for PhaseConfig {
289 fn default() -> Self {
290 Self {
291 generate_master_data: true,
292 generate_document_flows: true,
293 generate_ocpm_events: false, generate_journal_entries: true,
295 inject_anomalies: false,
296 inject_data_quality: false, validate_balances: true,
298 show_progress: true,
299 vendors_per_company: 50,
300 customers_per_company: 100,
301 materials_per_company: 200,
302 assets_per_company: 50,
303 employees_per_company: 100,
304 p2p_chains: 100,
305 o2c_chains: 100,
306 generate_audit: false, audit_engagements: 5,
308 workpapers_per_engagement: 20,
309 evidence_per_workpaper: 5,
310 risks_per_engagement: 15,
311 findings_per_engagement: 8,
312 judgments_per_engagement: 10,
313 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, generate_evolution_events: true, generate_counterfactuals: false, }
327 }
328}
329
/// Master-data collections held together as one snapshot.
///
/// `Default` yields a snapshot with every collection empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed-asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
}
344
/// Size and location summary for a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export (presumably where it was written; confirm at the producer).
    pub output_path: PathBuf,
}
357
/// Document-flow output: the P2P/O2C chains plus per-type document lists.
///
/// NOTE(review): the per-type lists look like flattened views of the chains'
/// documents; confirm where the snapshot is populated.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// Procure-to-pay document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// Order-to-cash document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
}
380
/// Subledger records grouped by subledger: AP, AR, fixed assets and inventory.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// Accounts-payable invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// Accounts-receivable invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed-asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
}
395
/// OCPM (object-centric process mining) output: an optional event log plus counters.
///
/// `Default` gives `event_log: None` and all counters at zero.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, when one is present.
    pub event_log: Option<OcpmEventLog>,
    /// Event counter.
    pub event_count: usize,
    /// Object counter.
    pub object_count: usize,
    /// Case counter.
    pub case_count: usize,
}
408
/// Audit artefacts held together as one snapshot.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
}
425
/// Banking output: entities, their label sets, narratives and two counters.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Labels attached to transactions.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Labels attached to customers.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Labels attached to accounts.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Labels attached to relationships.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Counter of suspicious items (presumably transactions; confirm at the producer).
    pub suspicious_count: usize,
    /// Counter of scenarios.
    pub scenario_count: usize,
}
450
/// Serializable summary of graph exports.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place (`false` by `Default`).
    pub exported: bool,
    /// Graph counter.
    pub graph_count: usize,
    /// Per-export details under a string key (presumably the export name; confirm at the producer).
    pub exports: HashMap<String, GraphExportInfo>,
}
461
/// Serializable description of one graph export.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format identifier.
    pub format: String,
    /// Path associated with the export.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
476
/// Sourcing (procurement) artefacts held together as one snapshot.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
499
/// Trial balance for one fiscal period; serializable in both directions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period within the year.
    pub fiscal_period: u8,
    /// Period start date.
    pub period_start: NaiveDate,
    /// Period end date.
    pub period_end: NaiveDate,
    /// Trial-balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
514
/// Financial-reporting output: statements, bank reconciliations and trial balances.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// One trial balance per fiscal period.
    pub trial_balances: Vec<PeriodTrialBalance>,
}
525
/// HR output: record collections plus a counter per collection.
///
/// NOTE(review): each `*_count` presumably mirrors the length of the matching
/// vector; confirm where the snapshot is built.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    pub payroll_run_count: usize,
    pub payroll_line_item_count: usize,
    pub time_entry_count: usize,
    pub expense_report_count: usize,
    pub benefit_enrollment_count: usize,
}
550
/// Accounting-standards output: revenue contracts and impairment tests, with counters.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts from the revenue module.
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Counter for revenue contracts (presumably `contracts.len()`; confirm).
    pub revenue_contract_count: usize,
    /// Counter for impairment tests (presumably `impairment_tests.len()`; confirm).
    pub impairment_test_count: usize,
}
563
/// Manufacturing output: record collections plus a counter per collection.
///
/// NOTE(review): each `*_count` presumably mirrors the length of the matching
/// vector; confirm where the snapshot is built.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// Bill-of-materials components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    pub production_order_count: usize,
    pub quality_inspection_count: usize,
    pub cycle_count_count: usize,
    pub bom_component_count: usize,
    pub inventory_movement_count: usize,
}
588
/// Sales quotes, management KPIs and budgets, with counters.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Counter for sales quotes.
    pub sales_quote_count: usize,
    /// Counter for KPIs.
    pub kpi_count: usize,
    /// Counter for budget lines. The name says lines, not budgets, so it need
    /// not equal `budgets.len()`; confirm at the producer.
    pub budget_line_count: usize,
}
605
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// The labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary, when one is available.
    pub summary: Option<AnomalySummary>,
    /// Tally keyed by a string (presumably the anomaly type name; confirm at the producer).
    pub by_type: HashMap<String, usize>,
}
616
/// Outcome of balance validation.
///
/// `Default` yields `validated: false`, `is_balanced: false`, zero totals and
/// no errors. Callers should therefore check `validated` before trusting
/// `is_balanced` (presumably `validated` marks that validation ran; confirm).
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Validation flag.
    pub validated: bool,
    /// Balanced flag.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Debit total.
    pub total_debits: rust_decimal::Decimal,
    /// Credit total.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Flag for the presence of unbalanced entries.
    pub has_unbalanced_entries: bool,
}
639
/// Tax output: reference data, transactional records, anomaly labels and counters.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding-tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Anomaly labels for tax data.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Counter for jurisdictions.
    pub jurisdiction_count: usize,
    /// Counter for tax codes.
    pub code_count: usize,
}
662
/// Intercompany output; serializable in both directions.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Journal entries on the seller side.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Journal entries on the buyer side.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// Counter for matched pairs.
    pub matched_pair_count: usize,
    /// Counter for elimination entries.
    pub elimination_entry_count: usize,
    /// Match rate. NOTE(review): scale (0-1 vs. percent) is not visible here; confirm.
    pub match_rate: f64,
}
681
/// ESG output: environmental, social and governance record sets, anomaly
/// labels, and two counters.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy-consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water-usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce-diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay-equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// Anomaly labels for ESG data.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Counter for emission records.
    pub emission_count: usize,
    /// Counter for disclosures.
    pub disclosure_count: usize,
}
718
/// Treasury output: cash management, hedging, debt, guarantees, netting and
/// anomaly labels.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash-pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Anomaly labels for treasury data.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
}
743
/// Project-accounting output held together as one snapshot.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned-value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
760
/// Aggregate result type: one field per output area (snapshots, journal
/// entries, labels, validation, statistics and optional extras).
///
/// Only `Debug` is derived, so the value is neither cloneable nor
/// default-constructible.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    // --- Core accounting structures ---
    /// The chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data.
    pub master_data: MasterDataSnapshot,
    /// Document flows.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger records.
    pub subledger: SubledgerSnapshot,

    // --- Domain snapshots ---
    pub ocpm: OcpmSnapshot,
    pub audit: AuditSnapshot,
    pub banking: BankingSnapshot,
    pub graph_export: GraphExportSnapshot,
    pub sourcing: SourcingSnapshot,
    pub financial_reporting: FinancialReportingSnapshot,
    pub hr: HrSnapshot,
    pub accounting_standards: AccountingStandardsSnapshot,
    pub manufacturing: ManufacturingSnapshot,
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    pub tax: TaxSnapshot,
    pub esg: EsgSnapshot,
    pub treasury: TreasurySnapshot,
    pub project_accounting: ProjectAccountingSnapshot,

    // --- Event streams ---
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    pub organizational_events: Vec<OrganizationalEvent>,
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,

    // --- Ledger, labels and quality ---
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    pub anomaly_labels: AnomalyLabels,
    pub balance_validation: BalanceValidationResult,
    pub data_quality_stats: DataQualityStats,
    /// Counters and timings.
    pub statistics: EnhancedGenerationStatistics,

    // --- Optional / auxiliary outputs ---
    /// Lineage graph, when present.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality-gate result, when present.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    pub internal_controls: Vec<InternalControl>,
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains over vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, when present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, when present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
}
844
/// Flat, serializable set of counters and timings.
///
/// The fields from `llm_enrichment_ms` onward carry `#[serde(default)]`, so
/// they may be missing from deserialized input and then take their type's
/// default value. The fields before that point have no such attribute.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries and scope ---
    pub total_entries: u64,
    pub total_line_items: u64,
    pub accounts_count: usize,
    pub companies_count: usize,
    pub period_months: u32,

    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,

    // --- Document flows ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,

    // --- Subledger invoices ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,

    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,

    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,

    // --- Injection ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,

    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,

    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,

    // --- LLM enrichment (`_ms` suffix: presumably milliseconds; confirm at the writer) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,

    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,

    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Tri-state: `None` when no validation verdict is recorded.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,

    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,

    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,

    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,

    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,

    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,

    // --- Sales, KPIs and budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,

    // --- Tax reference data ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,

    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,

    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,

    // --- Additional subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,

    // --- Treasury instruments ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,

    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,

    // --- Miscellaneous record counts ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,

    // --- Cash management ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,

    // --- Evolution events ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,

    // --- Counterfactuals and fraud ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,

    // --- Temporal and relationship structures ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,

    // --- Disruption and industry-specific ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
}
1059
/// Orchestrator state: configuration, phase selection and the resources the
/// methods in its `impl` block read and update.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new`.
    config: GeneratorConfig,
    /// Phase toggles and volumes.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data; starts as an empty snapshot.
    master_data: MasterDataSnapshot,
    /// Seed: `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress-bar container; set by `with_progress`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard; rebuilt by `with_output_path` so it knows the output path.
    resource_guard: ResourceGuard,
    /// Output path; set by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; filled when built from a fingerprint, otherwise empty.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country-pack registry loaded in `new`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink; set by `with_phase_sink`.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1079
1080impl EnhancedOrchestrator {
1081 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1083 datasynth_config::validate_config(&config)?;
1084
1085 let seed = config.global.seed.unwrap_or_else(rand::random);
1086
1087 let resource_guard = Self::build_resource_guard(&config, None);
1089
1090 let country_pack_registry = match &config.country_packs {
1092 Some(cp) => {
1093 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1094 .map_err(|e| SynthError::config(e.to_string()))?
1095 }
1096 None => datasynth_core::CountryPackRegistry::builtin_only()
1097 .map_err(|e| SynthError::config(e.to_string()))?,
1098 };
1099
1100 Ok(Self {
1101 config,
1102 phase_config,
1103 coa: None,
1104 master_data: MasterDataSnapshot::default(),
1105 seed,
1106 multi_progress: None,
1107 resource_guard,
1108 output_path: None,
1109 copula_generators: Vec::new(),
1110 country_pack_registry,
1111 phase_sink: None,
1112 })
1113 }
1114
1115 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1117 Self::new(config, PhaseConfig::default())
1118 }
1119
1120 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1122 self.phase_sink = Some(sink);
1123 self
1124 }
1125
1126 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1128 if let Some(ref sink) = self.phase_sink {
1129 for item in items {
1130 if let Ok(value) = serde_json::to_value(item) {
1131 if let Err(e) = sink.emit(phase, type_name, &value) {
1132 warn!(
1133 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1134 );
1135 }
1136 }
1137 }
1138 if let Err(e) = sink.phase_complete(phase) {
1139 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1140 }
1141 }
1142 }
1143
1144 pub fn with_progress(mut self, show: bool) -> Self {
1146 self.phase_config.show_progress = show;
1147 if show {
1148 self.multi_progress = Some(MultiProgress::new());
1149 }
1150 self
1151 }
1152
1153 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1155 let path = path.into();
1156 self.output_path = Some(path.clone());
1157 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1159 self
1160 }
1161
1162 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1164 &self.country_pack_registry
1165 }
1166
1167 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1169 self.country_pack_registry.get_by_str(country)
1170 }
1171
1172 fn primary_country_code(&self) -> &str {
1175 self.config
1176 .companies
1177 .first()
1178 .map(|c| c.country.as_str())
1179 .unwrap_or("US")
1180 }
1181
1182 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1184 self.country_pack_for(self.primary_country_code())
1185 }
1186
1187 fn resolve_coa_framework(&self) -> CoAFramework {
1189 if self.config.accounting_standards.enabled {
1190 match self.config.accounting_standards.framework {
1191 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1192 return CoAFramework::FrenchPcg;
1193 }
1194 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1195 return CoAFramework::GermanSkr04;
1196 }
1197 _ => {}
1198 }
1199 }
1200 let pack = self.primary_pack();
1202 match pack.accounting.framework.as_str() {
1203 "french_gaap" => CoAFramework::FrenchPcg,
1204 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1205 _ => CoAFramework::UsGaap,
1206 }
1207 }
1208
1209 pub fn has_copulas(&self) -> bool {
1214 !self.copula_generators.is_empty()
1215 }
1216
1217 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1223 &self.copula_generators
1224 }
1225
1226 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1230 &mut self.copula_generators
1231 }
1232
1233 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1237 self.copula_generators
1238 .iter_mut()
1239 .find(|c| c.name == copula_name)
1240 .map(|c| c.generator.sample())
1241 }
1242
1243 pub fn from_fingerprint(
1266 fingerprint_path: &std::path::Path,
1267 phase_config: PhaseConfig,
1268 scale: f64,
1269 ) -> SynthResult<Self> {
1270 info!("Loading fingerprint from: {}", fingerprint_path.display());
1271
1272 let reader = FingerprintReader::new();
1274 let fingerprint = reader
1275 .read_from_file(fingerprint_path)
1276 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1277
1278 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1279 }
1280
1281 pub fn from_fingerprint_data(
1288 fingerprint: Fingerprint,
1289 phase_config: PhaseConfig,
1290 scale: f64,
1291 ) -> SynthResult<Self> {
1292 info!(
1293 "Synthesizing config from fingerprint (version: {}, tables: {})",
1294 fingerprint.manifest.version,
1295 fingerprint.schema.tables.len()
1296 );
1297
1298 let seed: u64 = rand::random();
1300
1301 let options = SynthesisOptions {
1303 scale,
1304 seed: Some(seed),
1305 preserve_correlations: true,
1306 inject_anomalies: true,
1307 };
1308 let synthesizer = ConfigSynthesizer::with_options(options);
1309
1310 let synthesis_result = synthesizer
1312 .synthesize_full(&fingerprint, seed)
1313 .map_err(|e| {
1314 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1315 })?;
1316
1317 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1319 Self::base_config_for_industry(industry)
1320 } else {
1321 Self::base_config_for_industry("manufacturing")
1322 };
1323
1324 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1326
1327 info!(
1329 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1330 fingerprint.schema.tables.len(),
1331 scale,
1332 synthesis_result.copula_generators.len()
1333 );
1334
1335 if !synthesis_result.copula_generators.is_empty() {
1336 for spec in &synthesis_result.copula_generators {
1337 info!(
1338 " Copula '{}' for table '{}': {} columns",
1339 spec.name,
1340 spec.table,
1341 spec.columns.len()
1342 );
1343 }
1344 }
1345
1346 let mut orchestrator = Self::new(config, phase_config)?;
1348
1349 orchestrator.copula_generators = synthesis_result.copula_generators;
1351
1352 Ok(orchestrator)
1353 }
1354
1355 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1357 use datasynth_config::presets::create_preset;
1358 use datasynth_config::TransactionVolume;
1359 use datasynth_core::models::{CoAComplexity, IndustrySector};
1360
1361 let sector = match industry.to_lowercase().as_str() {
1362 "manufacturing" => IndustrySector::Manufacturing,
1363 "retail" => IndustrySector::Retail,
1364 "financial" | "financial_services" => IndustrySector::FinancialServices,
1365 "healthcare" => IndustrySector::Healthcare,
1366 "technology" | "tech" => IndustrySector::Technology,
1367 _ => IndustrySector::Manufacturing,
1368 };
1369
1370 create_preset(
1372 sector,
1373 1, 12, CoAComplexity::Medium,
1376 TransactionVolume::TenK,
1377 )
1378 }
1379
1380 fn apply_config_patch(
1382 mut config: GeneratorConfig,
1383 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1384 ) -> GeneratorConfig {
1385 use datasynth_fingerprint::synthesis::ConfigValue;
1386
1387 for (key, value) in patch.values() {
1388 match (key.as_str(), value) {
1389 ("transactions.count", ConfigValue::Integer(n)) => {
1392 info!(
1393 "Fingerprint suggests {} transactions (apply via company volumes)",
1394 n
1395 );
1396 }
1397 ("global.period_months", ConfigValue::Integer(n)) => {
1398 config.global.period_months = (*n).clamp(1, 120) as u32;
1399 }
1400 ("global.start_date", ConfigValue::String(s)) => {
1401 config.global.start_date = s.clone();
1402 }
1403 ("global.seed", ConfigValue::Integer(n)) => {
1404 config.global.seed = Some(*n as u64);
1405 }
1406 ("fraud.enabled", ConfigValue::Bool(b)) => {
1407 config.fraud.enabled = *b;
1408 }
1409 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1410 config.fraud.fraud_rate = *f;
1411 }
1412 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1413 config.data_quality.enabled = *b;
1414 }
1415 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1417 config.fraud.enabled = *b;
1418 }
1419 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1420 config.fraud.fraud_rate = *f;
1421 }
1422 _ => {
1423 debug!("Ignoring unknown config patch key: {}", key);
1424 }
1425 }
1426 }
1427
1428 config
1429 }
1430
1431 fn build_resource_guard(
1433 config: &GeneratorConfig,
1434 output_path: Option<PathBuf>,
1435 ) -> ResourceGuard {
1436 let mut builder = ResourceGuardBuilder::new();
1437
1438 if config.global.memory_limit_mb > 0 {
1440 builder = builder.memory_limit(config.global.memory_limit_mb);
1441 }
1442
1443 if let Some(path) = output_path {
1445 builder = builder.output_path(path).min_free_disk(100); }
1447
1448 builder = builder.conservative();
1450
1451 builder.build()
1452 }
1453
/// Asks the resource guard for the current degradation level.
///
/// # Errors
///
/// Returns whatever error the guard's `check` reports.
fn check_resources(&self) -> SynthResult<DegradationLevel> {
    self.resource_guard.check()
}
1461
1462 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1464 let level = self.resource_guard.check()?;
1465
1466 if level != DegradationLevel::Normal {
1467 warn!(
1468 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1469 phase,
1470 level,
1471 self.resource_guard.current_memory_mb(),
1472 self.resource_guard.available_disk_mb()
1473 );
1474 }
1475
1476 Ok(level)
1477 }
1478
/// Returns the degradation actions currently reported by the resource guard.
fn get_degradation_actions(&self) -> DegradationActions {
    self.resource_guard.get_actions()
}
1483
1484 fn check_memory_limit(&self) -> SynthResult<()> {
1486 self.check_resources()?;
1487 Ok(())
1488 }
1489
/// Runs the complete enhanced generation workflow and returns every artifact
/// it produced.
///
/// The phases run in a fixed order. Journal entries are accumulated from
/// several sources (document flows, standalone generation, fixed-asset
/// acquisitions, intercompany, payroll, manufacturing) before controls,
/// anomaly injection, validation and data-quality injection are applied to
/// the combined list.
///
/// Side effects visible here: `self.master_data` is moved into the result
/// (leaving its `Default` value behind), and the phase sink, when present,
/// is flushed.
///
/// # Errors
///
/// Returns a resource error when the initial resource check reports
/// `DegradationLevel::Emergency`, and propagates the error of any phase
/// invoked with `?`.
pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
    info!("Starting enhanced generation workflow");
    info!(
        "Config: industry={:?}, period_months={}, companies={}",
        self.config.global.industry,
        self.config.global.period_months,
        self.config.companies.len()
    );

    // Refuse to start when the guard already reports an emergency.
    let initial_level = self.check_resources_with_log("initial")?;
    if initial_level == DegradationLevel::Emergency {
        return Err(SynthError::resource(
            "Insufficient resources to start generation",
        ));
    }

    let mut stats = EnhancedGenerationStatistics {
        companies_count: self.config.companies.len(),
        period_months: self.config.global.period_months,
        ..Default::default()
    };

    // Chart of accounts.
    let coa = self.phase_chart_of_accounts(&mut stats)?;

    // Master data.
    self.phase_master_data(&mut stats)?;

    // Emit master data items for the "master_data" phase.
    self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
    self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
    self.emit_phase_items("master_data", "Material", &self.master_data.materials);

    // Document flows, subledger records and fixed-asset acquisition JEs.
    let (mut document_flows, subledger, fa_journal_entries) =
        self.phase_document_flows(&mut stats)?;

    // Emit document flow items for the "document_flows" phase.
    self.emit_phase_items(
        "document_flows",
        "PurchaseOrder",
        &document_flows.purchase_orders,
    );
    self.emit_phase_items(
        "document_flows",
        "GoodsReceipt",
        &document_flows.goods_receipts,
    );
    self.emit_phase_items(
        "document_flows",
        "VendorInvoice",
        &document_flows.vendor_invoices,
    );
    self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
    self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);

    // Opening balances.
    let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;

    // Journal entries from document flows plus standalone generation.
    let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;

    // Fold the fixed-asset acquisition JEs into the main entry list.
    if !fa_journal_entries.is_empty() {
        debug!(
            "Appending {} FA acquisition JEs to main entries",
            fa_journal_entries.len()
        );
        entries.extend(fa_journal_entries);
    }

    // Counterfactual pairs are derived from the entries gathered so far.
    let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;

    // Degradation actions are read once here and reused by the anomaly and
    // data-quality injection phases further down.
    let actions = self.get_degradation_actions();

    // Sourcing data.
    let sourcing = self.phase_sourcing_data(&mut stats)?;

    // Give every purchase order that has no contract the first sourcing
    // contract whose vendor matches the order's vendor.
    if !sourcing.contracts.is_empty() {
        let mut linked_count = 0usize;
        for chain in &mut document_flows.p2p_chains {
            if chain.purchase_order.contract_id.is_none() {
                if let Some(contract) = sourcing
                    .contracts
                    .iter()
                    .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
                {
                    chain.purchase_order.contract_id = Some(contract.contract_id.clone());
                    linked_count += 1;
                }
            }
        }
        if linked_count > 0 {
            debug!(
                "Linked {} purchase orders to S2C contracts by vendor match",
                linked_count
            );
        }
    }

    // Intercompany data.
    let intercompany = self.phase_intercompany(&mut stats)?;

    // Append cloned seller and buyer intercompany JEs; the originals stay in
    // the intercompany snapshot that is returned.
    if !intercompany.seller_journal_entries.is_empty()
        || !intercompany.buyer_journal_entries.is_empty()
    {
        let ic_je_count = intercompany.seller_journal_entries.len()
            + intercompany.buyer_journal_entries.len();
        entries.extend(intercompany.seller_journal_entries.iter().cloned());
        entries.extend(intercompany.buyer_journal_entries.iter().cloned());
        debug!(
            "Appended {} IC journal entries to main entries",
            ic_je_count
        );
    }

    // HR data.
    let hr = self.phase_hr_data(&mut stats)?;

    // Journal entries derived from payroll runs.
    if !hr.payroll_runs.is_empty() {
        let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
        debug!("Generated {} JEs from payroll runs", payroll_jes.len());
        entries.extend(payroll_jes);
    }

    // Manufacturing data.
    let manufacturing_snap = self.phase_manufacturing(&mut stats)?;

    // Journal entries derived from production orders.
    if !manufacturing_snap.production_orders.is_empty() {
        let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
        debug!("Generated {} JEs from production orders", mfg_jes.len());
        entries.extend(mfg_jes);
    }

    // Recompute the entry and line-item totals now that every JE source has
    // been appended.
    if !entries.is_empty() {
        stats.total_entries = entries.len() as u64;
        stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
        debug!(
            "Final entry count: {}, line items: {} (after all JE-generating phases)",
            stats.total_entries, stats.total_line_items
        );
    }

    // Apply internal controls to every entry when enabled.
    if self.config.internal_controls.enabled && !entries.is_empty() {
        info!("Phase 7b: Applying internal controls to journal entries");
        let control_config = ControlGeneratorConfig {
            exception_rate: self.config.internal_controls.exception_rate,
            sod_violation_rate: self.config.internal_controls.sod_violation_rate,
            enable_sox_marking: true,
            // Falls back to 10000 when the configured threshold cannot be
            // converted to a Decimal.
            sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
                self.config.internal_controls.sox_materiality_threshold,
            )
            .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
        };
        // The control generator gets its own seed, offset from the base seed by 99.
        let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
        for entry in &mut entries {
            control_gen.apply_controls(entry, &coa);
        }
        let with_controls = entries
            .iter()
            .filter(|e| !e.header.control_ids.is_empty())
            .count();
        info!(
            "Applied controls to {} entries ({} with control IDs assigned)",
            entries.len(),
            with_controls
        );
    }

    // Entries are emitted before anomaly injection mutates them.
    self.emit_phase_items("journal_entries", "JournalEntry", &entries);

    // Anomaly injection (may mutate entries).
    let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;

    self.emit_phase_items(
        "anomaly_injection",
        "LabeledAnomaly",
        &anomaly_labels.labels,
    );

    // Red flags.
    let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;

    self.emit_phase_items("red_flags", "RedFlag", &red_flags);

    // Collusion rings.
    let collusion_rings = self.phase_collusion_rings(&mut stats)?;

    self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);

    // Balance validation runs after anomaly injection and before
    // data-quality injection.
    let balance_validation = self.phase_balance_validation(&entries)?;

    // Subledger reconciliation.
    let subledger_reconciliation =
        self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;

    // Data-quality injection (may mutate entries).
    let data_quality_stats =
        self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;

    // Audit data.
    let audit = self.phase_audit_data(&entries, &mut stats)?;

    // Banking data.
    let banking = self.phase_banking_data(&mut stats)?;

    // Graph export.
    let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;

    // These three phases return nothing and cannot fail from this caller's view.
    self.phase_llm_enrichment(&mut stats);

    self.phase_diffusion_enhancement(&mut stats);

    self.phase_causal_overlay(&mut stats);

    // Financial reporting.
    let financial_reporting =
        self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;

    // Accounting standards.
    let accounting_standards = self.phase_accounting_standards(&mut stats)?;

    // OCPM events are built from the snapshots of the earlier phases.
    let ocpm = self.phase_ocpm_events(
        &document_flows,
        &sourcing,
        &hr,
        &manufacturing_snap,
        &banking,
        &audit,
        &financial_reporting,
        &mut stats,
    )?;

    if let Some(ref event_log) = ocpm.event_log {
        self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
    }

    // Sales, KPI and budget data.
    let sales_kpi_budgets =
        self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;

    // Tax data.
    let tax = self.phase_tax_generation(&document_flows, &mut stats)?;

    // ESG data.
    let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;

    // Treasury data.
    let treasury =
        self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;

    // Project accounting.
    let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;

    // Process evolution and organizational events.
    let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;

    // Disruption events.
    let disruption_events = self.phase_disruption_events(&mut stats)?;

    // Temporal attributes.
    let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;

    // Entity relationship graph and cross-process links.
    let (entity_relationship_graph, cross_process_links) =
        self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;

    // Industry-specific output.
    let industry_output = self.phase_industry_data(&mut stats);

    // Hypergraph export runs once all the snapshots it consumes exist.
    self.phase_hypergraph_export(
        &coa,
        &entries,
        &document_flows,
        &sourcing,
        &hr,
        &manufacturing_snap,
        &banking,
        &audit,
        &financial_reporting,
        &ocpm,
        &mut stats,
    )?;

    // Additional graphs use the same enable condition as the main graph export.
    if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
        self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
    }

    // Log notes for config sections that this workflow does not fully act on.
    if self.config.streaming.enabled {
        info!("Note: streaming config is enabled but batch mode does not use it");
    }
    if self.config.vendor_network.enabled {
        debug!("Vendor network config available; relationship graph generation is partial");
    }
    if self.config.customer_segmentation.enabled {
        debug!("Customer segmentation config available; segment-aware generation is partial");
    }

    // Final resource summary; peak memory is converted from bytes to MB.
    let resource_stats = self.resource_guard.stats();
    info!(
        "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
        resource_stats.memory.peak_resident_bytes / (1024 * 1024),
        resource_stats.disk.estimated_bytes_written,
        resource_stats.degradation_level
    );

    // A failed sink flush is logged but does not fail the run.
    if let Some(ref sink) = self.phase_sink {
        if let Err(e) = sink.flush() {
            warn!("Stream sink flush failed: {e}");
        }
    }

    let lineage = self.build_lineage_graph();

    // Quality gates: build an evaluation from the balance validation result
    // and a few hard-coded scores, then evaluate it against the configured
    // profile.
    let gate_result = if self.config.quality_gates.enabled {
        let profile_name = &self.config.quality_gates.profile;
        match datasynth_eval::gates::get_profile(profile_name) {
            Some(profile) => {
                let mut eval = datasynth_eval::ComprehensiveEvaluation::new();

                // Balance details are only filled in when validation actually ran.
                if balance_validation.validated {
                    eval.coherence.balance =
                        Some(datasynth_eval::coherence::BalanceSheetEvaluation {
                            equation_balanced: balance_validation.is_balanced,
                            max_imbalance: (balance_validation.total_debits
                                - balance_validation.total_credits)
                                .abs(),
                            periods_evaluated: 1,
                            periods_imbalanced: if balance_validation.is_balanced {
                                0
                            } else {
                                1
                            },
                            period_results: Vec::new(),
                            companies_evaluated: self.config.companies.len(),
                        });
                }

                // NOTE(review): `is_balanced` is read here even when validation did
                // not run, so the outcome then depends on the default value of
                // `BalanceValidationResult` — confirm that is intended.
                eval.coherence.passes = balance_validation.is_balanced;
                if !balance_validation.is_balanced {
                    eval.coherence
                        .failures
                        .push("Balance sheet equation not satisfied".to_string());
                }

                // Statistical and quality scores are fixed constants, not measurements.
                eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
                eval.statistical.passes = !entries.is_empty();

                eval.quality.overall_score = 0.9; eval.quality.passes = true;

                let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
                info!(
                    "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
                    profile_name, result.gates_passed, result.gates_total, result.summary
                );
                Some(result)
            }
            None => {
                warn!(
                    "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
                    profile_name
                );
                None
            }
        }
    } else {
        None
    };

    // The standard control catalogue is returned only when controls are enabled.
    let internal_controls = if self.config.internal_controls.enabled {
        InternalControl::standard_controls()
    } else {
        Vec::new()
    };

    Ok(EnhancedGenerationResult {
        // Take the chart out of the Arc when this is the only handle; otherwise clone it.
        chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
        // Moves the master data out of the orchestrator, leaving the default behind.
        master_data: std::mem::take(&mut self.master_data),
        document_flows,
        subledger,
        ocpm,
        audit,
        banking,
        graph_export,
        sourcing,
        financial_reporting,
        hr,
        accounting_standards,
        manufacturing: manufacturing_snap,
        sales_kpi_budgets,
        tax,
        esg: esg_snap,
        treasury,
        project_accounting,
        process_evolution,
        organizational_events,
        disruption_events,
        intercompany,
        journal_entries: entries,
        anomaly_labels,
        balance_validation,
        data_quality_stats,
        statistics: stats,
        lineage: Some(lineage),
        gate_result,
        internal_controls,
        opening_balances,
        subledger_reconciliation,
        counterfactual_pairs,
        red_flags,
        collusion_rings,
        temporal_vendor_chains,
        entity_relationship_graph,
        cross_process_links,
        industry_output,
    })
}
1949
1950 fn phase_chart_of_accounts(
1956 &mut self,
1957 stats: &mut EnhancedGenerationStatistics,
1958 ) -> SynthResult<Arc<ChartOfAccounts>> {
1959 info!("Phase 1: Generating Chart of Accounts");
1960 let coa = self.generate_coa()?;
1961 stats.accounts_count = coa.account_count();
1962 info!(
1963 "Chart of Accounts generated: {} accounts",
1964 stats.accounts_count
1965 );
1966 self.check_resources_with_log("post-coa")?;
1967 Ok(coa)
1968 }
1969
1970 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1972 if self.phase_config.generate_master_data {
1973 info!("Phase 2: Generating Master Data");
1974 self.generate_master_data()?;
1975 stats.vendor_count = self.master_data.vendors.len();
1976 stats.customer_count = self.master_data.customers.len();
1977 stats.material_count = self.master_data.materials.len();
1978 stats.asset_count = self.master_data.assets.len();
1979 stats.employee_count = self.master_data.employees.len();
1980 info!(
1981 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1982 stats.vendor_count, stats.customer_count, stats.material_count,
1983 stats.asset_count, stats.employee_count
1984 );
1985 self.check_resources_with_log("post-master-data")?;
1986 } else {
1987 debug!("Phase 2: Skipped (master data generation disabled)");
1988 }
1989 Ok(())
1990 }
1991
1992 fn phase_document_flows(
1994 &mut self,
1995 stats: &mut EnhancedGenerationStatistics,
1996 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1997 let mut document_flows = DocumentFlowSnapshot::default();
1998 let mut subledger = SubledgerSnapshot::default();
1999
2000 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2001 info!("Phase 3: Generating Document Flows");
2002 self.generate_document_flows(&mut document_flows)?;
2003 stats.p2p_chain_count = document_flows.p2p_chains.len();
2004 stats.o2c_chain_count = document_flows.o2c_chains.len();
2005 info!(
2006 "Document flows generated: {} P2P chains, {} O2C chains",
2007 stats.p2p_chain_count, stats.o2c_chain_count
2008 );
2009
2010 debug!("Phase 3b: Linking document flows to subledgers");
2012 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2013 stats.ap_invoice_count = subledger.ap_invoices.len();
2014 stats.ar_invoice_count = subledger.ar_invoices.len();
2015 debug!(
2016 "Subledgers linked: {} AP invoices, {} AR invoices",
2017 stats.ap_invoice_count, stats.ar_invoice_count
2018 );
2019
2020 self.check_resources_with_log("post-document-flows")?;
2021 } else {
2022 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2023 }
2024
2025 let mut fa_journal_entries = Vec::new();
2027 if !self.master_data.assets.is_empty() {
2028 debug!("Generating FA subledger records");
2029 let company_code = self
2030 .config
2031 .companies
2032 .first()
2033 .map(|c| c.code.as_str())
2034 .unwrap_or("1000");
2035 let currency = self
2036 .config
2037 .companies
2038 .first()
2039 .map(|c| c.currency.as_str())
2040 .unwrap_or("USD");
2041
2042 let mut fa_gen = datasynth_generators::FAGenerator::new(
2043 datasynth_generators::FAGeneratorConfig::default(),
2044 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2045 );
2046
2047 for asset in &self.master_data.assets {
2048 let (record, je) = fa_gen.generate_asset_acquisition(
2049 company_code,
2050 &format!("{:?}", asset.asset_class),
2051 &asset.description,
2052 asset.acquisition_date,
2053 currency,
2054 asset.cost_center.as_deref(),
2055 );
2056 subledger.fa_records.push(record);
2057 fa_journal_entries.push(je);
2058 }
2059
2060 stats.fa_subledger_count = subledger.fa_records.len();
2061 debug!(
2062 "FA subledger records generated: {} (with {} acquisition JEs)",
2063 stats.fa_subledger_count,
2064 fa_journal_entries.len()
2065 );
2066 }
2067
2068 if !self.master_data.materials.is_empty() {
2070 debug!("Generating Inventory subledger records");
2071 let first_company = self.config.companies.first();
2072 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2073 let inv_currency = first_company
2074 .map(|c| c.currency.clone())
2075 .unwrap_or_else(|| "USD".to_string());
2076
2077 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2078 datasynth_generators::InventoryGeneratorConfig::default(),
2079 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2080 inv_currency.clone(),
2081 );
2082
2083 for (i, material) in self.master_data.materials.iter().enumerate() {
2084 let plant = format!("PLANT{:02}", (i % 3) + 1);
2085 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2086 let initial_qty = rust_decimal::Decimal::from(
2087 material
2088 .safety_stock
2089 .to_string()
2090 .parse::<i64>()
2091 .unwrap_or(100),
2092 );
2093
2094 let position = inv_gen.generate_position(
2095 company_code,
2096 &plant,
2097 &storage_loc,
2098 &material.material_id,
2099 &material.description,
2100 initial_qty,
2101 Some(material.standard_cost),
2102 &inv_currency,
2103 );
2104 subledger.inventory_positions.push(position);
2105 }
2106
2107 stats.inventory_subledger_count = subledger.inventory_positions.len();
2108 debug!(
2109 "Inventory subledger records generated: {}",
2110 stats.inventory_subledger_count
2111 );
2112 }
2113
2114 Ok((document_flows, subledger, fa_journal_entries))
2115 }
2116
2117 #[allow(clippy::too_many_arguments)]
2119 fn phase_ocpm_events(
2120 &mut self,
2121 document_flows: &DocumentFlowSnapshot,
2122 sourcing: &SourcingSnapshot,
2123 hr: &HrSnapshot,
2124 manufacturing: &ManufacturingSnapshot,
2125 banking: &BankingSnapshot,
2126 audit: &AuditSnapshot,
2127 financial_reporting: &FinancialReportingSnapshot,
2128 stats: &mut EnhancedGenerationStatistics,
2129 ) -> SynthResult<OcpmSnapshot> {
2130 if self.phase_config.generate_ocpm_events {
2131 info!("Phase 3c: Generating OCPM Events");
2132 let ocpm_snapshot = self.generate_ocpm_events(
2133 document_flows,
2134 sourcing,
2135 hr,
2136 manufacturing,
2137 banking,
2138 audit,
2139 financial_reporting,
2140 )?;
2141 stats.ocpm_event_count = ocpm_snapshot.event_count;
2142 stats.ocpm_object_count = ocpm_snapshot.object_count;
2143 stats.ocpm_case_count = ocpm_snapshot.case_count;
2144 info!(
2145 "OCPM events generated: {} events, {} objects, {} cases",
2146 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2147 );
2148 self.check_resources_with_log("post-ocpm")?;
2149 Ok(ocpm_snapshot)
2150 } else {
2151 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2152 Ok(OcpmSnapshot::default())
2153 }
2154 }
2155
2156 fn phase_journal_entries(
2158 &mut self,
2159 coa: &Arc<ChartOfAccounts>,
2160 document_flows: &DocumentFlowSnapshot,
2161 _stats: &mut EnhancedGenerationStatistics,
2162 ) -> SynthResult<Vec<JournalEntry>> {
2163 let mut entries = Vec::new();
2164
2165 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2167 debug!("Phase 4a: Generating JEs from document flows");
2168 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2169 debug!("Generated {} JEs from document flows", flow_entries.len());
2170 entries.extend(flow_entries);
2171 }
2172
2173 if self.phase_config.generate_journal_entries {
2175 info!("Phase 4: Generating Journal Entries");
2176 let je_entries = self.generate_journal_entries(coa)?;
2177 info!("Generated {} standalone journal entries", je_entries.len());
2178 entries.extend(je_entries);
2179 } else {
2180 debug!("Phase 4: Skipped (journal entry generation disabled)");
2181 }
2182
2183 if !entries.is_empty() {
2184 self.check_resources_with_log("post-journal-entries")?;
2187 }
2188
2189 Ok(entries)
2190 }
2191
2192 fn phase_anomaly_injection(
2194 &mut self,
2195 entries: &mut [JournalEntry],
2196 actions: &DegradationActions,
2197 stats: &mut EnhancedGenerationStatistics,
2198 ) -> SynthResult<AnomalyLabels> {
2199 if self.phase_config.inject_anomalies
2200 && !entries.is_empty()
2201 && !actions.skip_anomaly_injection
2202 {
2203 info!("Phase 5: Injecting Anomalies");
2204 let result = self.inject_anomalies(entries)?;
2205 stats.anomalies_injected = result.labels.len();
2206 info!("Injected {} anomalies", stats.anomalies_injected);
2207 self.check_resources_with_log("post-anomaly-injection")?;
2208 Ok(result)
2209 } else if actions.skip_anomaly_injection {
2210 warn!("Phase 5: Skipped due to resource degradation");
2211 Ok(AnomalyLabels::default())
2212 } else {
2213 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2214 Ok(AnomalyLabels::default())
2215 }
2216 }
2217
2218 fn phase_balance_validation(
2220 &mut self,
2221 entries: &[JournalEntry],
2222 ) -> SynthResult<BalanceValidationResult> {
2223 if self.phase_config.validate_balances && !entries.is_empty() {
2224 debug!("Phase 6: Validating Balances");
2225 let balance_validation = self.validate_journal_entries(entries)?;
2226 if balance_validation.is_balanced {
2227 debug!("Balance validation passed");
2228 } else {
2229 warn!(
2230 "Balance validation found {} errors",
2231 balance_validation.validation_errors.len()
2232 );
2233 }
2234 Ok(balance_validation)
2235 } else {
2236 Ok(BalanceValidationResult::default())
2237 }
2238 }
2239
2240 fn phase_data_quality_injection(
2242 &mut self,
2243 entries: &mut [JournalEntry],
2244 actions: &DegradationActions,
2245 stats: &mut EnhancedGenerationStatistics,
2246 ) -> SynthResult<DataQualityStats> {
2247 if self.phase_config.inject_data_quality
2248 && !entries.is_empty()
2249 && !actions.skip_data_quality
2250 {
2251 info!("Phase 7: Injecting Data Quality Variations");
2252 let dq_stats = self.inject_data_quality(entries)?;
2253 stats.data_quality_issues = dq_stats.records_with_issues;
2254 info!("Injected {} data quality issues", stats.data_quality_issues);
2255 self.check_resources_with_log("post-data-quality")?;
2256 Ok(dq_stats)
2257 } else if actions.skip_data_quality {
2258 warn!("Phase 7: Skipped due to resource degradation");
2259 Ok(DataQualityStats::default())
2260 } else {
2261 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2262 Ok(DataQualityStats::default())
2263 }
2264 }
2265
2266 fn phase_audit_data(
2268 &mut self,
2269 entries: &[JournalEntry],
2270 stats: &mut EnhancedGenerationStatistics,
2271 ) -> SynthResult<AuditSnapshot> {
2272 if self.phase_config.generate_audit {
2273 info!("Phase 8: Generating Audit Data");
2274 let audit_snapshot = self.generate_audit_data(entries)?;
2275 stats.audit_engagement_count = audit_snapshot.engagements.len();
2276 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2277 stats.audit_evidence_count = audit_snapshot.evidence.len();
2278 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2279 stats.audit_finding_count = audit_snapshot.findings.len();
2280 stats.audit_judgment_count = audit_snapshot.judgments.len();
2281 info!(
2282 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2283 stats.audit_engagement_count, stats.audit_workpaper_count,
2284 stats.audit_evidence_count, stats.audit_risk_count,
2285 stats.audit_finding_count, stats.audit_judgment_count
2286 );
2287 self.check_resources_with_log("post-audit")?;
2288 Ok(audit_snapshot)
2289 } else {
2290 debug!("Phase 8: Skipped (audit generation disabled)");
2291 Ok(AuditSnapshot::default())
2292 }
2293 }
2294
2295 fn phase_banking_data(
2297 &mut self,
2298 stats: &mut EnhancedGenerationStatistics,
2299 ) -> SynthResult<BankingSnapshot> {
2300 if self.phase_config.generate_banking && self.config.banking.enabled {
2301 info!("Phase 9: Generating Banking KYC/AML Data");
2302 let banking_snapshot = self.generate_banking_data()?;
2303 stats.banking_customer_count = banking_snapshot.customers.len();
2304 stats.banking_account_count = banking_snapshot.accounts.len();
2305 stats.banking_transaction_count = banking_snapshot.transactions.len();
2306 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2307 info!(
2308 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2309 stats.banking_customer_count, stats.banking_account_count,
2310 stats.banking_transaction_count, stats.banking_suspicious_count
2311 );
2312 self.check_resources_with_log("post-banking")?;
2313 Ok(banking_snapshot)
2314 } else {
2315 debug!("Phase 9: Skipped (banking generation disabled)");
2316 Ok(BankingSnapshot::default())
2317 }
2318 }
2319
2320 fn phase_graph_export(
2322 &mut self,
2323 entries: &[JournalEntry],
2324 coa: &Arc<ChartOfAccounts>,
2325 stats: &mut EnhancedGenerationStatistics,
2326 ) -> SynthResult<GraphExportSnapshot> {
2327 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2328 && !entries.is_empty()
2329 {
2330 info!("Phase 10: Exporting Accounting Network Graphs");
2331 match self.export_graphs(entries, coa, stats) {
2332 Ok(snapshot) => {
2333 info!(
2334 "Graph export complete: {} graphs ({} nodes, {} edges)",
2335 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2336 );
2337 Ok(snapshot)
2338 }
2339 Err(e) => {
2340 warn!("Phase 10: Graph export failed: {}", e);
2341 Ok(GraphExportSnapshot::default())
2342 }
2343 }
2344 } else {
2345 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2346 Ok(GraphExportSnapshot::default())
2347 }
2348 }
2349
2350 #[allow(clippy::too_many_arguments)]
2352 fn phase_hypergraph_export(
2353 &self,
2354 coa: &Arc<ChartOfAccounts>,
2355 entries: &[JournalEntry],
2356 document_flows: &DocumentFlowSnapshot,
2357 sourcing: &SourcingSnapshot,
2358 hr: &HrSnapshot,
2359 manufacturing: &ManufacturingSnapshot,
2360 banking: &BankingSnapshot,
2361 audit: &AuditSnapshot,
2362 financial_reporting: &FinancialReportingSnapshot,
2363 ocpm: &OcpmSnapshot,
2364 stats: &mut EnhancedGenerationStatistics,
2365 ) -> SynthResult<()> {
2366 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2367 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2368 match self.export_hypergraph(
2369 coa,
2370 entries,
2371 document_flows,
2372 sourcing,
2373 hr,
2374 manufacturing,
2375 banking,
2376 audit,
2377 financial_reporting,
2378 ocpm,
2379 stats,
2380 ) {
2381 Ok(info) => {
2382 info!(
2383 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2384 info.node_count, info.edge_count, info.hyperedge_count
2385 );
2386 }
2387 Err(e) => {
2388 warn!("Phase 10b: Hypergraph export failed: {}", e);
2389 }
2390 }
2391 } else {
2392 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2393 }
2394 Ok(())
2395 }
2396
2397 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2403 if !self.config.llm.enabled {
2404 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2405 return;
2406 }
2407
2408 info!("Phase 11: Starting LLM Enrichment");
2409 let start = std::time::Instant::now();
2410
2411 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2412 let provider = Arc::new(MockLlmProvider::new(self.seed));
2413 let enricher = VendorLlmEnricher::new(provider);
2414
2415 let industry = format!("{:?}", self.config.global.industry);
2416 let max_enrichments = self
2417 .config
2418 .llm
2419 .max_vendor_enrichments
2420 .min(self.master_data.vendors.len());
2421
2422 let mut enriched_count = 0usize;
2423 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2424 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2425 Ok(name) => {
2426 vendor.name = name;
2427 enriched_count += 1;
2428 }
2429 Err(e) => {
2430 warn!(
2431 "LLM vendor enrichment failed for {}: {}",
2432 vendor.vendor_id, e
2433 );
2434 }
2435 }
2436 }
2437
2438 enriched_count
2439 }));
2440
2441 match result {
2442 Ok(enriched_count) => {
2443 stats.llm_vendors_enriched = enriched_count;
2444 let elapsed = start.elapsed();
2445 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2446 info!(
2447 "Phase 11 complete: {} vendors enriched in {}ms",
2448 enriched_count, stats.llm_enrichment_ms
2449 );
2450 }
2451 Err(_) => {
2452 let elapsed = start.elapsed();
2453 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2454 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2455 }
2456 }
2457 }
2458
2459 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2465 if !self.config.diffusion.enabled {
2466 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2467 return;
2468 }
2469
2470 info!("Phase 12: Starting Diffusion Enhancement");
2471 let start = std::time::Instant::now();
2472
2473 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2474 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2477
2478 let diffusion_config = DiffusionConfig {
2479 n_steps: self.config.diffusion.n_steps,
2480 seed: self.seed,
2481 ..Default::default()
2482 };
2483
2484 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2485
2486 let n_samples = self.config.diffusion.sample_size;
2487 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2489
2490 samples.len()
2491 }));
2492
2493 match result {
2494 Ok(sample_count) => {
2495 stats.diffusion_samples_generated = sample_count;
2496 let elapsed = start.elapsed();
2497 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2498 info!(
2499 "Phase 12 complete: {} diffusion samples generated in {}ms",
2500 sample_count, stats.diffusion_enhancement_ms
2501 );
2502 }
2503 Err(_) => {
2504 let elapsed = start.elapsed();
2505 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2506 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2507 }
2508 }
2509 }
2510
2511 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2518 if !self.config.causal.enabled {
2519 debug!("Phase 13: Skipped (causal generation disabled)");
2520 return;
2521 }
2522
2523 info!("Phase 13: Starting Causal Overlay");
2524 let start = std::time::Instant::now();
2525
2526 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2527 let graph = match self.config.causal.template.as_str() {
2529 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2530 _ => CausalGraph::fraud_detection_template(),
2531 };
2532
2533 let scm = StructuralCausalModel::new(graph.clone())
2534 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
2535
2536 let n_samples = self.config.causal.sample_size;
2537 let samples = scm
2538 .generate(n_samples, self.seed)
2539 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
2540
2541 let validation_passed = if self.config.causal.validate {
2543 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2544 if report.valid {
2545 info!(
2546 "Causal validation passed: all {} checks OK",
2547 report.checks.len()
2548 );
2549 } else {
2550 warn!(
2551 "Causal validation: {} violations detected: {:?}",
2552 report.violations.len(),
2553 report.violations
2554 );
2555 }
2556 Some(report.valid)
2557 } else {
2558 None
2559 };
2560
2561 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2562 }));
2563
2564 match result {
2565 Ok(Ok((sample_count, validation_passed))) => {
2566 stats.causal_samples_generated = sample_count;
2567 stats.causal_validation_passed = validation_passed;
2568 let elapsed = start.elapsed();
2569 stats.causal_generation_ms = elapsed.as_millis() as u64;
2570 info!(
2571 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2572 sample_count, stats.causal_generation_ms, validation_passed,
2573 );
2574 }
2575 Ok(Err(e)) => {
2576 let elapsed = start.elapsed();
2577 stats.causal_generation_ms = elapsed.as_millis() as u64;
2578 warn!("Phase 13: Causal generation failed: {}", e);
2579 }
2580 Err(_) => {
2581 let elapsed = start.elapsed();
2582 stats.causal_generation_ms = elapsed.as_millis() as u64;
2583 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2584 }
2585 }
2586 }
2587
2588 fn phase_sourcing_data(
2590 &mut self,
2591 stats: &mut EnhancedGenerationStatistics,
2592 ) -> SynthResult<SourcingSnapshot> {
2593 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2594 debug!("Phase 14: Skipped (sourcing generation disabled)");
2595 return Ok(SourcingSnapshot::default());
2596 }
2597
2598 info!("Phase 14: Generating S2C Sourcing Data");
2599 let seed = self.seed;
2600
2601 let vendor_ids: Vec<String> = self
2603 .master_data
2604 .vendors
2605 .iter()
2606 .map(|v| v.vendor_id.clone())
2607 .collect();
2608 if vendor_ids.is_empty() {
2609 debug!("Phase 14: Skipped (no vendors available)");
2610 return Ok(SourcingSnapshot::default());
2611 }
2612
2613 let categories: Vec<(String, String)> = vec![
2614 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2615 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2616 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2617 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2618 ("CAT-LOG".to_string(), "Logistics".to_string()),
2619 ];
2620 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2621 .iter()
2622 .map(|(id, name)| {
2623 (
2624 id.clone(),
2625 name.clone(),
2626 rust_decimal::Decimal::from(100_000),
2627 )
2628 })
2629 .collect();
2630
2631 let company_code = self
2632 .config
2633 .companies
2634 .first()
2635 .map(|c| c.code.as_str())
2636 .unwrap_or("1000");
2637 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2638 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2639 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2640 let fiscal_year = start_date.year() as u16;
2641 let owner_ids: Vec<String> = self
2642 .master_data
2643 .employees
2644 .iter()
2645 .take(5)
2646 .map(|e| e.employee_id.clone())
2647 .collect();
2648 let owner_id = owner_ids
2649 .first()
2650 .map(std::string::String::as_str)
2651 .unwrap_or("BUYER-001");
2652
2653 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2655 let spend_analyses =
2656 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2657
2658 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2660 let sourcing_projects = if owner_ids.is_empty() {
2661 Vec::new()
2662 } else {
2663 project_gen.generate(
2664 company_code,
2665 &categories_with_spend,
2666 &owner_ids,
2667 start_date,
2668 self.config.global.period_months,
2669 )
2670 };
2671 stats.sourcing_project_count = sourcing_projects.len();
2672
2673 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2675 let mut qual_gen = QualificationGenerator::new(seed + 2);
2676 let qualifications = qual_gen.generate(
2677 company_code,
2678 &qual_vendor_ids,
2679 sourcing_projects.first().map(|p| p.project_id.as_str()),
2680 owner_id,
2681 start_date,
2682 );
2683
2684 let mut rfx_gen = RfxGenerator::new(seed + 3);
2686 let rfx_events: Vec<RfxEvent> = sourcing_projects
2687 .iter()
2688 .map(|proj| {
2689 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2690 rfx_gen.generate(
2691 company_code,
2692 &proj.project_id,
2693 &proj.category_id,
2694 &qualified_vids,
2695 owner_id,
2696 start_date,
2697 50000.0,
2698 )
2699 })
2700 .collect();
2701 stats.rfx_event_count = rfx_events.len();
2702
2703 let mut bid_gen = BidGenerator::new(seed + 4);
2705 let mut all_bids = Vec::new();
2706 for rfx in &rfx_events {
2707 let bidder_count = vendor_ids.len().clamp(2, 5);
2708 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2709 let bids = bid_gen.generate(rfx, &responding, start_date);
2710 all_bids.extend(bids);
2711 }
2712 stats.bid_count = all_bids.len();
2713
2714 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2716 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2717 .iter()
2718 .map(|rfx| {
2719 let rfx_bids: Vec<SupplierBid> = all_bids
2720 .iter()
2721 .filter(|b| b.rfx_id == rfx.rfx_id)
2722 .cloned()
2723 .collect();
2724 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2725 })
2726 .collect();
2727
2728 let mut contract_gen = ContractGenerator::new(seed + 6);
2730 let contracts: Vec<ProcurementContract> = bid_evaluations
2731 .iter()
2732 .zip(rfx_events.iter())
2733 .filter_map(|(eval, rfx)| {
2734 eval.ranked_bids.first().and_then(|winner| {
2735 all_bids
2736 .iter()
2737 .find(|b| b.bid_id == winner.bid_id)
2738 .map(|winning_bid| {
2739 contract_gen.generate_from_bid(
2740 winning_bid,
2741 Some(&rfx.sourcing_project_id),
2742 &rfx.category_id,
2743 owner_id,
2744 start_date,
2745 )
2746 })
2747 })
2748 })
2749 .collect();
2750 stats.contract_count = contracts.len();
2751
2752 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2754 let catalog_items = catalog_gen.generate(&contracts);
2755 stats.catalog_item_count = catalog_items.len();
2756
2757 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2759 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2760 .iter()
2761 .fold(
2762 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2763 |mut acc, c| {
2764 acc.entry(c.vendor_id.clone()).or_default().push(c);
2765 acc
2766 },
2767 )
2768 .into_iter()
2769 .collect();
2770 let scorecards = scorecard_gen.generate(
2771 company_code,
2772 &vendor_contracts,
2773 start_date,
2774 end_date,
2775 owner_id,
2776 );
2777 stats.scorecard_count = scorecards.len();
2778
2779 let mut sourcing_projects = sourcing_projects;
2782 for project in &mut sourcing_projects {
2783 project.rfx_ids = rfx_events
2785 .iter()
2786 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2787 .map(|rfx| rfx.rfx_id.clone())
2788 .collect();
2789
2790 project.contract_id = contracts
2792 .iter()
2793 .find(|c| {
2794 c.sourcing_project_id
2795 .as_deref()
2796 .is_some_and(|sp| sp == project.project_id)
2797 })
2798 .map(|c| c.contract_id.clone());
2799
2800 project.spend_analysis_id = spend_analyses
2802 .iter()
2803 .find(|sa| sa.category_id == project.category_id)
2804 .map(|sa| sa.category_id.clone());
2805 }
2806
2807 info!(
2808 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2809 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2810 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2811 );
2812 self.check_resources_with_log("post-sourcing")?;
2813
2814 Ok(SourcingSnapshot {
2815 spend_analyses,
2816 sourcing_projects,
2817 qualifications,
2818 rfx_events,
2819 bids: all_bids,
2820 bid_evaluations,
2821 contracts,
2822 catalog_items,
2823 scorecards,
2824 })
2825 }
2826
2827 fn phase_intercompany(
2829 &mut self,
2830 stats: &mut EnhancedGenerationStatistics,
2831 ) -> SynthResult<IntercompanySnapshot> {
2832 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2834 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2835 return Ok(IntercompanySnapshot::default());
2836 }
2837
2838 if self.config.companies.len() < 2 {
2840 debug!(
2841 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2842 self.config.companies.len()
2843 );
2844 return Ok(IntercompanySnapshot::default());
2845 }
2846
2847 info!("Phase 14b: Generating Intercompany Transactions");
2848
2849 let seed = self.seed;
2850 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2851 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2852 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2853
2854 let parent_code = self.config.companies[0].code.clone();
2857 let mut ownership_structure =
2858 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2859
2860 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2861 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2862 format!("REL{:03}", i + 1),
2863 parent_code.clone(),
2864 company.code.clone(),
2865 rust_decimal::Decimal::from(100), start_date,
2867 );
2868 ownership_structure.add_relationship(relationship);
2869 }
2870
2871 let tp_method = match self.config.intercompany.transfer_pricing_method {
2873 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2874 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2875 }
2876 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2877 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2878 }
2879 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2880 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2881 }
2882 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2883 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2884 }
2885 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2886 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2887 }
2888 };
2889
2890 let ic_currency = self
2892 .config
2893 .companies
2894 .first()
2895 .map(|c| c.currency.clone())
2896 .unwrap_or_else(|| "USD".to_string());
2897 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2898 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2899 transfer_pricing_method: tp_method,
2900 markup_percent: rust_decimal::Decimal::from_f64_retain(
2901 self.config.intercompany.markup_percent,
2902 )
2903 .unwrap_or(rust_decimal::Decimal::from(5)),
2904 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2905 default_currency: ic_currency,
2906 ..Default::default()
2907 };
2908
2909 let mut ic_generator = datasynth_generators::ICGenerator::new(
2911 ic_gen_config,
2912 ownership_structure.clone(),
2913 seed + 50,
2914 );
2915
2916 let transactions_per_day = 3;
2919 let matched_pairs = ic_generator.generate_transactions_for_period(
2920 start_date,
2921 end_date,
2922 transactions_per_day,
2923 );
2924
2925 let mut seller_entries = Vec::new();
2927 let mut buyer_entries = Vec::new();
2928 let fiscal_year = start_date.year();
2929
2930 for pair in &matched_pairs {
2931 let fiscal_period = pair.posting_date.month();
2932 let (seller_je, buyer_je) =
2933 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2934 seller_entries.push(seller_je);
2935 buyer_entries.push(buyer_je);
2936 }
2937
2938 let matching_config = datasynth_generators::ICMatchingConfig {
2940 base_currency: self
2941 .config
2942 .companies
2943 .first()
2944 .map(|c| c.currency.clone())
2945 .unwrap_or_else(|| "USD".to_string()),
2946 ..Default::default()
2947 };
2948 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2949 matching_engine.load_matched_pairs(&matched_pairs);
2950 let matching_result = matching_engine.run_matching(end_date);
2951
2952 let mut elimination_entries = Vec::new();
2954 if self.config.intercompany.generate_eliminations {
2955 let elim_config = datasynth_generators::EliminationConfig {
2956 consolidation_entity: "GROUP".to_string(),
2957 base_currency: self
2958 .config
2959 .companies
2960 .first()
2961 .map(|c| c.currency.clone())
2962 .unwrap_or_else(|| "USD".to_string()),
2963 ..Default::default()
2964 };
2965
2966 let mut elim_generator =
2967 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2968
2969 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2970 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2971 matching_result
2972 .matched_balances
2973 .iter()
2974 .chain(matching_result.unmatched_balances.iter())
2975 .cloned()
2976 .collect();
2977
2978 let journal = elim_generator.generate_eliminations(
2979 &fiscal_period,
2980 end_date,
2981 &all_balances,
2982 &matched_pairs,
2983 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
2986
2987 elimination_entries = journal.entries.clone();
2988 }
2989
2990 let matched_pair_count = matched_pairs.len();
2991 let elimination_entry_count = elimination_entries.len();
2992 let match_rate = matching_result.match_rate;
2993
2994 stats.ic_matched_pair_count = matched_pair_count;
2995 stats.ic_elimination_count = elimination_entry_count;
2996 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2997
2998 info!(
2999 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
3000 matched_pair_count,
3001 stats.ic_transaction_count,
3002 seller_entries.len(),
3003 buyer_entries.len(),
3004 elimination_entry_count,
3005 match_rate * 100.0
3006 );
3007 self.check_resources_with_log("post-intercompany")?;
3008
3009 Ok(IntercompanySnapshot {
3010 matched_pairs,
3011 seller_journal_entries: seller_entries,
3012 buyer_journal_entries: buyer_entries,
3013 elimination_entries,
3014 matched_pair_count,
3015 elimination_entry_count,
3016 match_rate,
3017 })
3018 }
3019
3020 fn phase_financial_reporting(
3022 &mut self,
3023 document_flows: &DocumentFlowSnapshot,
3024 journal_entries: &[JournalEntry],
3025 coa: &Arc<ChartOfAccounts>,
3026 stats: &mut EnhancedGenerationStatistics,
3027 ) -> SynthResult<FinancialReportingSnapshot> {
3028 let fs_enabled = self.phase_config.generate_financial_statements
3029 || self.config.financial_reporting.enabled;
3030 let br_enabled = self.phase_config.generate_bank_reconciliation;
3031
3032 if !fs_enabled && !br_enabled {
3033 debug!("Phase 15: Skipped (financial reporting disabled)");
3034 return Ok(FinancialReportingSnapshot::default());
3035 }
3036
3037 info!("Phase 15: Generating Financial Reporting Data");
3038
3039 let seed = self.seed;
3040 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3041 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3042
3043 let mut financial_statements = Vec::new();
3044 let mut bank_reconciliations = Vec::new();
3045 let mut trial_balances = Vec::new();
3046
3047 if fs_enabled {
3055 let company_code = self
3056 .config
3057 .companies
3058 .first()
3059 .map(|c| c.code.as_str())
3060 .unwrap_or("1000");
3061 let currency = self
3062 .config
3063 .companies
3064 .first()
3065 .map(|c| c.currency.as_str())
3066 .unwrap_or("USD");
3067 let has_journal_entries = !journal_entries.is_empty();
3068
3069 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
3072
3073 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
3075 None;
3076
3077 for period in 0..self.config.global.period_months {
3079 let period_start = start_date + chrono::Months::new(period);
3080 let period_end =
3081 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3082 let fiscal_year = period_end.year() as u16;
3083 let fiscal_period = period_end.month() as u8;
3084
3085 if has_journal_entries {
3086 let tb_entries = Self::build_cumulative_trial_balance(
3089 journal_entries,
3090 coa,
3091 company_code,
3092 start_date,
3093 period_end,
3094 fiscal_year,
3095 fiscal_period,
3096 );
3097
3098 let prior_ref = prior_cumulative_tb.as_deref();
3101 let stmts = fs_gen.generate(
3102 company_code,
3103 currency,
3104 &tb_entries,
3105 period_start,
3106 period_end,
3107 fiscal_year,
3108 fiscal_period,
3109 prior_ref,
3110 "SYS-AUTOCLOSE",
3111 );
3112
3113 for stmt in stmts {
3115 if stmt.statement_type == StatementType::CashFlowStatement {
3116 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
3118 let cf_items = Self::build_cash_flow_from_trial_balances(
3119 &tb_entries,
3120 prior_ref,
3121 net_income,
3122 );
3123 financial_statements.push(FinancialStatement {
3124 cash_flow_items: cf_items,
3125 ..stmt
3126 });
3127 } else {
3128 financial_statements.push(stmt);
3129 }
3130 }
3131
3132 trial_balances.push(PeriodTrialBalance {
3134 fiscal_year,
3135 fiscal_period,
3136 period_start,
3137 period_end,
3138 entries: tb_entries.clone(),
3139 });
3140
3141 prior_cumulative_tb = Some(tb_entries);
3143 } else {
3144 let tb_entries = Self::build_trial_balance_from_entries(
3147 journal_entries,
3148 coa,
3149 company_code,
3150 fiscal_year,
3151 fiscal_period,
3152 );
3153
3154 let stmts = fs_gen.generate(
3155 company_code,
3156 currency,
3157 &tb_entries,
3158 period_start,
3159 period_end,
3160 fiscal_year,
3161 fiscal_period,
3162 None,
3163 "SYS-AUTOCLOSE",
3164 );
3165 financial_statements.extend(stmts);
3166
3167 if !tb_entries.is_empty() {
3169 trial_balances.push(PeriodTrialBalance {
3170 fiscal_year,
3171 fiscal_period,
3172 period_start,
3173 period_end,
3174 entries: tb_entries,
3175 });
3176 }
3177 }
3178 }
3179 stats.financial_statement_count = financial_statements.len();
3180 info!(
3181 "Financial statements generated: {} statements (JE-derived: {})",
3182 stats.financial_statement_count, has_journal_entries
3183 );
3184 }
3185
3186 if br_enabled && !document_flows.payments.is_empty() {
3188 let employee_ids: Vec<String> = self
3189 .master_data
3190 .employees
3191 .iter()
3192 .map(|e| e.employee_id.clone())
3193 .collect();
3194 let mut br_gen =
3195 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
3196
3197 for company in &self.config.companies {
3199 let company_payments: Vec<PaymentReference> = document_flows
3200 .payments
3201 .iter()
3202 .filter(|p| p.header.company_code == company.code)
3203 .map(|p| PaymentReference {
3204 id: p.header.document_id.clone(),
3205 amount: if p.is_vendor { p.amount } else { -p.amount },
3206 date: p.header.document_date,
3207 reference: p
3208 .check_number
3209 .clone()
3210 .or_else(|| p.wire_reference.clone())
3211 .unwrap_or_else(|| p.header.document_id.clone()),
3212 })
3213 .collect();
3214
3215 if company_payments.is_empty() {
3216 continue;
3217 }
3218
3219 let bank_account_id = format!("{}-MAIN", company.code);
3220
3221 for period in 0..self.config.global.period_months {
3223 let period_start = start_date + chrono::Months::new(period);
3224 let period_end =
3225 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3226
3227 let period_payments: Vec<PaymentReference> = company_payments
3228 .iter()
3229 .filter(|p| p.date >= period_start && p.date <= period_end)
3230 .cloned()
3231 .collect();
3232
3233 let recon = br_gen.generate(
3234 &company.code,
3235 &bank_account_id,
3236 period_start,
3237 period_end,
3238 &company.currency,
3239 &period_payments,
3240 );
3241 bank_reconciliations.push(recon);
3242 }
3243 }
3244 info!(
3245 "Bank reconciliations generated: {} reconciliations",
3246 bank_reconciliations.len()
3247 );
3248 }
3249
3250 stats.bank_reconciliation_count = bank_reconciliations.len();
3251 self.check_resources_with_log("post-financial-reporting")?;
3252
3253 if !trial_balances.is_empty() {
3254 info!(
3255 "Period-close trial balances captured: {} periods",
3256 trial_balances.len()
3257 );
3258 }
3259
3260 Ok(FinancialReportingSnapshot {
3261 financial_statements,
3262 bank_reconciliations,
3263 trial_balances,
3264 })
3265 }
3266
3267 fn build_trial_balance_from_entries(
3273 journal_entries: &[JournalEntry],
3274 coa: &ChartOfAccounts,
3275 company_code: &str,
3276 fiscal_year: u16,
3277 fiscal_period: u8,
3278 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3279 use rust_decimal::Decimal;
3280
3281 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3283 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3284
3285 for je in journal_entries {
3286 if je.header.company_code != company_code
3288 || je.header.fiscal_year != fiscal_year
3289 || je.header.fiscal_period != fiscal_period
3290 {
3291 continue;
3292 }
3293
3294 for line in &je.lines {
3295 let acct = &line.gl_account;
3296 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3297 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3298 }
3299 }
3300
3301 let mut all_accounts: Vec<&String> = account_debits
3303 .keys()
3304 .chain(account_credits.keys())
3305 .collect::<std::collections::HashSet<_>>()
3306 .into_iter()
3307 .collect();
3308 all_accounts.sort();
3309
3310 let mut entries = Vec::new();
3311
3312 for acct_number in all_accounts {
3313 let debit = account_debits
3314 .get(acct_number)
3315 .copied()
3316 .unwrap_or(Decimal::ZERO);
3317 let credit = account_credits
3318 .get(acct_number)
3319 .copied()
3320 .unwrap_or(Decimal::ZERO);
3321
3322 if debit.is_zero() && credit.is_zero() {
3323 continue;
3324 }
3325
3326 let account_name = coa
3328 .get_account(acct_number)
3329 .map(|gl| gl.short_description.clone())
3330 .unwrap_or_else(|| format!("Account {acct_number}"));
3331
3332 let category = Self::category_from_account_code(acct_number);
3337
3338 entries.push(datasynth_generators::TrialBalanceEntry {
3339 account_code: acct_number.clone(),
3340 account_name,
3341 category,
3342 debit_balance: debit,
3343 credit_balance: credit,
3344 });
3345 }
3346
3347 entries
3348 }
3349
3350 fn build_cumulative_trial_balance(
3357 journal_entries: &[JournalEntry],
3358 coa: &ChartOfAccounts,
3359 company_code: &str,
3360 start_date: NaiveDate,
3361 period_end: NaiveDate,
3362 fiscal_year: u16,
3363 fiscal_period: u8,
3364 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3365 use rust_decimal::Decimal;
3366
3367 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3369 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3370
3371 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3373 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3374
3375 for je in journal_entries {
3376 if je.header.company_code != company_code {
3377 continue;
3378 }
3379
3380 for line in &je.lines {
3381 let acct = &line.gl_account;
3382 let category = Self::category_from_account_code(acct);
3383 let is_bs_account = matches!(
3384 category.as_str(),
3385 "Cash"
3386 | "Receivables"
3387 | "Inventory"
3388 | "FixedAssets"
3389 | "Payables"
3390 | "AccruedLiabilities"
3391 | "LongTermDebt"
3392 | "Equity"
3393 );
3394
3395 if is_bs_account {
3396 if je.header.document_date <= period_end
3398 && je.header.document_date >= start_date
3399 {
3400 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3401 line.debit_amount;
3402 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3403 line.credit_amount;
3404 }
3405 } else {
3406 if je.header.fiscal_year == fiscal_year
3408 && je.header.fiscal_period == fiscal_period
3409 {
3410 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3411 line.debit_amount;
3412 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3413 line.credit_amount;
3414 }
3415 }
3416 }
3417 }
3418
3419 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3421 all_accounts.extend(bs_debits.keys().cloned());
3422 all_accounts.extend(bs_credits.keys().cloned());
3423 all_accounts.extend(is_debits.keys().cloned());
3424 all_accounts.extend(is_credits.keys().cloned());
3425
3426 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3427 sorted_accounts.sort();
3428
3429 let mut entries = Vec::new();
3430
3431 for acct_number in &sorted_accounts {
3432 let category = Self::category_from_account_code(acct_number);
3433 let is_bs_account = matches!(
3434 category.as_str(),
3435 "Cash"
3436 | "Receivables"
3437 | "Inventory"
3438 | "FixedAssets"
3439 | "Payables"
3440 | "AccruedLiabilities"
3441 | "LongTermDebt"
3442 | "Equity"
3443 );
3444
3445 let (debit, credit) = if is_bs_account {
3446 (
3447 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3448 bs_credits
3449 .get(acct_number)
3450 .copied()
3451 .unwrap_or(Decimal::ZERO),
3452 )
3453 } else {
3454 (
3455 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3456 is_credits
3457 .get(acct_number)
3458 .copied()
3459 .unwrap_or(Decimal::ZERO),
3460 )
3461 };
3462
3463 if debit.is_zero() && credit.is_zero() {
3464 continue;
3465 }
3466
3467 let account_name = coa
3468 .get_account(acct_number)
3469 .map(|gl| gl.short_description.clone())
3470 .unwrap_or_else(|| format!("Account {acct_number}"));
3471
3472 entries.push(datasynth_generators::TrialBalanceEntry {
3473 account_code: acct_number.clone(),
3474 account_name,
3475 category,
3476 debit_balance: debit,
3477 credit_balance: credit,
3478 });
3479 }
3480
3481 entries
3482 }
3483
3484 fn build_cash_flow_from_trial_balances(
3489 current_tb: &[datasynth_generators::TrialBalanceEntry],
3490 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3491 net_income: rust_decimal::Decimal,
3492 ) -> Vec<CashFlowItem> {
3493 use rust_decimal::Decimal;
3494
3495 let aggregate =
3497 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3498 let mut map: HashMap<String, Decimal> = HashMap::new();
3499 for entry in tb {
3500 let net = entry.debit_balance - entry.credit_balance;
3501 *map.entry(entry.category.clone()).or_default() += net;
3502 }
3503 map
3504 };
3505
3506 let current = aggregate(current_tb);
3507 let prior = prior_tb.map(aggregate);
3508
3509 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3511 *map.get(key).unwrap_or(&Decimal::ZERO)
3512 };
3513
3514 let change = |key: &str| -> Decimal {
3516 let curr = get(¤t, key);
3517 match &prior {
3518 Some(p) => curr - get(p, key),
3519 None => curr,
3520 }
3521 };
3522
3523 let fixed_asset_change = change("FixedAssets");
3526 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3527 -fixed_asset_change
3528 } else {
3529 Decimal::ZERO
3530 };
3531
3532 let ar_change = change("Receivables");
3534 let inventory_change = change("Inventory");
3535 let ap_change = change("Payables");
3537 let accrued_change = change("AccruedLiabilities");
3538
3539 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3540 + (-ap_change)
3541 + (-accrued_change);
3542
3543 let capex = if fixed_asset_change > Decimal::ZERO {
3545 -fixed_asset_change
3546 } else {
3547 Decimal::ZERO
3548 };
3549 let investing_cf = capex;
3550
3551 let debt_change = -change("LongTermDebt");
3553 let equity_change = -change("Equity");
3554 let financing_cf = debt_change + equity_change;
3555
3556 let net_change = operating_cf + investing_cf + financing_cf;
3557
3558 vec![
3559 CashFlowItem {
3560 item_code: "CF-NI".to_string(),
3561 label: "Net Income".to_string(),
3562 category: CashFlowCategory::Operating,
3563 amount: net_income,
3564 amount_prior: None,
3565 sort_order: 1,
3566 is_total: false,
3567 },
3568 CashFlowItem {
3569 item_code: "CF-DEP".to_string(),
3570 label: "Depreciation & Amortization".to_string(),
3571 category: CashFlowCategory::Operating,
3572 amount: depreciation_addback,
3573 amount_prior: None,
3574 sort_order: 2,
3575 is_total: false,
3576 },
3577 CashFlowItem {
3578 item_code: "CF-AR".to_string(),
3579 label: "Change in Accounts Receivable".to_string(),
3580 category: CashFlowCategory::Operating,
3581 amount: -ar_change,
3582 amount_prior: None,
3583 sort_order: 3,
3584 is_total: false,
3585 },
3586 CashFlowItem {
3587 item_code: "CF-AP".to_string(),
3588 label: "Change in Accounts Payable".to_string(),
3589 category: CashFlowCategory::Operating,
3590 amount: -ap_change,
3591 amount_prior: None,
3592 sort_order: 4,
3593 is_total: false,
3594 },
3595 CashFlowItem {
3596 item_code: "CF-INV".to_string(),
3597 label: "Change in Inventory".to_string(),
3598 category: CashFlowCategory::Operating,
3599 amount: -inventory_change,
3600 amount_prior: None,
3601 sort_order: 5,
3602 is_total: false,
3603 },
3604 CashFlowItem {
3605 item_code: "CF-OP".to_string(),
3606 label: "Net Cash from Operating Activities".to_string(),
3607 category: CashFlowCategory::Operating,
3608 amount: operating_cf,
3609 amount_prior: None,
3610 sort_order: 6,
3611 is_total: true,
3612 },
3613 CashFlowItem {
3614 item_code: "CF-CAPEX".to_string(),
3615 label: "Capital Expenditures".to_string(),
3616 category: CashFlowCategory::Investing,
3617 amount: capex,
3618 amount_prior: None,
3619 sort_order: 7,
3620 is_total: false,
3621 },
3622 CashFlowItem {
3623 item_code: "CF-INV-T".to_string(),
3624 label: "Net Cash from Investing Activities".to_string(),
3625 category: CashFlowCategory::Investing,
3626 amount: investing_cf,
3627 amount_prior: None,
3628 sort_order: 8,
3629 is_total: true,
3630 },
3631 CashFlowItem {
3632 item_code: "CF-DEBT".to_string(),
3633 label: "Net Borrowings / (Repayments)".to_string(),
3634 category: CashFlowCategory::Financing,
3635 amount: debt_change,
3636 amount_prior: None,
3637 sort_order: 9,
3638 is_total: false,
3639 },
3640 CashFlowItem {
3641 item_code: "CF-EQ".to_string(),
3642 label: "Equity Changes".to_string(),
3643 category: CashFlowCategory::Financing,
3644 amount: equity_change,
3645 amount_prior: None,
3646 sort_order: 10,
3647 is_total: false,
3648 },
3649 CashFlowItem {
3650 item_code: "CF-FIN-T".to_string(),
3651 label: "Net Cash from Financing Activities".to_string(),
3652 category: CashFlowCategory::Financing,
3653 amount: financing_cf,
3654 amount_prior: None,
3655 sort_order: 11,
3656 is_total: true,
3657 },
3658 CashFlowItem {
3659 item_code: "CF-NET".to_string(),
3660 label: "Net Change in Cash".to_string(),
3661 category: CashFlowCategory::Operating,
3662 amount: net_change,
3663 amount_prior: None,
3664 sort_order: 12,
3665 is_total: true,
3666 },
3667 ]
3668 }
3669
3670 fn calculate_net_income_from_tb(
3674 tb: &[datasynth_generators::TrialBalanceEntry],
3675 ) -> rust_decimal::Decimal {
3676 use rust_decimal::Decimal;
3677
3678 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3679 for entry in tb {
3680 let net = entry.debit_balance - entry.credit_balance;
3681 *aggregated.entry(entry.category.clone()).or_default() += net;
3682 }
3683
3684 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3685 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3686 let opex = *aggregated
3687 .get("OperatingExpenses")
3688 .unwrap_or(&Decimal::ZERO);
3689 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3690 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3691
3692 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3695 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
3697 operating_income - tax
3698 }
3699
/// Maps an account code to a reporting category based on its first two
/// characters.
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | `Cash`               |
/// | 11      | `Receivables`        |
/// | 12–14   | `Inventory`          |
/// | 15–19   | `FixedAssets`        |
/// | 20      | `Payables`           |
/// | 21–24   | `AccruedLiabilities` |
/// | 25–29   | `LongTermDebt`       |
/// | 30–39   | `Equity`             |
/// | 40–44   | `Revenue`            |
/// | 50–52   | `CostOfSales`        |
/// | 60–69   | `OperatingExpenses`  |
/// | 70–74   | `OtherIncome`        |
/// | 80–89   | `OtherExpenses`      |
///
/// Every other input — including codes shorter than two characters and
/// prefixes not listed above — falls back to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unknown or too-short prefixes share the operating-expense bucket.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
3728
3729 fn phase_hr_data(
3731 &mut self,
3732 stats: &mut EnhancedGenerationStatistics,
3733 ) -> SynthResult<HrSnapshot> {
3734 if !self.config.hr.enabled {
3735 debug!("Phase 16: Skipped (HR generation disabled)");
3736 return Ok(HrSnapshot::default());
3737 }
3738
3739 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3740
3741 let seed = self.seed;
3742 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3743 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3744 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3745 let company_code = self
3746 .config
3747 .companies
3748 .first()
3749 .map(|c| c.code.as_str())
3750 .unwrap_or("1000");
3751 let currency = self
3752 .config
3753 .companies
3754 .first()
3755 .map(|c| c.currency.as_str())
3756 .unwrap_or("USD");
3757
3758 let employee_ids: Vec<String> = self
3759 .master_data
3760 .employees
3761 .iter()
3762 .map(|e| e.employee_id.clone())
3763 .collect();
3764
3765 if employee_ids.is_empty() {
3766 debug!("Phase 16: Skipped (no employees available)");
3767 return Ok(HrSnapshot::default());
3768 }
3769
3770 let cost_center_ids: Vec<String> = self
3773 .master_data
3774 .employees
3775 .iter()
3776 .filter_map(|e| e.cost_center.clone())
3777 .collect::<std::collections::HashSet<_>>()
3778 .into_iter()
3779 .collect();
3780
3781 let mut snapshot = HrSnapshot::default();
3782
3783 if self.config.hr.payroll.enabled {
3785 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3786 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3787
3788 let payroll_pack = self.primary_pack();
3790
3791 payroll_gen.set_country_pack(payroll_pack.clone());
3794
3795 let employees_with_salary: Vec<(
3796 String,
3797 rust_decimal::Decimal,
3798 Option<String>,
3799 Option<String>,
3800 )> = self
3801 .master_data
3802 .employees
3803 .iter()
3804 .map(|e| {
3805 (
3806 e.employee_id.clone(),
3807 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3809 e.department_id.clone(),
3810 )
3811 })
3812 .collect();
3813
3814 for month in 0..self.config.global.period_months {
3815 let period_start = start_date + chrono::Months::new(month);
3816 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3817 let (run, items) = payroll_gen.generate(
3818 company_code,
3819 &employees_with_salary,
3820 period_start,
3821 period_end,
3822 currency,
3823 );
3824 snapshot.payroll_runs.push(run);
3825 snapshot.payroll_run_count += 1;
3826 snapshot.payroll_line_item_count += items.len();
3827 snapshot.payroll_line_items.extend(items);
3828 }
3829 }
3830
3831 if self.config.hr.time_attendance.enabled {
3833 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3834 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3835 let entries = time_gen.generate(
3836 &employee_ids,
3837 start_date,
3838 end_date,
3839 &self.config.hr.time_attendance,
3840 );
3841 snapshot.time_entry_count = entries.len();
3842 snapshot.time_entries = entries;
3843 }
3844
3845 if self.config.hr.expenses.enabled {
3847 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3848 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3849 expense_gen.set_country_pack(self.primary_pack().clone());
3850 let company_currency = self
3851 .config
3852 .companies
3853 .first()
3854 .map(|c| c.currency.as_str())
3855 .unwrap_or("USD");
3856 let reports = expense_gen.generate_with_currency(
3857 &employee_ids,
3858 start_date,
3859 end_date,
3860 &self.config.hr.expenses,
3861 company_currency,
3862 );
3863 snapshot.expense_report_count = reports.len();
3864 snapshot.expense_reports = reports;
3865 }
3866
3867 if self.config.hr.payroll.enabled {
3869 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
3870 let employee_pairs: Vec<(String, String)> = self
3871 .master_data
3872 .employees
3873 .iter()
3874 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
3875 .collect();
3876 let enrollments =
3877 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
3878 snapshot.benefit_enrollment_count = enrollments.len();
3879 snapshot.benefit_enrollments = enrollments;
3880 }
3881
3882 stats.payroll_run_count = snapshot.payroll_run_count;
3883 stats.time_entry_count = snapshot.time_entry_count;
3884 stats.expense_report_count = snapshot.expense_report_count;
3885 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
3886
3887 info!(
3888 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments",
3889 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3890 snapshot.time_entry_count, snapshot.expense_report_count,
3891 snapshot.benefit_enrollment_count
3892 );
3893 self.check_resources_with_log("post-hr")?;
3894
3895 Ok(snapshot)
3896 }
3897
3898 fn phase_accounting_standards(
3900 &mut self,
3901 stats: &mut EnhancedGenerationStatistics,
3902 ) -> SynthResult<AccountingStandardsSnapshot> {
3903 if !self.phase_config.generate_accounting_standards
3904 || !self.config.accounting_standards.enabled
3905 {
3906 debug!("Phase 17: Skipped (accounting standards generation disabled)");
3907 return Ok(AccountingStandardsSnapshot::default());
3908 }
3909 info!("Phase 17: Generating Accounting Standards Data");
3910
3911 let seed = self.seed;
3912 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3913 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3914 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3915 let company_code = self
3916 .config
3917 .companies
3918 .first()
3919 .map(|c| c.code.as_str())
3920 .unwrap_or("1000");
3921 let currency = self
3922 .config
3923 .companies
3924 .first()
3925 .map(|c| c.currency.as_str())
3926 .unwrap_or("USD");
3927
3928 let framework = match self.config.accounting_standards.framework {
3933 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3934 datasynth_standards::framework::AccountingFramework::UsGaap
3935 }
3936 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3937 datasynth_standards::framework::AccountingFramework::Ifrs
3938 }
3939 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3940 datasynth_standards::framework::AccountingFramework::DualReporting
3941 }
3942 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3943 datasynth_standards::framework::AccountingFramework::FrenchGaap
3944 }
3945 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3946 datasynth_standards::framework::AccountingFramework::GermanGaap
3947 }
3948 None => {
3949 let pack = self.primary_pack();
3951 let pack_fw = pack.accounting.framework.as_str();
3952 match pack_fw {
3953 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3954 "dual_reporting" => {
3955 datasynth_standards::framework::AccountingFramework::DualReporting
3956 }
3957 "french_gaap" => {
3958 datasynth_standards::framework::AccountingFramework::FrenchGaap
3959 }
3960 "german_gaap" | "hgb" => {
3961 datasynth_standards::framework::AccountingFramework::GermanGaap
3962 }
3963 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3965 }
3966 }
3967 };
3968
3969 let mut snapshot = AccountingStandardsSnapshot::default();
3970
3971 if self.config.accounting_standards.revenue_recognition.enabled {
3973 let customer_ids: Vec<String> = self
3974 .master_data
3975 .customers
3976 .iter()
3977 .map(|c| c.customer_id.clone())
3978 .collect();
3979
3980 if !customer_ids.is_empty() {
3981 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3982 let contracts = rev_gen.generate(
3983 company_code,
3984 &customer_ids,
3985 start_date,
3986 end_date,
3987 currency,
3988 &self.config.accounting_standards.revenue_recognition,
3989 framework,
3990 );
3991 snapshot.revenue_contract_count = contracts.len();
3992 snapshot.contracts = contracts;
3993 }
3994 }
3995
3996 if self.config.accounting_standards.impairment.enabled {
3998 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3999 .master_data
4000 .assets
4001 .iter()
4002 .map(|a| {
4003 (
4004 a.asset_id.clone(),
4005 a.description.clone(),
4006 a.acquisition_cost,
4007 )
4008 })
4009 .collect();
4010
4011 if !asset_data.is_empty() {
4012 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
4013 let tests = imp_gen.generate(
4014 company_code,
4015 &asset_data,
4016 end_date,
4017 &self.config.accounting_standards.impairment,
4018 framework,
4019 );
4020 snapshot.impairment_test_count = tests.len();
4021 snapshot.impairment_tests = tests;
4022 }
4023 }
4024
4025 stats.revenue_contract_count = snapshot.revenue_contract_count;
4026 stats.impairment_test_count = snapshot.impairment_test_count;
4027
4028 info!(
4029 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
4030 snapshot.revenue_contract_count, snapshot.impairment_test_count
4031 );
4032 self.check_resources_with_log("post-accounting-standards")?;
4033
4034 Ok(snapshot)
4035 }
4036
4037 fn phase_manufacturing(
4039 &mut self,
4040 stats: &mut EnhancedGenerationStatistics,
4041 ) -> SynthResult<ManufacturingSnapshot> {
4042 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
4043 debug!("Phase 18: Skipped (manufacturing generation disabled)");
4044 return Ok(ManufacturingSnapshot::default());
4045 }
4046 info!("Phase 18: Generating Manufacturing Data");
4047
4048 let seed = self.seed;
4049 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4050 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4051 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4052 let company_code = self
4053 .config
4054 .companies
4055 .first()
4056 .map(|c| c.code.as_str())
4057 .unwrap_or("1000");
4058
4059 let material_data: Vec<(String, String)> = self
4060 .master_data
4061 .materials
4062 .iter()
4063 .map(|m| (m.material_id.clone(), m.description.clone()))
4064 .collect();
4065
4066 if material_data.is_empty() {
4067 debug!("Phase 18: Skipped (no materials available)");
4068 return Ok(ManufacturingSnapshot::default());
4069 }
4070
4071 let mut snapshot = ManufacturingSnapshot::default();
4072
4073 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
4075 let production_orders = prod_gen.generate(
4076 company_code,
4077 &material_data,
4078 start_date,
4079 end_date,
4080 &self.config.manufacturing.production_orders,
4081 &self.config.manufacturing.costing,
4082 &self.config.manufacturing.routing,
4083 );
4084 snapshot.production_order_count = production_orders.len();
4085
4086 let inspection_data: Vec<(String, String, String)> = production_orders
4088 .iter()
4089 .map(|po| {
4090 (
4091 po.order_id.clone(),
4092 po.material_id.clone(),
4093 po.material_description.clone(),
4094 )
4095 })
4096 .collect();
4097
4098 snapshot.production_orders = production_orders;
4099
4100 if !inspection_data.is_empty() {
4101 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
4102 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
4103 snapshot.quality_inspection_count = inspections.len();
4104 snapshot.quality_inspections = inspections;
4105 }
4106
4107 let storage_locations: Vec<(String, String)> = material_data
4109 .iter()
4110 .enumerate()
4111 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
4112 .collect();
4113
4114 let employee_ids: Vec<String> = self
4115 .master_data
4116 .employees
4117 .iter()
4118 .map(|e| e.employee_id.clone())
4119 .collect();
4120 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
4121 .with_employee_pool(employee_ids);
4122 let mut cycle_count_total = 0usize;
4123 for month in 0..self.config.global.period_months {
4124 let count_date = start_date + chrono::Months::new(month);
4125 let items_per_count = storage_locations.len().clamp(10, 50);
4126 let cc = cc_gen.generate(
4127 company_code,
4128 &storage_locations,
4129 count_date,
4130 items_per_count,
4131 );
4132 snapshot.cycle_counts.push(cc);
4133 cycle_count_total += 1;
4134 }
4135 snapshot.cycle_count_count = cycle_count_total;
4136
4137 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
4139 let bom_components = bom_gen.generate(company_code, &material_data);
4140 snapshot.bom_component_count = bom_components.len();
4141 snapshot.bom_components = bom_components;
4142
4143 let currency = self
4145 .config
4146 .companies
4147 .first()
4148 .map(|c| c.currency.as_str())
4149 .unwrap_or("USD");
4150 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
4151 let inventory_movements = inv_mov_gen.generate(
4152 company_code,
4153 &material_data,
4154 start_date,
4155 end_date,
4156 2,
4157 currency,
4158 );
4159 snapshot.inventory_movement_count = inventory_movements.len();
4160 snapshot.inventory_movements = inventory_movements;
4161
4162 stats.production_order_count = snapshot.production_order_count;
4163 stats.quality_inspection_count = snapshot.quality_inspection_count;
4164 stats.cycle_count_count = snapshot.cycle_count_count;
4165 stats.bom_component_count = snapshot.bom_component_count;
4166 stats.inventory_movement_count = snapshot.inventory_movement_count;
4167
4168 info!(
4169 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
4170 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
4171 snapshot.bom_component_count, snapshot.inventory_movement_count
4172 );
4173 self.check_resources_with_log("post-manufacturing")?;
4174
4175 Ok(snapshot)
4176 }
4177
4178 fn phase_sales_kpi_budgets(
4180 &mut self,
4181 coa: &Arc<ChartOfAccounts>,
4182 financial_reporting: &FinancialReportingSnapshot,
4183 stats: &mut EnhancedGenerationStatistics,
4184 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
4185 if !self.phase_config.generate_sales_kpi_budgets {
4186 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
4187 return Ok(SalesKpiBudgetsSnapshot::default());
4188 }
4189 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
4190
4191 let seed = self.seed;
4192 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4193 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4194 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4195 let company_code = self
4196 .config
4197 .companies
4198 .first()
4199 .map(|c| c.code.as_str())
4200 .unwrap_or("1000");
4201
4202 let mut snapshot = SalesKpiBudgetsSnapshot::default();
4203
4204 if self.config.sales_quotes.enabled {
4206 let customer_data: Vec<(String, String)> = self
4207 .master_data
4208 .customers
4209 .iter()
4210 .map(|c| (c.customer_id.clone(), c.name.clone()))
4211 .collect();
4212 let material_data: Vec<(String, String)> = self
4213 .master_data
4214 .materials
4215 .iter()
4216 .map(|m| (m.material_id.clone(), m.description.clone()))
4217 .collect();
4218
4219 if !customer_data.is_empty() && !material_data.is_empty() {
4220 let employee_ids: Vec<String> = self
4221 .master_data
4222 .employees
4223 .iter()
4224 .map(|e| e.employee_id.clone())
4225 .collect();
4226 let customer_ids: Vec<String> = self
4227 .master_data
4228 .customers
4229 .iter()
4230 .map(|c| c.customer_id.clone())
4231 .collect();
4232 let company_currency = self
4233 .config
4234 .companies
4235 .first()
4236 .map(|c| c.currency.as_str())
4237 .unwrap_or("USD");
4238
4239 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
4240 .with_pools(employee_ids, customer_ids);
4241 let quotes = quote_gen.generate_with_currency(
4242 company_code,
4243 &customer_data,
4244 &material_data,
4245 start_date,
4246 end_date,
4247 &self.config.sales_quotes,
4248 company_currency,
4249 );
4250 snapshot.sales_quote_count = quotes.len();
4251 snapshot.sales_quotes = quotes;
4252 }
4253 }
4254
4255 if self.config.financial_reporting.management_kpis.enabled {
4257 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
4258 let mut kpis = kpi_gen.generate(
4259 company_code,
4260 start_date,
4261 end_date,
4262 &self.config.financial_reporting.management_kpis,
4263 );
4264
4265 {
4267 use rust_decimal::Decimal;
4268
4269 if let Some(income_stmt) =
4270 financial_reporting.financial_statements.iter().find(|fs| {
4271 fs.statement_type == StatementType::IncomeStatement
4272 && fs.company_code == company_code
4273 })
4274 {
4275 let total_revenue: Decimal = income_stmt
4277 .line_items
4278 .iter()
4279 .filter(|li| li.section.contains("Revenue") && !li.is_total)
4280 .map(|li| li.amount)
4281 .sum();
4282 let total_cogs: Decimal = income_stmt
4283 .line_items
4284 .iter()
4285 .filter(|li| {
4286 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4287 && !li.is_total
4288 })
4289 .map(|li| li.amount.abs())
4290 .sum();
4291 let total_opex: Decimal = income_stmt
4292 .line_items
4293 .iter()
4294 .filter(|li| {
4295 li.section.contains("Expense")
4296 && !li.is_total
4297 && !li.section.contains("Cost")
4298 })
4299 .map(|li| li.amount.abs())
4300 .sum();
4301
4302 if total_revenue > Decimal::ZERO {
4303 let hundred = Decimal::from(100);
4304 let gross_margin_pct =
4305 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4306 let operating_income = total_revenue - total_cogs - total_opex;
4307 let op_margin_pct =
4308 (operating_income * hundred / total_revenue).round_dp(2);
4309
4310 for kpi in &mut kpis {
4312 if kpi.name == "Gross Margin" {
4313 kpi.value = gross_margin_pct;
4314 } else if kpi.name == "Operating Margin" {
4315 kpi.value = op_margin_pct;
4316 }
4317 }
4318 }
4319 }
4320
4321 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4323 fs.statement_type == StatementType::BalanceSheet
4324 && fs.company_code == company_code
4325 }) {
4326 let current_assets: Decimal = bs
4327 .line_items
4328 .iter()
4329 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4330 .map(|li| li.amount)
4331 .sum();
4332 let current_liabilities: Decimal = bs
4333 .line_items
4334 .iter()
4335 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4336 .map(|li| li.amount.abs())
4337 .sum();
4338
4339 if current_liabilities > Decimal::ZERO {
4340 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4341 for kpi in &mut kpis {
4342 if kpi.name == "Current Ratio" {
4343 kpi.value = current_ratio;
4344 }
4345 }
4346 }
4347 }
4348 }
4349
4350 snapshot.kpi_count = kpis.len();
4351 snapshot.kpis = kpis;
4352 }
4353
4354 if self.config.financial_reporting.budgets.enabled {
4356 let account_data: Vec<(String, String)> = coa
4357 .accounts
4358 .iter()
4359 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4360 .collect();
4361
4362 if !account_data.is_empty() {
4363 let fiscal_year = start_date.year() as u32;
4364 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4365 let budget = budget_gen.generate(
4366 company_code,
4367 fiscal_year,
4368 &account_data,
4369 &self.config.financial_reporting.budgets,
4370 );
4371 snapshot.budget_line_count = budget.line_items.len();
4372 snapshot.budgets.push(budget);
4373 }
4374 }
4375
4376 stats.sales_quote_count = snapshot.sales_quote_count;
4377 stats.kpi_count = snapshot.kpi_count;
4378 stats.budget_line_count = snapshot.budget_line_count;
4379
4380 info!(
4381 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4382 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4383 );
4384 self.check_resources_with_log("post-sales-kpi-budgets")?;
4385
4386 Ok(snapshot)
4387 }
4388
4389 fn phase_tax_generation(
4391 &mut self,
4392 document_flows: &DocumentFlowSnapshot,
4393 stats: &mut EnhancedGenerationStatistics,
4394 ) -> SynthResult<TaxSnapshot> {
4395 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4396 debug!("Phase 20: Skipped (tax generation disabled)");
4397 return Ok(TaxSnapshot::default());
4398 }
4399 info!("Phase 20: Generating Tax Data");
4400
4401 let seed = self.seed;
4402 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4403 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4404 let fiscal_year = start_date.year();
4405 let company_code = self
4406 .config
4407 .companies
4408 .first()
4409 .map(|c| c.code.as_str())
4410 .unwrap_or("1000");
4411
4412 let mut gen =
4413 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4414
4415 let pack = self.primary_pack().clone();
4416 let (jurisdictions, codes) =
4417 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4418
4419 let mut provisions = Vec::new();
4421 if self.config.tax.provisions.enabled {
4422 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4423 for company in &self.config.companies {
4424 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4425 let statutory_rate = rust_decimal::Decimal::new(
4426 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
4427 2,
4428 );
4429 let provision = provision_gen.generate(
4430 &company.code,
4431 start_date,
4432 pre_tax_income,
4433 statutory_rate,
4434 );
4435 provisions.push(provision);
4436 }
4437 }
4438
4439 let mut tax_lines = Vec::new();
4441 if !codes.is_empty() {
4442 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4443 datasynth_generators::TaxLineGeneratorConfig::default(),
4444 codes.clone(),
4445 seed + 72,
4446 );
4447
4448 let buyer_country = self
4451 .config
4452 .companies
4453 .first()
4454 .map(|c| c.country.as_str())
4455 .unwrap_or("US");
4456 for vi in &document_flows.vendor_invoices {
4457 let lines = tax_line_gen.generate_for_document(
4458 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4459 &vi.header.document_id,
4460 buyer_country, buyer_country,
4462 vi.payable_amount,
4463 vi.header.document_date,
4464 None,
4465 );
4466 tax_lines.extend(lines);
4467 }
4468
4469 for ci in &document_flows.customer_invoices {
4471 let lines = tax_line_gen.generate_for_document(
4472 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4473 &ci.header.document_id,
4474 buyer_country, buyer_country,
4476 ci.total_gross_amount,
4477 ci.header.document_date,
4478 None,
4479 );
4480 tax_lines.extend(lines);
4481 }
4482 }
4483
4484 let snapshot = TaxSnapshot {
4485 jurisdiction_count: jurisdictions.len(),
4486 code_count: codes.len(),
4487 jurisdictions,
4488 codes,
4489 tax_provisions: provisions,
4490 tax_lines,
4491 tax_returns: Vec::new(),
4492 withholding_records: Vec::new(),
4493 tax_anomaly_labels: Vec::new(),
4494 };
4495
4496 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4497 stats.tax_code_count = snapshot.code_count;
4498 stats.tax_provision_count = snapshot.tax_provisions.len();
4499 stats.tax_line_count = snapshot.tax_lines.len();
4500
4501 info!(
4502 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4503 snapshot.jurisdiction_count,
4504 snapshot.code_count,
4505 snapshot.tax_provisions.len()
4506 );
4507 self.check_resources_with_log("post-tax")?;
4508
4509 Ok(snapshot)
4510 }
4511
4512 fn phase_esg_generation(
4514 &mut self,
4515 document_flows: &DocumentFlowSnapshot,
4516 stats: &mut EnhancedGenerationStatistics,
4517 ) -> SynthResult<EsgSnapshot> {
4518 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4519 debug!("Phase 21: Skipped (ESG generation disabled)");
4520 return Ok(EsgSnapshot::default());
4521 }
4522 info!("Phase 21: Generating ESG Data");
4523
4524 let seed = self.seed;
4525 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4526 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4527 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4528 let entity_id = self
4529 .config
4530 .companies
4531 .first()
4532 .map(|c| c.code.as_str())
4533 .unwrap_or("1000");
4534
4535 let esg_cfg = &self.config.esg;
4536 let mut snapshot = EsgSnapshot::default();
4537
4538 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4540 esg_cfg.environmental.energy.clone(),
4541 seed + 80,
4542 );
4543 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4544
4545 let facility_count = esg_cfg.environmental.energy.facility_count;
4547 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4548 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4549
4550 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4552 seed + 82,
4553 esg_cfg.environmental.waste.diversion_target,
4554 facility_count,
4555 );
4556 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4557
4558 let mut emission_gen =
4560 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4561
4562 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4564 .iter()
4565 .map(|e| datasynth_generators::EnergyInput {
4566 facility_id: e.facility_id.clone(),
4567 energy_type: match e.energy_source {
4568 EnergySourceType::NaturalGas => {
4569 datasynth_generators::EnergyInputType::NaturalGas
4570 }
4571 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4572 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4573 _ => datasynth_generators::EnergyInputType::Electricity,
4574 },
4575 consumption_kwh: e.consumption_kwh,
4576 period: e.period,
4577 })
4578 .collect();
4579
4580 let mut emissions = Vec::new();
4581 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4582 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4583
4584 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4586 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4587 for payment in &document_flows.payments {
4588 if payment.is_vendor {
4589 *totals
4590 .entry(payment.business_partner_id.clone())
4591 .or_default() += payment.amount;
4592 }
4593 }
4594 totals
4595 };
4596 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4597 .master_data
4598 .vendors
4599 .iter()
4600 .map(|v| {
4601 let spend = vendor_payment_totals
4602 .get(&v.vendor_id)
4603 .copied()
4604 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4605 datasynth_generators::VendorSpendInput {
4606 vendor_id: v.vendor_id.clone(),
4607 category: format!("{:?}", v.vendor_type).to_lowercase(),
4608 spend,
4609 country: v.country.clone(),
4610 }
4611 })
4612 .collect();
4613 if !vendor_spend.is_empty() {
4614 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4615 entity_id,
4616 &vendor_spend,
4617 start_date,
4618 end_date,
4619 ));
4620 }
4621
4622 let headcount = self.master_data.employees.len() as u32;
4624 if headcount > 0 {
4625 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4626 emissions.extend(emission_gen.generate_scope3_business_travel(
4627 entity_id,
4628 travel_spend,
4629 start_date,
4630 ));
4631 emissions
4632 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4633 }
4634
4635 snapshot.emission_count = emissions.len();
4636 snapshot.emissions = emissions;
4637 snapshot.energy = energy_records;
4638
4639 let mut workforce_gen =
4641 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4642 let total_headcount = headcount.max(100);
4643 snapshot.diversity =
4644 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4645 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4646 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4647 entity_id,
4648 facility_count,
4649 start_date,
4650 end_date,
4651 );
4652
4653 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4656 entity_id,
4657 &snapshot.safety_incidents,
4658 total_hours,
4659 start_date,
4660 );
4661 snapshot.safety_metrics = vec![safety_metric];
4662
4663 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4665 seed + 85,
4666 esg_cfg.governance.board_size,
4667 esg_cfg.governance.independence_target,
4668 );
4669 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4670
4671 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4673 esg_cfg.supply_chain_esg.clone(),
4674 seed + 86,
4675 );
4676 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4677 .master_data
4678 .vendors
4679 .iter()
4680 .map(|v| datasynth_generators::VendorInput {
4681 vendor_id: v.vendor_id.clone(),
4682 country: v.country.clone(),
4683 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4684 quality_score: None,
4685 })
4686 .collect();
4687 snapshot.supplier_assessments =
4688 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4689
4690 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4692 seed + 87,
4693 esg_cfg.reporting.clone(),
4694 esg_cfg.climate_scenarios.clone(),
4695 );
4696 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4697 snapshot.disclosures = disclosure_gen.generate_disclosures(
4698 entity_id,
4699 &snapshot.materiality,
4700 start_date,
4701 end_date,
4702 );
4703 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4704 snapshot.disclosure_count = snapshot.disclosures.len();
4705
4706 if esg_cfg.anomaly_rate > 0.0 {
4708 let mut anomaly_injector =
4709 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4710 let mut labels = Vec::new();
4711 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4712 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4713 labels.extend(
4714 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4715 );
4716 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4717 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4718 snapshot.anomaly_labels = labels;
4719 }
4720
4721 stats.esg_emission_count = snapshot.emission_count;
4722 stats.esg_disclosure_count = snapshot.disclosure_count;
4723
4724 info!(
4725 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4726 snapshot.emission_count,
4727 snapshot.disclosure_count,
4728 snapshot.supplier_assessments.len()
4729 );
4730 self.check_resources_with_log("post-esg")?;
4731
4732 Ok(snapshot)
4733 }
4734
4735 fn phase_treasury_data(
4737 &mut self,
4738 document_flows: &DocumentFlowSnapshot,
4739 subledger: &SubledgerSnapshot,
4740 intercompany: &IntercompanySnapshot,
4741 stats: &mut EnhancedGenerationStatistics,
4742 ) -> SynthResult<TreasurySnapshot> {
4743 if !self.config.treasury.enabled {
4744 debug!("Phase 22: Skipped (treasury generation disabled)");
4745 return Ok(TreasurySnapshot::default());
4746 }
4747 info!("Phase 22: Generating Treasury Data");
4748
4749 let seed = self.seed;
4750 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4751 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4752 let currency = self
4753 .config
4754 .companies
4755 .first()
4756 .map(|c| c.currency.as_str())
4757 .unwrap_or("USD");
4758 let entity_id = self
4759 .config
4760 .companies
4761 .first()
4762 .map(|c| c.code.as_str())
4763 .unwrap_or("1000");
4764
4765 let mut snapshot = TreasurySnapshot::default();
4766
4767 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4769 self.config.treasury.debt.clone(),
4770 seed + 90,
4771 );
4772 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4773
4774 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4776 self.config.treasury.hedging.clone(),
4777 seed + 91,
4778 );
4779 for debt in &snapshot.debt_instruments {
4780 if debt.rate_type == InterestRateType::Variable {
4781 let swap = hedge_gen.generate_ir_swap(
4782 currency,
4783 debt.principal,
4784 debt.origination_date,
4785 debt.maturity_date,
4786 );
4787 snapshot.hedging_instruments.push(swap);
4788 }
4789 }
4790
4791 {
4794 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4795 for payment in &document_flows.payments {
4796 if payment.currency != currency {
4797 let entry = fx_map
4798 .entry(payment.currency.clone())
4799 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4800 entry.0 += payment.amount;
4801 if payment.header.document_date > entry.1 {
4803 entry.1 = payment.header.document_date;
4804 }
4805 }
4806 }
4807 if !fx_map.is_empty() {
4808 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4809 .into_iter()
4810 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4811 datasynth_generators::treasury::FxExposure {
4812 currency_pair: format!("{foreign_ccy}/{currency}"),
4813 foreign_currency: foreign_ccy,
4814 net_amount,
4815 settlement_date,
4816 description: "AP payment FX exposure".to_string(),
4817 }
4818 })
4819 .collect();
4820 let (fx_instruments, fx_relationships) =
4821 hedge_gen.generate(start_date, &fx_exposures);
4822 snapshot.hedging_instruments.extend(fx_instruments);
4823 snapshot.hedge_relationships.extend(fx_relationships);
4824 }
4825 }
4826
4827 if self.config.treasury.anomaly_rate > 0.0 {
4829 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4830 seed + 92,
4831 self.config.treasury.anomaly_rate,
4832 );
4833 let mut labels = Vec::new();
4834 labels.extend(
4835 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4836 );
4837 snapshot.treasury_anomaly_labels = labels;
4838 }
4839
4840 if self.config.treasury.cash_positioning.enabled {
4842 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4843
4844 for payment in &document_flows.payments {
4846 cash_flows.push(datasynth_generators::treasury::CashFlow {
4847 date: payment.header.document_date,
4848 account_id: format!("{entity_id}-MAIN"),
4849 amount: payment.amount,
4850 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4851 });
4852 }
4853
4854 for chain in &document_flows.o2c_chains {
4856 if let Some(ref receipt) = chain.customer_receipt {
4857 cash_flows.push(datasynth_generators::treasury::CashFlow {
4858 date: receipt.header.document_date,
4859 account_id: format!("{entity_id}-MAIN"),
4860 amount: receipt.amount,
4861 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4862 });
4863 }
4864 for receipt in &chain.remainder_receipts {
4866 cash_flows.push(datasynth_generators::treasury::CashFlow {
4867 date: receipt.header.document_date,
4868 account_id: format!("{entity_id}-MAIN"),
4869 amount: receipt.amount,
4870 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4871 });
4872 }
4873 }
4874
4875 if !cash_flows.is_empty() {
4876 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4877 self.config.treasury.cash_positioning.clone(),
4878 seed + 93,
4879 );
4880 let account_id = format!("{entity_id}-MAIN");
4881 snapshot.cash_positions = cash_gen.generate(
4882 entity_id,
4883 &account_id,
4884 currency,
4885 &cash_flows,
4886 start_date,
4887 start_date + chrono::Months::new(self.config.global.period_months),
4888 rust_decimal::Decimal::new(1_000_000, 0), );
4890 }
4891 }
4892
4893 if self.config.treasury.cash_forecasting.enabled {
4895 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4896
4897 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
4899 .ar_invoices
4900 .iter()
4901 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
4902 .map(|inv| {
4903 let days_past_due = if inv.due_date < end_date {
4904 (end_date - inv.due_date).num_days().max(0) as u32
4905 } else {
4906 0
4907 };
4908 datasynth_generators::treasury::ArAgingItem {
4909 expected_date: inv.due_date,
4910 amount: inv.amount_remaining,
4911 days_past_due,
4912 document_id: inv.invoice_number.clone(),
4913 }
4914 })
4915 .collect();
4916
4917 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
4919 .ap_invoices
4920 .iter()
4921 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
4922 .map(|inv| datasynth_generators::treasury::ApAgingItem {
4923 payment_date: inv.due_date,
4924 amount: inv.amount_remaining,
4925 document_id: inv.invoice_number.clone(),
4926 })
4927 .collect();
4928
4929 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
4930 self.config.treasury.cash_forecasting.clone(),
4931 seed + 94,
4932 );
4933 let forecast = forecast_gen.generate(
4934 entity_id,
4935 currency,
4936 end_date,
4937 &ar_items,
4938 &ap_items,
4939 &[], );
4941 snapshot.cash_forecasts.push(forecast);
4942 }
4943
4944 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
4946 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4947 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
4948 self.config.treasury.cash_pooling.clone(),
4949 seed + 95,
4950 );
4951
4952 let account_ids: Vec<String> = snapshot
4954 .cash_positions
4955 .iter()
4956 .map(|cp| cp.bank_account_id.clone())
4957 .collect::<std::collections::HashSet<_>>()
4958 .into_iter()
4959 .collect();
4960
4961 if let Some(pool) =
4962 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
4963 {
4964 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4966 for cp in &snapshot.cash_positions {
4967 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
4968 }
4969
4970 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
4971 latest_balances
4972 .into_iter()
4973 .filter(|(id, _)| pool.participant_accounts.contains(id))
4974 .map(
4975 |(id, balance)| datasynth_generators::treasury::AccountBalance {
4976 account_id: id,
4977 balance,
4978 },
4979 )
4980 .collect();
4981
4982 let sweeps =
4983 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
4984 snapshot.cash_pool_sweeps = sweeps;
4985 snapshot.cash_pools.push(pool);
4986 }
4987 }
4988
4989 if self.config.treasury.bank_guarantees.enabled {
4991 let vendor_names: Vec<String> = self
4992 .master_data
4993 .vendors
4994 .iter()
4995 .map(|v| v.name.clone())
4996 .collect();
4997 if !vendor_names.is_empty() {
4998 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
4999 self.config.treasury.bank_guarantees.clone(),
5000 seed + 96,
5001 );
5002 snapshot.bank_guarantees =
5003 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
5004 }
5005 }
5006
5007 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
5009 let entity_ids: Vec<String> = self
5010 .config
5011 .companies
5012 .iter()
5013 .map(|c| c.code.clone())
5014 .collect();
5015 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
5016 .matched_pairs
5017 .iter()
5018 .map(|mp| {
5019 (
5020 mp.seller_company.clone(),
5021 mp.buyer_company.clone(),
5022 mp.amount,
5023 )
5024 })
5025 .collect();
5026 if entity_ids.len() >= 2 {
5027 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
5028 self.config.treasury.netting.clone(),
5029 seed + 97,
5030 );
5031 snapshot.netting_runs = netting_gen.generate(
5032 &entity_ids,
5033 currency,
5034 start_date,
5035 self.config.global.period_months,
5036 &ic_amounts,
5037 );
5038 }
5039 }
5040
5041 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
5042 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
5043 stats.cash_position_count = snapshot.cash_positions.len();
5044 stats.cash_forecast_count = snapshot.cash_forecasts.len();
5045 stats.cash_pool_count = snapshot.cash_pools.len();
5046
5047 info!(
5048 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
5049 snapshot.debt_instruments.len(),
5050 snapshot.hedging_instruments.len(),
5051 snapshot.cash_positions.len(),
5052 snapshot.cash_forecasts.len(),
5053 snapshot.cash_pools.len(),
5054 snapshot.bank_guarantees.len(),
5055 snapshot.netting_runs.len(),
5056 );
5057 self.check_resources_with_log("post-treasury")?;
5058
5059 Ok(snapshot)
5060 }
5061
5062 fn phase_project_accounting(
5064 &mut self,
5065 document_flows: &DocumentFlowSnapshot,
5066 hr: &HrSnapshot,
5067 stats: &mut EnhancedGenerationStatistics,
5068 ) -> SynthResult<ProjectAccountingSnapshot> {
5069 if !self.config.project_accounting.enabled {
5070 debug!("Phase 23: Skipped (project accounting disabled)");
5071 return Ok(ProjectAccountingSnapshot::default());
5072 }
5073 info!("Phase 23: Generating Project Accounting Data");
5074
5075 let seed = self.seed;
5076 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5077 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5078 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5079 let company_code = self
5080 .config
5081 .companies
5082 .first()
5083 .map(|c| c.code.as_str())
5084 .unwrap_or("1000");
5085
5086 let mut snapshot = ProjectAccountingSnapshot::default();
5087
5088 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
5090 self.config.project_accounting.clone(),
5091 seed + 95,
5092 );
5093 let pool = project_gen.generate(company_code, start_date, end_date);
5094 snapshot.projects = pool.projects.clone();
5095
5096 {
5098 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
5099 Vec::new();
5100
5101 for te in &hr.time_entries {
5103 let total_hours = te.hours_regular + te.hours_overtime;
5104 if total_hours > 0.0 {
5105 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5106 id: te.entry_id.clone(),
5107 entity_id: company_code.to_string(),
5108 date: te.date,
5109 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
5110 .unwrap_or(rust_decimal::Decimal::ZERO),
5111 source_type: CostSourceType::TimeEntry,
5112 hours: Some(
5113 rust_decimal::Decimal::from_f64_retain(total_hours)
5114 .unwrap_or(rust_decimal::Decimal::ZERO),
5115 ),
5116 });
5117 }
5118 }
5119
5120 for er in &hr.expense_reports {
5122 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5123 id: er.report_id.clone(),
5124 entity_id: company_code.to_string(),
5125 date: er.submission_date,
5126 amount: er.total_amount,
5127 source_type: CostSourceType::ExpenseReport,
5128 hours: None,
5129 });
5130 }
5131
5132 for po in &document_flows.purchase_orders {
5134 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5135 id: po.header.document_id.clone(),
5136 entity_id: company_code.to_string(),
5137 date: po.header.document_date,
5138 amount: po.total_net_amount,
5139 source_type: CostSourceType::PurchaseOrder,
5140 hours: None,
5141 });
5142 }
5143
5144 for vi in &document_flows.vendor_invoices {
5146 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5147 id: vi.header.document_id.clone(),
5148 entity_id: company_code.to_string(),
5149 date: vi.header.document_date,
5150 amount: vi.payable_amount,
5151 source_type: CostSourceType::VendorInvoice,
5152 hours: None,
5153 });
5154 }
5155
5156 if !source_docs.is_empty() && !pool.projects.is_empty() {
5157 let mut cost_gen =
5158 datasynth_generators::project_accounting::ProjectCostGenerator::new(
5159 self.config.project_accounting.cost_allocation.clone(),
5160 seed + 99,
5161 );
5162 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
5163 }
5164 }
5165
5166 if self.config.project_accounting.change_orders.enabled {
5168 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
5169 self.config.project_accounting.change_orders.clone(),
5170 seed + 96,
5171 );
5172 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
5173 }
5174
5175 if self.config.project_accounting.milestones.enabled {
5177 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
5178 self.config.project_accounting.milestones.clone(),
5179 seed + 97,
5180 );
5181 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
5182 }
5183
5184 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
5186 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
5187 self.config.project_accounting.earned_value.clone(),
5188 seed + 98,
5189 );
5190 snapshot.earned_value_metrics =
5191 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
5192 }
5193
5194 stats.project_count = snapshot.projects.len();
5195 stats.project_change_order_count = snapshot.change_orders.len();
5196 stats.project_cost_line_count = snapshot.cost_lines.len();
5197
5198 info!(
5199 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
5200 snapshot.projects.len(),
5201 snapshot.change_orders.len(),
5202 snapshot.milestones.len(),
5203 snapshot.earned_value_metrics.len()
5204 );
5205 self.check_resources_with_log("post-project-accounting")?;
5206
5207 Ok(snapshot)
5208 }
5209
5210 fn phase_evolution_events(
5212 &mut self,
5213 stats: &mut EnhancedGenerationStatistics,
5214 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
5215 if !self.phase_config.generate_evolution_events {
5216 debug!("Phase 24: Skipped (evolution events disabled)");
5217 return Ok((Vec::new(), Vec::new()));
5218 }
5219 info!("Phase 24: Generating Process Evolution + Organizational Events");
5220
5221 let seed = self.seed;
5222 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5223 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5224 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5225
5226 let mut proc_gen =
5228 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
5229 seed + 100,
5230 );
5231 let process_events = proc_gen.generate_events(start_date, end_date);
5232
5233 let company_codes: Vec<String> = self
5235 .config
5236 .companies
5237 .iter()
5238 .map(|c| c.code.clone())
5239 .collect();
5240 let mut org_gen =
5241 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
5242 seed + 101,
5243 );
5244 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
5245
5246 stats.process_evolution_event_count = process_events.len();
5247 stats.organizational_event_count = org_events.len();
5248
5249 info!(
5250 "Evolution events generated: {} process evolution, {} organizational",
5251 process_events.len(),
5252 org_events.len()
5253 );
5254 self.check_resources_with_log("post-evolution-events")?;
5255
5256 Ok((process_events, org_events))
5257 }
5258
5259 fn phase_disruption_events(
5262 &self,
5263 stats: &mut EnhancedGenerationStatistics,
5264 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
5265 if !self.config.organizational_events.enabled {
5266 debug!("Phase 24b: Skipped (organizational events disabled)");
5267 return Ok(Vec::new());
5268 }
5269 info!("Phase 24b: Generating Disruption Events");
5270
5271 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5272 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5273 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5274
5275 let company_codes: Vec<String> = self
5276 .config
5277 .companies
5278 .iter()
5279 .map(|c| c.code.clone())
5280 .collect();
5281
5282 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
5283 let events = gen.generate(start_date, end_date, &company_codes);
5284
5285 stats.disruption_event_count = events.len();
5286 info!("Disruption events generated: {} events", events.len());
5287 self.check_resources_with_log("post-disruption-events")?;
5288
5289 Ok(events)
5290 }
5291
5292 fn phase_counterfactuals(
5299 &self,
5300 journal_entries: &[JournalEntry],
5301 stats: &mut EnhancedGenerationStatistics,
5302 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
5303 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
5304 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
5305 return Ok(Vec::new());
5306 }
5307 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
5308
5309 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
5310
5311 let mut gen = CounterfactualGenerator::new(self.seed + 110);
5312
5313 let specs = [
5315 CounterfactualSpec::ScaleAmount { factor: 2.5 },
5316 CounterfactualSpec::ShiftDate { days: -14 },
5317 CounterfactualSpec::SelfApprove,
5318 CounterfactualSpec::SplitTransaction { split_count: 3 },
5319 ];
5320
5321 let pairs: Vec<_> = journal_entries
5322 .iter()
5323 .enumerate()
5324 .map(|(i, je)| {
5325 let spec = &specs[i % specs.len()];
5326 gen.generate(je, spec)
5327 })
5328 .collect();
5329
5330 stats.counterfactual_pair_count = pairs.len();
5331 info!(
5332 "Counterfactual pairs generated: {} pairs from {} journal entries",
5333 pairs.len(),
5334 journal_entries.len()
5335 );
5336 self.check_resources_with_log("post-counterfactuals")?;
5337
5338 Ok(pairs)
5339 }
5340
5341 fn phase_red_flags(
5348 &self,
5349 anomaly_labels: &AnomalyLabels,
5350 document_flows: &DocumentFlowSnapshot,
5351 stats: &mut EnhancedGenerationStatistics,
5352 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
5353 if !self.config.fraud.enabled {
5354 debug!("Phase 26: Skipped (fraud generation disabled)");
5355 return Ok(Vec::new());
5356 }
5357 info!("Phase 26: Generating Fraud Red-Flag Indicators");
5358
5359 use datasynth_generators::fraud::RedFlagGenerator;
5360
5361 let generator = RedFlagGenerator::new();
5362 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
5363
5364 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
5366 .labels
5367 .iter()
5368 .filter(|label| label.anomaly_type.is_intentional())
5369 .map(|label| label.document_id.as_str())
5370 .collect();
5371
5372 let mut flags = Vec::new();
5373
5374 for chain in &document_flows.p2p_chains {
5376 let doc_id = &chain.purchase_order.header.document_id;
5377 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5378 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5379 }
5380
5381 for chain in &document_flows.o2c_chains {
5383 let doc_id = &chain.sales_order.header.document_id;
5384 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5385 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5386 }
5387
5388 stats.red_flag_count = flags.len();
5389 info!(
5390 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
5391 flags.len(),
5392 document_flows.p2p_chains.len(),
5393 document_flows.o2c_chains.len(),
5394 fraud_doc_ids.len()
5395 );
5396 self.check_resources_with_log("post-red-flags")?;
5397
5398 Ok(flags)
5399 }
5400
5401 fn phase_collusion_rings(
5407 &mut self,
5408 stats: &mut EnhancedGenerationStatistics,
5409 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
5410 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
5411 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
5412 return Ok(Vec::new());
5413 }
5414 info!("Phase 26b: Generating Collusion Rings");
5415
5416 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5417 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5418 let months = self.config.global.period_months;
5419
5420 let employee_ids: Vec<String> = self
5421 .master_data
5422 .employees
5423 .iter()
5424 .map(|e| e.employee_id.clone())
5425 .collect();
5426 let vendor_ids: Vec<String> = self
5427 .master_data
5428 .vendors
5429 .iter()
5430 .map(|v| v.vendor_id.clone())
5431 .collect();
5432
5433 let mut generator =
5434 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
5435 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
5436
5437 stats.collusion_ring_count = rings.len();
5438 info!(
5439 "Collusion rings generated: {} rings, total members: {}",
5440 rings.len(),
5441 rings
5442 .iter()
5443 .map(datasynth_generators::fraud::CollusionRing::size)
5444 .sum::<usize>()
5445 );
5446 self.check_resources_with_log("post-collusion-rings")?;
5447
5448 Ok(rings)
5449 }
5450
5451 fn phase_temporal_attributes(
5456 &mut self,
5457 stats: &mut EnhancedGenerationStatistics,
5458 ) -> SynthResult<
5459 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
5460 > {
5461 if !self.config.temporal_attributes.enabled {
5462 debug!("Phase 27: Skipped (temporal attributes disabled)");
5463 return Ok(Vec::new());
5464 }
5465 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
5466
5467 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5468 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5469
5470 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
5474 || self.config.temporal_attributes.enabled;
5475 let temporal_config = {
5476 let ta = &self.config.temporal_attributes;
5477 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
5478 .enabled(ta.enabled)
5479 .closed_probability(ta.valid_time.closed_probability)
5480 .avg_validity_days(ta.valid_time.avg_validity_days)
5481 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
5482 .with_version_chains(if generate_version_chains {
5483 ta.avg_versions_per_entity
5484 } else {
5485 1.0
5486 })
5487 .build()
5488 };
5489 let temporal_config = if self
5491 .config
5492 .temporal_attributes
5493 .transaction_time
5494 .allow_backdating
5495 {
5496 let mut c = temporal_config;
5497 c.transaction_time.allow_backdating = true;
5498 c.transaction_time.backdating_probability = self
5499 .config
5500 .temporal_attributes
5501 .transaction_time
5502 .backdating_probability;
5503 c.transaction_time.max_backdate_days = self
5504 .config
5505 .temporal_attributes
5506 .transaction_time
5507 .max_backdate_days;
5508 c
5509 } else {
5510 temporal_config
5511 };
5512 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
5513 temporal_config,
5514 self.seed + 130,
5515 start_date,
5516 );
5517
5518 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
5519 self.seed + 130,
5520 datasynth_core::GeneratorType::Vendor,
5521 );
5522
5523 let chains: Vec<_> = self
5524 .master_data
5525 .vendors
5526 .iter()
5527 .map(|vendor| {
5528 let id = uuid_factory.next();
5529 gen.generate_version_chain(vendor.clone(), id)
5530 })
5531 .collect();
5532
5533 stats.temporal_version_chain_count = chains.len();
5534 info!("Temporal version chains generated: {} chains", chains.len());
5535 self.check_resources_with_log("post-temporal-attributes")?;
5536
5537 Ok(chains)
5538 }
5539
5540 fn phase_entity_relationships(
5550 &self,
5551 journal_entries: &[JournalEntry],
5552 document_flows: &DocumentFlowSnapshot,
5553 stats: &mut EnhancedGenerationStatistics,
5554 ) -> SynthResult<(
5555 Option<datasynth_core::models::EntityGraph>,
5556 Vec<datasynth_core::models::CrossProcessLink>,
5557 )> {
5558 use datasynth_generators::relationships::{
5559 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
5560 TransactionSummary,
5561 };
5562
5563 let rs_enabled = self.config.relationship_strength.enabled;
5564 let cpl_enabled = self.config.cross_process_links.enabled
5565 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
5566
5567 if !rs_enabled && !cpl_enabled {
5568 debug!(
5569 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
5570 );
5571 return Ok((None, Vec::new()));
5572 }
5573
5574 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
5575
5576 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5577 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5578
5579 let company_code = self
5580 .config
5581 .companies
5582 .first()
5583 .map(|c| c.code.as_str())
5584 .unwrap_or("1000");
5585
5586 let gen_config = EntityGraphConfig {
5588 enabled: rs_enabled,
5589 cross_process: datasynth_generators::relationships::CrossProcessConfig {
5590 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
5591 enable_return_flows: false,
5592 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
5593 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
5594 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
5596 1.0
5597 } else {
5598 0.30
5599 },
5600 ..Default::default()
5601 },
5602 strength_config: datasynth_generators::relationships::StrengthConfig {
5603 transaction_volume_weight: self
5604 .config
5605 .relationship_strength
5606 .calculation
5607 .transaction_volume_weight,
5608 transaction_count_weight: self
5609 .config
5610 .relationship_strength
5611 .calculation
5612 .transaction_count_weight,
5613 duration_weight: self
5614 .config
5615 .relationship_strength
5616 .calculation
5617 .relationship_duration_weight,
5618 recency_weight: self.config.relationship_strength.calculation.recency_weight,
5619 mutual_connections_weight: self
5620 .config
5621 .relationship_strength
5622 .calculation
5623 .mutual_connections_weight,
5624 recency_half_life_days: self
5625 .config
5626 .relationship_strength
5627 .calculation
5628 .recency_half_life_days,
5629 },
5630 ..Default::default()
5631 };
5632
5633 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
5634
5635 let entity_graph = if rs_enabled {
5637 let vendor_summaries: Vec<EntitySummary> = self
5639 .master_data
5640 .vendors
5641 .iter()
5642 .map(|v| {
5643 EntitySummary::new(
5644 &v.vendor_id,
5645 &v.name,
5646 datasynth_core::models::GraphEntityType::Vendor,
5647 start_date,
5648 )
5649 })
5650 .collect();
5651
5652 let customer_summaries: Vec<EntitySummary> = self
5653 .master_data
5654 .customers
5655 .iter()
5656 .map(|c| {
5657 EntitySummary::new(
5658 &c.customer_id,
5659 &c.name,
5660 datasynth_core::models::GraphEntityType::Customer,
5661 start_date,
5662 )
5663 })
5664 .collect();
5665
5666 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
5671 std::collections::HashMap::new();
5672
5673 for je in journal_entries {
5674 let cc = je.header.company_code.clone();
5675 let posting_date = je.header.posting_date;
5676 for line in &je.lines {
5677 if let Some(ref tp) = line.trading_partner {
5678 let amount = if line.debit_amount > line.credit_amount {
5679 line.debit_amount
5680 } else {
5681 line.credit_amount
5682 };
5683 let entry = txn_summaries
5684 .entry((cc.clone(), tp.clone()))
5685 .or_insert_with(|| TransactionSummary {
5686 total_volume: rust_decimal::Decimal::ZERO,
5687 transaction_count: 0,
5688 first_transaction_date: posting_date,
5689 last_transaction_date: posting_date,
5690 related_entities: std::collections::HashSet::new(),
5691 });
5692 entry.total_volume += amount;
5693 entry.transaction_count += 1;
5694 if posting_date < entry.first_transaction_date {
5695 entry.first_transaction_date = posting_date;
5696 }
5697 if posting_date > entry.last_transaction_date {
5698 entry.last_transaction_date = posting_date;
5699 }
5700 entry.related_entities.insert(cc.clone());
5701 }
5702 }
5703 }
5704
5705 for chain in &document_flows.p2p_chains {
5708 let cc = chain.purchase_order.header.company_code.clone();
5709 let vendor_id = chain.purchase_order.vendor_id.clone();
5710 let po_date = chain.purchase_order.header.document_date;
5711 let amount = chain.purchase_order.total_net_amount;
5712
5713 let entry = txn_summaries
5714 .entry((cc.clone(), vendor_id))
5715 .or_insert_with(|| TransactionSummary {
5716 total_volume: rust_decimal::Decimal::ZERO,
5717 transaction_count: 0,
5718 first_transaction_date: po_date,
5719 last_transaction_date: po_date,
5720 related_entities: std::collections::HashSet::new(),
5721 });
5722 entry.total_volume += amount;
5723 entry.transaction_count += 1;
5724 if po_date < entry.first_transaction_date {
5725 entry.first_transaction_date = po_date;
5726 }
5727 if po_date > entry.last_transaction_date {
5728 entry.last_transaction_date = po_date;
5729 }
5730 entry.related_entities.insert(cc);
5731 }
5732
5733 for chain in &document_flows.o2c_chains {
5735 let cc = chain.sales_order.header.company_code.clone();
5736 let customer_id = chain.sales_order.customer_id.clone();
5737 let so_date = chain.sales_order.header.document_date;
5738 let amount = chain.sales_order.total_net_amount;
5739
5740 let entry = txn_summaries
5741 .entry((cc.clone(), customer_id))
5742 .or_insert_with(|| TransactionSummary {
5743 total_volume: rust_decimal::Decimal::ZERO,
5744 transaction_count: 0,
5745 first_transaction_date: so_date,
5746 last_transaction_date: so_date,
5747 related_entities: std::collections::HashSet::new(),
5748 });
5749 entry.total_volume += amount;
5750 entry.transaction_count += 1;
5751 if so_date < entry.first_transaction_date {
5752 entry.first_transaction_date = so_date;
5753 }
5754 if so_date > entry.last_transaction_date {
5755 entry.last_transaction_date = so_date;
5756 }
5757 entry.related_entities.insert(cc);
5758 }
5759
5760 let as_of_date = journal_entries
5761 .last()
5762 .map(|je| je.header.posting_date)
5763 .unwrap_or(start_date);
5764
5765 let graph = gen.generate_entity_graph(
5766 company_code,
5767 as_of_date,
5768 &vendor_summaries,
5769 &customer_summaries,
5770 &txn_summaries,
5771 );
5772
5773 info!(
5774 "Entity relationship graph: {} nodes, {} edges",
5775 graph.nodes.len(),
5776 graph.edges.len()
5777 );
5778 stats.entity_relationship_node_count = graph.nodes.len();
5779 stats.entity_relationship_edge_count = graph.edges.len();
5780 Some(graph)
5781 } else {
5782 None
5783 };
5784
5785 let cross_process_links = if cpl_enabled {
5787 let gr_refs: Vec<GoodsReceiptRef> = document_flows
5789 .p2p_chains
5790 .iter()
5791 .flat_map(|chain| {
5792 let vendor_id = chain.purchase_order.vendor_id.clone();
5793 let cc = chain.purchase_order.header.company_code.clone();
5794 chain.goods_receipts.iter().flat_map(move |gr| {
5795 gr.items.iter().filter_map({
5796 let doc_id = gr.header.document_id.clone();
5797 let v_id = vendor_id.clone();
5798 let company = cc.clone();
5799 let receipt_date = gr.header.document_date;
5800 move |item| {
5801 item.base
5802 .material_id
5803 .as_ref()
5804 .map(|mat_id| GoodsReceiptRef {
5805 document_id: doc_id.clone(),
5806 material_id: mat_id.clone(),
5807 quantity: item.base.quantity,
5808 receipt_date,
5809 vendor_id: v_id.clone(),
5810 company_code: company.clone(),
5811 })
5812 }
5813 })
5814 })
5815 })
5816 .collect();
5817
5818 let del_refs: Vec<DeliveryRef> = document_flows
5820 .o2c_chains
5821 .iter()
5822 .flat_map(|chain| {
5823 let customer_id = chain.sales_order.customer_id.clone();
5824 let cc = chain.sales_order.header.company_code.clone();
5825 chain.deliveries.iter().flat_map(move |del| {
5826 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
5827 del.items.iter().filter_map({
5828 let doc_id = del.header.document_id.clone();
5829 let c_id = customer_id.clone();
5830 let company = cc.clone();
5831 move |item| {
5832 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
5833 document_id: doc_id.clone(),
5834 material_id: mat_id.clone(),
5835 quantity: item.base.quantity,
5836 delivery_date,
5837 customer_id: c_id.clone(),
5838 company_code: company.clone(),
5839 })
5840 }
5841 })
5842 })
5843 })
5844 .collect();
5845
5846 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
5847 info!("Cross-process links generated: {} links", links.len());
5848 stats.cross_process_link_count = links.len();
5849 links
5850 } else {
5851 Vec::new()
5852 };
5853
5854 self.check_resources_with_log("post-entity-relationships")?;
5855 Ok((entity_graph, cross_process_links))
5856 }
5857
5858 fn phase_industry_data(
5860 &self,
5861 stats: &mut EnhancedGenerationStatistics,
5862 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
5863 if !self.config.industry_specific.enabled {
5864 return None;
5865 }
5866 info!("Phase 29: Generating industry-specific data");
5867 let output = datasynth_generators::industry::factory::generate_industry_output(
5868 self.config.global.industry,
5869 );
5870 stats.industry_gl_account_count = output.gl_accounts.len();
5871 info!(
5872 "Industry data generated: {} GL accounts for {:?}",
5873 output.gl_accounts.len(),
5874 self.config.global.industry
5875 );
5876 Some(output)
5877 }
5878
5879 fn phase_opening_balances(
5881 &mut self,
5882 coa: &Arc<ChartOfAccounts>,
5883 stats: &mut EnhancedGenerationStatistics,
5884 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
5885 if !self.config.balance.generate_opening_balances {
5886 debug!("Phase 3b: Skipped (opening balance generation disabled)");
5887 return Ok(Vec::new());
5888 }
5889 info!("Phase 3b: Generating Opening Balances");
5890
5891 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5892 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5893 let fiscal_year = start_date.year();
5894
5895 let industry = match self.config.global.industry {
5896 IndustrySector::Manufacturing => IndustryType::Manufacturing,
5897 IndustrySector::Retail => IndustryType::Retail,
5898 IndustrySector::FinancialServices => IndustryType::Financial,
5899 IndustrySector::Healthcare => IndustryType::Healthcare,
5900 IndustrySector::Technology => IndustryType::Technology,
5901 _ => IndustryType::Manufacturing,
5902 };
5903
5904 let config = datasynth_generators::OpeningBalanceConfig {
5905 industry,
5906 ..Default::default()
5907 };
5908 let mut gen =
5909 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
5910
5911 let mut results = Vec::new();
5912 for company in &self.config.companies {
5913 let spec = OpeningBalanceSpec::new(
5914 company.code.clone(),
5915 start_date,
5916 fiscal_year,
5917 company.currency.clone(),
5918 rust_decimal::Decimal::new(10_000_000, 0),
5919 industry,
5920 );
5921 let ob = gen.generate(&spec, coa, start_date, &company.code);
5922 results.push(ob);
5923 }
5924
5925 stats.opening_balance_count = results.len();
5926 info!("Opening balances generated: {} companies", results.len());
5927 self.check_resources_with_log("post-opening-balances")?;
5928
5929 Ok(results)
5930 }
5931
5932 fn phase_subledger_reconciliation(
5934 &mut self,
5935 subledger: &SubledgerSnapshot,
5936 entries: &[JournalEntry],
5937 stats: &mut EnhancedGenerationStatistics,
5938 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
5939 if !self.config.balance.reconcile_subledgers {
5940 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
5941 return Ok(Vec::new());
5942 }
5943 info!("Phase 9b: Reconciling GL to subledger balances");
5944
5945 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5946 .map(|d| d + chrono::Months::new(self.config.global.period_months))
5947 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5948
5949 let tracker_config = BalanceTrackerConfig {
5951 validate_on_each_entry: false,
5952 track_history: false,
5953 fail_on_validation_error: false,
5954 ..Default::default()
5955 };
5956 let recon_currency = self
5957 .config
5958 .companies
5959 .first()
5960 .map(|c| c.currency.clone())
5961 .unwrap_or_else(|| "USD".to_string());
5962 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
5963 let validation_errors = tracker.apply_entries(entries);
5964 if !validation_errors.is_empty() {
5965 warn!(
5966 error_count = validation_errors.len(),
5967 "Balance tracker encountered validation errors during subledger reconciliation"
5968 );
5969 for err in &validation_errors {
5970 debug!("Balance validation error: {:?}", err);
5971 }
5972 }
5973
5974 let mut engine = datasynth_generators::ReconciliationEngine::new(
5975 datasynth_generators::ReconciliationConfig::default(),
5976 );
5977
5978 let mut results = Vec::new();
5979 let company_code = self
5980 .config
5981 .companies
5982 .first()
5983 .map(|c| c.code.as_str())
5984 .unwrap_or("1000");
5985
5986 if !subledger.ar_invoices.is_empty() {
5988 let gl_balance = tracker
5989 .get_account_balance(
5990 company_code,
5991 datasynth_core::accounts::control_accounts::AR_CONTROL,
5992 )
5993 .map(|b| b.closing_balance)
5994 .unwrap_or_default();
5995 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
5996 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
5997 }
5998
5999 if !subledger.ap_invoices.is_empty() {
6001 let gl_balance = tracker
6002 .get_account_balance(
6003 company_code,
6004 datasynth_core::accounts::control_accounts::AP_CONTROL,
6005 )
6006 .map(|b| b.closing_balance)
6007 .unwrap_or_default();
6008 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
6009 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
6010 }
6011
6012 if !subledger.fa_records.is_empty() {
6014 let gl_asset_balance = tracker
6015 .get_account_balance(
6016 company_code,
6017 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
6018 )
6019 .map(|b| b.closing_balance)
6020 .unwrap_or_default();
6021 let gl_accum_depr_balance = tracker
6022 .get_account_balance(
6023 company_code,
6024 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
6025 )
6026 .map(|b| b.closing_balance)
6027 .unwrap_or_default();
6028 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
6029 subledger.fa_records.iter().collect();
6030 let (asset_recon, depr_recon) = engine.reconcile_fa(
6031 company_code,
6032 end_date,
6033 gl_asset_balance,
6034 gl_accum_depr_balance,
6035 &fa_refs,
6036 );
6037 results.push(asset_recon);
6038 results.push(depr_recon);
6039 }
6040
6041 if !subledger.inventory_positions.is_empty() {
6043 let gl_balance = tracker
6044 .get_account_balance(
6045 company_code,
6046 datasynth_core::accounts::control_accounts::INVENTORY,
6047 )
6048 .map(|b| b.closing_balance)
6049 .unwrap_or_default();
6050 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
6051 subledger.inventory_positions.iter().collect();
6052 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
6053 }
6054
6055 stats.subledger_reconciliation_count = results.len();
6056 info!(
6057 "Subledger reconciliation complete: {} reconciliations",
6058 results.len()
6059 );
6060 self.check_resources_with_log("post-subledger-reconciliation")?;
6061
6062 Ok(results)
6063 }
6064
6065 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
6067 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
6068
6069 let coa_framework = self.resolve_coa_framework();
6070
6071 let mut gen = ChartOfAccountsGenerator::new(
6072 self.config.chart_of_accounts.complexity,
6073 self.config.global.industry,
6074 self.seed,
6075 )
6076 .with_coa_framework(coa_framework);
6077
6078 let coa = Arc::new(gen.generate());
6079 self.coa = Some(Arc::clone(&coa));
6080
6081 if let Some(pb) = pb {
6082 pb.finish_with_message("Chart of Accounts complete");
6083 }
6084
6085 Ok(coa)
6086 }
6087
6088 fn generate_master_data(&mut self) -> SynthResult<()> {
6090 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6091 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6092 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6093
6094 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
6096
6097 let pack = self.primary_pack().clone();
6099
6100 let vendors_per_company = self.phase_config.vendors_per_company;
6102 let customers_per_company = self.phase_config.customers_per_company;
6103 let materials_per_company = self.phase_config.materials_per_company;
6104 let assets_per_company = self.phase_config.assets_per_company;
6105 let coa_framework = self.resolve_coa_framework();
6106
6107 let per_company_results: Vec<_> = self
6110 .config
6111 .companies
6112 .par_iter()
6113 .enumerate()
6114 .map(|(i, company)| {
6115 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
6116 let pack = pack.clone();
6117
6118 let mut vendor_gen = VendorGenerator::new(company_seed);
6120 vendor_gen.set_country_pack(pack.clone());
6121 vendor_gen.set_coa_framework(coa_framework);
6122 vendor_gen.set_counter_offset(i * vendors_per_company);
6123 let vendor_pool =
6124 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
6125
6126 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
6128 customer_gen.set_country_pack(pack.clone());
6129 customer_gen.set_coa_framework(coa_framework);
6130 customer_gen.set_counter_offset(i * customers_per_company);
6131 let customer_pool = customer_gen.generate_customer_pool(
6132 customers_per_company,
6133 &company.code,
6134 start_date,
6135 );
6136
6137 let mut material_gen = MaterialGenerator::new(company_seed + 200);
6139 material_gen.set_country_pack(pack.clone());
6140 material_gen.set_counter_offset(i * materials_per_company);
6141 let material_pool = material_gen.generate_material_pool(
6142 materials_per_company,
6143 &company.code,
6144 start_date,
6145 );
6146
6147 let mut asset_gen = AssetGenerator::new(company_seed + 300);
6149 let asset_pool = asset_gen.generate_asset_pool(
6150 assets_per_company,
6151 &company.code,
6152 (start_date, end_date),
6153 );
6154
6155 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
6157 employee_gen.set_country_pack(pack);
6158 let employee_pool =
6159 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
6160
6161 (
6162 vendor_pool.vendors,
6163 customer_pool.customers,
6164 material_pool.materials,
6165 asset_pool.assets,
6166 employee_pool.employees,
6167 )
6168 })
6169 .collect();
6170
6171 for (vendors, customers, materials, assets, employees) in per_company_results {
6173 self.master_data.vendors.extend(vendors);
6174 self.master_data.customers.extend(customers);
6175 self.master_data.materials.extend(materials);
6176 self.master_data.assets.extend(assets);
6177 self.master_data.employees.extend(employees);
6178 }
6179
6180 if let Some(pb) = &pb {
6181 pb.inc(total);
6182 }
6183 if let Some(pb) = pb {
6184 pb.finish_with_message("Master data generation complete");
6185 }
6186
6187 Ok(())
6188 }
6189
6190 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
6192 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6193 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6194
6195 let months = (self.config.global.period_months as usize).max(1);
6198 let p2p_count = self
6199 .phase_config
6200 .p2p_chains
6201 .min(self.master_data.vendors.len() * 2 * months);
6202 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
6203
6204 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
6206 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
6207 p2p_gen.set_country_pack(self.primary_pack().clone());
6208
6209 for i in 0..p2p_count {
6210 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
6211 let materials: Vec<&Material> = self
6212 .master_data
6213 .materials
6214 .iter()
6215 .skip(i % self.master_data.materials.len().max(1))
6216 .take(2.min(self.master_data.materials.len()))
6217 .collect();
6218
6219 if materials.is_empty() {
6220 continue;
6221 }
6222
6223 let company = &self.config.companies[i % self.config.companies.len()];
6224 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
6225 let fiscal_period = po_date.month() as u8;
6226 let created_by = if self.master_data.employees.is_empty() {
6227 "SYSTEM"
6228 } else {
6229 self.master_data.employees[i % self.master_data.employees.len()]
6230 .user_id
6231 .as_str()
6232 };
6233
6234 let chain = p2p_gen.generate_chain(
6235 &company.code,
6236 vendor,
6237 &materials,
6238 po_date,
6239 start_date.year() as u16,
6240 fiscal_period,
6241 created_by,
6242 );
6243
6244 flows.purchase_orders.push(chain.purchase_order.clone());
6246 flows.goods_receipts.extend(chain.goods_receipts.clone());
6247 if let Some(vi) = &chain.vendor_invoice {
6248 flows.vendor_invoices.push(vi.clone());
6249 }
6250 if let Some(payment) = &chain.payment {
6251 flows.payments.push(payment.clone());
6252 }
6253 for remainder in &chain.remainder_payments {
6254 flows.payments.push(remainder.clone());
6255 }
6256 flows.p2p_chains.push(chain);
6257
6258 if let Some(pb) = &pb {
6259 pb.inc(1);
6260 }
6261 }
6262
6263 if let Some(pb) = pb {
6264 pb.finish_with_message("P2P document flows complete");
6265 }
6266
6267 let o2c_count = self
6270 .phase_config
6271 .o2c_chains
6272 .min(self.master_data.customers.len() * 2 * months);
6273 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
6274
6275 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
6277 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
6278 o2c_gen.set_country_pack(self.primary_pack().clone());
6279
6280 for i in 0..o2c_count {
6281 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
6282 let materials: Vec<&Material> = self
6283 .master_data
6284 .materials
6285 .iter()
6286 .skip(i % self.master_data.materials.len().max(1))
6287 .take(2.min(self.master_data.materials.len()))
6288 .collect();
6289
6290 if materials.is_empty() {
6291 continue;
6292 }
6293
6294 let company = &self.config.companies[i % self.config.companies.len()];
6295 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
6296 let fiscal_period = so_date.month() as u8;
6297 let created_by = if self.master_data.employees.is_empty() {
6298 "SYSTEM"
6299 } else {
6300 self.master_data.employees[i % self.master_data.employees.len()]
6301 .user_id
6302 .as_str()
6303 };
6304
6305 let chain = o2c_gen.generate_chain(
6306 &company.code,
6307 customer,
6308 &materials,
6309 so_date,
6310 start_date.year() as u16,
6311 fiscal_period,
6312 created_by,
6313 );
6314
6315 flows.sales_orders.push(chain.sales_order.clone());
6317 flows.deliveries.extend(chain.deliveries.clone());
6318 if let Some(ci) = &chain.customer_invoice {
6319 flows.customer_invoices.push(ci.clone());
6320 }
6321 if let Some(receipt) = &chain.customer_receipt {
6322 flows.payments.push(receipt.clone());
6323 }
6324 for receipt in &chain.remainder_receipts {
6326 flows.payments.push(receipt.clone());
6327 }
6328 flows.o2c_chains.push(chain);
6329
6330 if let Some(pb) = &pb {
6331 pb.inc(1);
6332 }
6333 }
6334
6335 if let Some(pb) = pb {
6336 pb.finish_with_message("O2C document flows complete");
6337 }
6338
6339 Ok(())
6340 }
6341
6342 fn generate_journal_entries(
6344 &mut self,
6345 coa: &Arc<ChartOfAccounts>,
6346 ) -> SynthResult<Vec<JournalEntry>> {
6347 use datasynth_core::traits::ParallelGenerator;
6348
6349 let total = self.calculate_total_transactions();
6350 let pb = self.create_progress_bar(total, "Generating Journal Entries");
6351
6352 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6353 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6354 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6355
6356 let company_codes: Vec<String> = self
6357 .config
6358 .companies
6359 .iter()
6360 .map(|c| c.code.clone())
6361 .collect();
6362
6363 let generator = JournalEntryGenerator::new_with_params(
6364 self.config.transactions.clone(),
6365 Arc::clone(coa),
6366 company_codes,
6367 start_date,
6368 end_date,
6369 self.seed,
6370 );
6371
6372 let je_pack = self.primary_pack();
6376
6377 let mut generator = generator
6378 .with_master_data(
6379 &self.master_data.vendors,
6380 &self.master_data.customers,
6381 &self.master_data.materials,
6382 )
6383 .with_country_pack_names(je_pack)
6384 .with_country_pack_temporal(
6385 self.config.temporal_patterns.clone(),
6386 self.seed + 200,
6387 je_pack,
6388 )
6389 .with_persona_errors(true)
6390 .with_fraud_config(self.config.fraud.clone());
6391
6392 if self.config.temporal.enabled {
6394 let drift_config = self.config.temporal.to_core_config();
6395 generator = generator.with_drift_config(drift_config, self.seed + 100);
6396 }
6397
6398 self.check_memory_limit()?;
6400
6401 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
6403
6404 let entries = if total >= 10_000 && num_threads > 1 {
6408 let sub_generators = generator.split(num_threads);
6411 let entries_per_thread = total as usize / num_threads;
6412 let remainder = total as usize % num_threads;
6413
6414 let batches: Vec<Vec<JournalEntry>> = sub_generators
6415 .into_par_iter()
6416 .enumerate()
6417 .map(|(i, mut gen)| {
6418 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
6419 gen.generate_batch(count)
6420 })
6421 .collect();
6422
6423 let entries = JournalEntryGenerator::merge_results(batches);
6425
6426 if let Some(pb) = &pb {
6427 pb.inc(total);
6428 }
6429 entries
6430 } else {
6431 let mut entries = Vec::with_capacity(total as usize);
6433 for _ in 0..total {
6434 let entry = generator.generate();
6435 entries.push(entry);
6436 if let Some(pb) = &pb {
6437 pb.inc(1);
6438 }
6439 }
6440 entries
6441 };
6442
6443 if let Some(pb) = pb {
6444 pb.finish_with_message("Journal entries complete");
6445 }
6446
6447 Ok(entries)
6448 }
6449
6450 fn generate_jes_from_document_flows(
6455 &mut self,
6456 flows: &DocumentFlowSnapshot,
6457 ) -> SynthResult<Vec<JournalEntry>> {
6458 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
6459 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
6460
6461 let je_config = match self.resolve_coa_framework() {
6462 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
6463 CoAFramework::GermanSkr04 => {
6464 let fa = datasynth_core::FrameworkAccounts::german_gaap();
6465 DocumentFlowJeConfig::from(&fa)
6466 }
6467 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
6468 };
6469
6470 let populate_fec = je_config.populate_fec_fields;
6471 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
6472
6473 if populate_fec {
6477 let mut aux_lookup = std::collections::HashMap::new();
6478 for vendor in &self.master_data.vendors {
6479 if let Some(ref aux) = vendor.auxiliary_gl_account {
6480 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
6481 }
6482 }
6483 for customer in &self.master_data.customers {
6484 if let Some(ref aux) = customer.auxiliary_gl_account {
6485 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
6486 }
6487 }
6488 if !aux_lookup.is_empty() {
6489 generator.set_auxiliary_account_lookup(aux_lookup);
6490 }
6491 }
6492
6493 let mut entries = Vec::new();
6494
6495 for chain in &flows.p2p_chains {
6497 let chain_entries = generator.generate_from_p2p_chain(chain);
6498 entries.extend(chain_entries);
6499 if let Some(pb) = &pb {
6500 pb.inc(1);
6501 }
6502 }
6503
6504 for chain in &flows.o2c_chains {
6506 let chain_entries = generator.generate_from_o2c_chain(chain);
6507 entries.extend(chain_entries);
6508 if let Some(pb) = &pb {
6509 pb.inc(1);
6510 }
6511 }
6512
6513 if let Some(pb) = pb {
6514 pb.finish_with_message(format!(
6515 "Generated {} JEs from document flows",
6516 entries.len()
6517 ));
6518 }
6519
6520 Ok(entries)
6521 }
6522
6523 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
6529 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
6530
6531 let mut jes = Vec::with_capacity(payroll_runs.len());
6532
6533 for run in payroll_runs {
6534 let mut je = JournalEntry::new_simple(
6535 format!("JE-PAYROLL-{}", run.payroll_id),
6536 run.company_code.clone(),
6537 run.run_date,
6538 format!("Payroll {}", run.payroll_id),
6539 );
6540
6541 je.add_line(JournalEntryLine {
6543 line_number: 1,
6544 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
6545 debit_amount: run.total_gross,
6546 reference: Some(run.payroll_id.clone()),
6547 text: Some(format!(
6548 "Payroll {} ({} employees)",
6549 run.payroll_id, run.employee_count
6550 )),
6551 ..Default::default()
6552 });
6553
6554 je.add_line(JournalEntryLine {
6556 line_number: 2,
6557 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
6558 credit_amount: run.total_gross,
6559 reference: Some(run.payroll_id.clone()),
6560 ..Default::default()
6561 });
6562
6563 jes.push(je);
6564 }
6565
6566 jes
6567 }
6568
6569 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
6575 use datasynth_core::accounts::{control_accounts, expense_accounts};
6576 use datasynth_core::models::ProductionOrderStatus;
6577
6578 let mut jes = Vec::new();
6579
6580 for order in production_orders {
6581 if !matches!(
6583 order.status,
6584 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
6585 ) {
6586 continue;
6587 }
6588
6589 let mut je = JournalEntry::new_simple(
6590 format!("JE-MFG-{}", order.order_id),
6591 order.company_code.clone(),
6592 order.actual_end.unwrap_or(order.planned_end),
6593 format!(
6594 "Production Order {} - {}",
6595 order.order_id, order.material_description
6596 ),
6597 );
6598
6599 je.add_line(JournalEntryLine {
6601 line_number: 1,
6602 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
6603 debit_amount: order.actual_cost,
6604 reference: Some(order.order_id.clone()),
6605 text: Some(format!(
6606 "Material consumption for {}",
6607 order.material_description
6608 )),
6609 quantity: Some(order.actual_quantity),
6610 unit: Some("EA".to_string()),
6611 ..Default::default()
6612 });
6613
6614 je.add_line(JournalEntryLine {
6616 line_number: 2,
6617 gl_account: control_accounts::INVENTORY.to_string(),
6618 credit_amount: order.actual_cost,
6619 reference: Some(order.order_id.clone()),
6620 ..Default::default()
6621 });
6622
6623 jes.push(je);
6624 }
6625
6626 jes
6627 }
6628
6629 fn link_document_flows_to_subledgers(
6634 &mut self,
6635 flows: &DocumentFlowSnapshot,
6636 ) -> SynthResult<SubledgerSnapshot> {
6637 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
6638 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
6639
6640 let vendor_names: std::collections::HashMap<String, String> = self
6642 .master_data
6643 .vendors
6644 .iter()
6645 .map(|v| (v.vendor_id.clone(), v.name.clone()))
6646 .collect();
6647 let customer_names: std::collections::HashMap<String, String> = self
6648 .master_data
6649 .customers
6650 .iter()
6651 .map(|c| (c.customer_id.clone(), c.name.clone()))
6652 .collect();
6653
6654 let mut linker = DocumentFlowLinker::new()
6655 .with_vendor_names(vendor_names)
6656 .with_customer_names(customer_names);
6657
6658 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
6660 if let Some(pb) = &pb {
6661 pb.inc(flows.vendor_invoices.len() as u64);
6662 }
6663
6664 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
6666 if let Some(pb) = &pb {
6667 pb.inc(flows.customer_invoices.len() as u64);
6668 }
6669
6670 if let Some(pb) = pb {
6671 pb.finish_with_message(format!(
6672 "Linked {} AP and {} AR invoices",
6673 ap_invoices.len(),
6674 ar_invoices.len()
6675 ));
6676 }
6677
6678 Ok(SubledgerSnapshot {
6679 ap_invoices,
6680 ar_invoices,
6681 fa_records: Vec::new(),
6682 inventory_positions: Vec::new(),
6683 inventory_movements: Vec::new(),
6684 })
6685 }
6686
6687 #[allow(clippy::too_many_arguments)]
6692 fn generate_ocpm_events(
6693 &mut self,
6694 flows: &DocumentFlowSnapshot,
6695 sourcing: &SourcingSnapshot,
6696 hr: &HrSnapshot,
6697 manufacturing: &ManufacturingSnapshot,
6698 banking: &BankingSnapshot,
6699 audit: &AuditSnapshot,
6700 financial_reporting: &FinancialReportingSnapshot,
6701 ) -> SynthResult<OcpmSnapshot> {
6702 let total_chains = flows.p2p_chains.len()
6703 + flows.o2c_chains.len()
6704 + sourcing.sourcing_projects.len()
6705 + hr.payroll_runs.len()
6706 + manufacturing.production_orders.len()
6707 + banking.customers.len()
6708 + audit.engagements.len()
6709 + financial_reporting.bank_reconciliations.len();
6710 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
6711
6712 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
6714 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
6715
6716 let ocpm_config = OcpmGeneratorConfig {
6718 generate_p2p: true,
6719 generate_o2c: true,
6720 generate_s2c: !sourcing.sourcing_projects.is_empty(),
6721 generate_h2r: !hr.payroll_runs.is_empty(),
6722 generate_mfg: !manufacturing.production_orders.is_empty(),
6723 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
6724 generate_bank: !banking.customers.is_empty(),
6725 generate_audit: !audit.engagements.is_empty(),
6726 happy_path_rate: 0.75,
6727 exception_path_rate: 0.20,
6728 error_path_rate: 0.05,
6729 add_duration_variability: true,
6730 duration_std_dev_factor: 0.3,
6731 };
6732 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
6733 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
6734
6735 let available_users: Vec<String> = self
6737 .master_data
6738 .employees
6739 .iter()
6740 .take(20)
6741 .map(|e| e.user_id.clone())
6742 .collect();
6743
6744 let fallback_date =
6746 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
6747 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6748 .unwrap_or(fallback_date);
6749 let base_midnight = base_date
6750 .and_hms_opt(0, 0, 0)
6751 .expect("midnight is always valid");
6752 let base_datetime =
6753 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
6754
6755 let add_result = |event_log: &mut OcpmEventLog,
6757 result: datasynth_ocpm::CaseGenerationResult| {
6758 for event in result.events {
6759 event_log.add_event(event);
6760 }
6761 for object in result.objects {
6762 event_log.add_object(object);
6763 }
6764 for relationship in result.relationships {
6765 event_log.add_relationship(relationship);
6766 }
6767 for corr in result.correlation_events {
6768 event_log.add_correlation_event(corr);
6769 }
6770 event_log.add_case(result.case_trace);
6771 };
6772
6773 for chain in &flows.p2p_chains {
6775 let po = &chain.purchase_order;
6776 let documents = P2pDocuments::new(
6777 &po.header.document_id,
6778 &po.vendor_id,
6779 &po.header.company_code,
6780 po.total_net_amount,
6781 &po.header.currency,
6782 &ocpm_uuid_factory,
6783 )
6784 .with_goods_receipt(
6785 chain
6786 .goods_receipts
6787 .first()
6788 .map(|gr| gr.header.document_id.as_str())
6789 .unwrap_or(""),
6790 &ocpm_uuid_factory,
6791 )
6792 .with_invoice(
6793 chain
6794 .vendor_invoice
6795 .as_ref()
6796 .map(|vi| vi.header.document_id.as_str())
6797 .unwrap_or(""),
6798 &ocpm_uuid_factory,
6799 )
6800 .with_payment(
6801 chain
6802 .payment
6803 .as_ref()
6804 .map(|p| p.header.document_id.as_str())
6805 .unwrap_or(""),
6806 &ocpm_uuid_factory,
6807 );
6808
6809 let start_time =
6810 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
6811 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
6812 add_result(&mut event_log, result);
6813
6814 if let Some(pb) = &pb {
6815 pb.inc(1);
6816 }
6817 }
6818
6819 for chain in &flows.o2c_chains {
6821 let so = &chain.sales_order;
6822 let documents = O2cDocuments::new(
6823 &so.header.document_id,
6824 &so.customer_id,
6825 &so.header.company_code,
6826 so.total_net_amount,
6827 &so.header.currency,
6828 &ocpm_uuid_factory,
6829 )
6830 .with_delivery(
6831 chain
6832 .deliveries
6833 .first()
6834 .map(|d| d.header.document_id.as_str())
6835 .unwrap_or(""),
6836 &ocpm_uuid_factory,
6837 )
6838 .with_invoice(
6839 chain
6840 .customer_invoice
6841 .as_ref()
6842 .map(|ci| ci.header.document_id.as_str())
6843 .unwrap_or(""),
6844 &ocpm_uuid_factory,
6845 )
6846 .with_receipt(
6847 chain
6848 .customer_receipt
6849 .as_ref()
6850 .map(|r| r.header.document_id.as_str())
6851 .unwrap_or(""),
6852 &ocpm_uuid_factory,
6853 );
6854
6855 let start_time =
6856 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
6857 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
6858 add_result(&mut event_log, result);
6859
6860 if let Some(pb) = &pb {
6861 pb.inc(1);
6862 }
6863 }
6864
6865 for project in &sourcing.sourcing_projects {
6867 let vendor_id = sourcing
6869 .contracts
6870 .iter()
6871 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6872 .map(|c| c.vendor_id.clone())
6873 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
6874 .or_else(|| {
6875 self.master_data
6876 .vendors
6877 .first()
6878 .map(|v| v.vendor_id.clone())
6879 })
6880 .unwrap_or_else(|| "V000".to_string());
6881 let mut docs = S2cDocuments::new(
6882 &project.project_id,
6883 &vendor_id,
6884 &project.company_code,
6885 project.estimated_annual_spend,
6886 &ocpm_uuid_factory,
6887 );
6888 if let Some(rfx) = sourcing
6890 .rfx_events
6891 .iter()
6892 .find(|r| r.sourcing_project_id == project.project_id)
6893 {
6894 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
6895 if let Some(bid) = sourcing.bids.iter().find(|b| {
6897 b.rfx_id == rfx.rfx_id
6898 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
6899 }) {
6900 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
6901 }
6902 }
6903 if let Some(contract) = sourcing
6905 .contracts
6906 .iter()
6907 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6908 {
6909 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
6910 }
6911 let start_time = base_datetime - chrono::Duration::days(90);
6912 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
6913 add_result(&mut event_log, result);
6914
6915 if let Some(pb) = &pb {
6916 pb.inc(1);
6917 }
6918 }
6919
6920 for run in &hr.payroll_runs {
6922 let employee_id = hr
6924 .payroll_line_items
6925 .iter()
6926 .find(|li| li.payroll_id == run.payroll_id)
6927 .map(|li| li.employee_id.as_str())
6928 .unwrap_or("EMP000");
6929 let docs = H2rDocuments::new(
6930 &run.payroll_id,
6931 employee_id,
6932 &run.company_code,
6933 run.total_gross,
6934 &ocpm_uuid_factory,
6935 )
6936 .with_time_entries(
6937 hr.time_entries
6938 .iter()
6939 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
6940 .take(5)
6941 .map(|t| t.entry_id.as_str())
6942 .collect(),
6943 );
6944 let start_time = base_datetime - chrono::Duration::days(30);
6945 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
6946 add_result(&mut event_log, result);
6947
6948 if let Some(pb) = &pb {
6949 pb.inc(1);
6950 }
6951 }
6952
6953 for order in &manufacturing.production_orders {
6955 let mut docs = MfgDocuments::new(
6956 &order.order_id,
6957 &order.material_id,
6958 &order.company_code,
6959 order.planned_quantity,
6960 &ocpm_uuid_factory,
6961 )
6962 .with_operations(
6963 order
6964 .operations
6965 .iter()
6966 .map(|o| format!("OP-{:04}", o.operation_number))
6967 .collect::<Vec<_>>()
6968 .iter()
6969 .map(std::string::String::as_str)
6970 .collect(),
6971 );
6972 if let Some(insp) = manufacturing
6974 .quality_inspections
6975 .iter()
6976 .find(|i| i.reference_id == order.order_id)
6977 {
6978 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
6979 }
6980 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
6982 cc.items
6983 .iter()
6984 .any(|item| item.material_id == order.material_id)
6985 }) {
6986 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
6987 }
6988 let start_time = base_datetime - chrono::Duration::days(60);
6989 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
6990 add_result(&mut event_log, result);
6991
6992 if let Some(pb) = &pb {
6993 pb.inc(1);
6994 }
6995 }
6996
6997 for customer in &banking.customers {
6999 let customer_id_str = customer.customer_id.to_string();
7000 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
7001 if let Some(account) = banking
7003 .accounts
7004 .iter()
7005 .find(|a| a.primary_owner_id == customer.customer_id)
7006 {
7007 let account_id_str = account.account_id.to_string();
7008 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
7009 let txn_strs: Vec<String> = banking
7011 .transactions
7012 .iter()
7013 .filter(|t| t.account_id == account.account_id)
7014 .take(10)
7015 .map(|t| t.transaction_id.to_string())
7016 .collect();
7017 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
7018 let txn_amounts: Vec<rust_decimal::Decimal> = banking
7019 .transactions
7020 .iter()
7021 .filter(|t| t.account_id == account.account_id)
7022 .take(10)
7023 .map(|t| t.amount)
7024 .collect();
7025 if !txn_ids.is_empty() {
7026 docs = docs.with_transactions(txn_ids, txn_amounts);
7027 }
7028 }
7029 let start_time = base_datetime - chrono::Duration::days(180);
7030 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
7031 add_result(&mut event_log, result);
7032
7033 if let Some(pb) = &pb {
7034 pb.inc(1);
7035 }
7036 }
7037
7038 for engagement in &audit.engagements {
7040 let engagement_id_str = engagement.engagement_id.to_string();
7041 let docs = AuditDocuments::new(
7042 &engagement_id_str,
7043 &engagement.client_entity_id,
7044 &ocpm_uuid_factory,
7045 )
7046 .with_workpapers(
7047 audit
7048 .workpapers
7049 .iter()
7050 .filter(|w| w.engagement_id == engagement.engagement_id)
7051 .take(10)
7052 .map(|w| w.workpaper_id.to_string())
7053 .collect::<Vec<_>>()
7054 .iter()
7055 .map(std::string::String::as_str)
7056 .collect(),
7057 )
7058 .with_evidence(
7059 audit
7060 .evidence
7061 .iter()
7062 .filter(|e| e.engagement_id == engagement.engagement_id)
7063 .take(10)
7064 .map(|e| e.evidence_id.to_string())
7065 .collect::<Vec<_>>()
7066 .iter()
7067 .map(std::string::String::as_str)
7068 .collect(),
7069 )
7070 .with_risks(
7071 audit
7072 .risk_assessments
7073 .iter()
7074 .filter(|r| r.engagement_id == engagement.engagement_id)
7075 .take(5)
7076 .map(|r| r.risk_id.to_string())
7077 .collect::<Vec<_>>()
7078 .iter()
7079 .map(std::string::String::as_str)
7080 .collect(),
7081 )
7082 .with_findings(
7083 audit
7084 .findings
7085 .iter()
7086 .filter(|f| f.engagement_id == engagement.engagement_id)
7087 .take(5)
7088 .map(|f| f.finding_id.to_string())
7089 .collect::<Vec<_>>()
7090 .iter()
7091 .map(std::string::String::as_str)
7092 .collect(),
7093 )
7094 .with_judgments(
7095 audit
7096 .judgments
7097 .iter()
7098 .filter(|j| j.engagement_id == engagement.engagement_id)
7099 .take(5)
7100 .map(|j| j.judgment_id.to_string())
7101 .collect::<Vec<_>>()
7102 .iter()
7103 .map(std::string::String::as_str)
7104 .collect(),
7105 );
7106 let start_time = base_datetime - chrono::Duration::days(120);
7107 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
7108 add_result(&mut event_log, result);
7109
7110 if let Some(pb) = &pb {
7111 pb.inc(1);
7112 }
7113 }
7114
7115 for recon in &financial_reporting.bank_reconciliations {
7117 let docs = BankReconDocuments::new(
7118 &recon.reconciliation_id,
7119 &recon.bank_account_id,
7120 &recon.company_code,
7121 recon.bank_ending_balance,
7122 &ocpm_uuid_factory,
7123 )
7124 .with_statement_lines(
7125 recon
7126 .statement_lines
7127 .iter()
7128 .take(20)
7129 .map(|l| l.line_id.as_str())
7130 .collect(),
7131 )
7132 .with_reconciling_items(
7133 recon
7134 .reconciling_items
7135 .iter()
7136 .take(10)
7137 .map(|i| i.item_id.as_str())
7138 .collect(),
7139 );
7140 let start_time = base_datetime - chrono::Duration::days(30);
7141 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
7142 add_result(&mut event_log, result);
7143
7144 if let Some(pb) = &pb {
7145 pb.inc(1);
7146 }
7147 }
7148
7149 event_log.compute_variants();
7151
7152 let summary = event_log.summary();
7153
7154 if let Some(pb) = pb {
7155 pb.finish_with_message(format!(
7156 "Generated {} OCPM events, {} objects",
7157 summary.event_count, summary.object_count
7158 ));
7159 }
7160
7161 Ok(OcpmSnapshot {
7162 event_count: summary.event_count,
7163 object_count: summary.object_count,
7164 case_count: summary.case_count,
7165 event_log: Some(event_log),
7166 })
7167 }
7168
7169 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
7171 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
7172
7173 let total_rate = if self.config.anomaly_injection.enabled {
7176 self.config.anomaly_injection.rates.total_rate
7177 } else if self.config.fraud.enabled {
7178 self.config.fraud.fraud_rate
7179 } else {
7180 0.02
7181 };
7182
7183 let fraud_rate = if self.config.anomaly_injection.enabled {
7184 self.config.anomaly_injection.rates.fraud_rate
7185 } else {
7186 AnomalyRateConfig::default().fraud_rate
7187 };
7188
7189 let error_rate = if self.config.anomaly_injection.enabled {
7190 self.config.anomaly_injection.rates.error_rate
7191 } else {
7192 AnomalyRateConfig::default().error_rate
7193 };
7194
7195 let process_issue_rate = if self.config.anomaly_injection.enabled {
7196 self.config.anomaly_injection.rates.process_rate
7197 } else {
7198 AnomalyRateConfig::default().process_issue_rate
7199 };
7200
7201 let anomaly_config = AnomalyInjectorConfig {
7202 rates: AnomalyRateConfig {
7203 total_rate,
7204 fraud_rate,
7205 error_rate,
7206 process_issue_rate,
7207 ..Default::default()
7208 },
7209 seed: self.seed + 5000,
7210 ..Default::default()
7211 };
7212
7213 let mut injector = AnomalyInjector::new(anomaly_config);
7214 let result = injector.process_entries(entries);
7215
7216 if let Some(pb) = &pb {
7217 pb.inc(entries.len() as u64);
7218 pb.finish_with_message("Anomaly injection complete");
7219 }
7220
7221 let mut by_type = HashMap::new();
7222 for label in &result.labels {
7223 *by_type
7224 .entry(format!("{:?}", label.anomaly_type))
7225 .or_insert(0) += 1;
7226 }
7227
7228 Ok(AnomalyLabels {
7229 labels: result.labels,
7230 summary: Some(result.summary),
7231 by_type,
7232 })
7233 }
7234
7235 fn validate_journal_entries(
7244 &mut self,
7245 entries: &[JournalEntry],
7246 ) -> SynthResult<BalanceValidationResult> {
7247 let clean_entries: Vec<&JournalEntry> = entries
7249 .iter()
7250 .filter(|e| {
7251 e.header
7252 .header_text
7253 .as_ref()
7254 .map(|t| !t.contains("[HUMAN_ERROR:"))
7255 .unwrap_or(true)
7256 })
7257 .collect();
7258
7259 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
7260
7261 let config = BalanceTrackerConfig {
7263 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
7267 };
7268 let validation_currency = self
7269 .config
7270 .companies
7271 .first()
7272 .map(|c| c.currency.clone())
7273 .unwrap_or_else(|| "USD".to_string());
7274
7275 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
7276
7277 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
7279 let errors = tracker.apply_entries(&clean_refs);
7280
7281 if let Some(pb) = &pb {
7282 pb.inc(entries.len() as u64);
7283 }
7284
7285 let has_unbalanced = tracker
7288 .get_validation_errors()
7289 .iter()
7290 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
7291
7292 let mut all_errors = errors;
7295 all_errors.extend(tracker.get_validation_errors().iter().cloned());
7296 let company_codes: Vec<String> = self
7297 .config
7298 .companies
7299 .iter()
7300 .map(|c| c.code.clone())
7301 .collect();
7302
7303 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7304 .map(|d| d + chrono::Months::new(self.config.global.period_months))
7305 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7306
7307 for company_code in &company_codes {
7308 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
7309 all_errors.push(e);
7310 }
7311 }
7312
7313 let stats = tracker.get_statistics();
7315
7316 let is_balanced = all_errors.is_empty();
7318
7319 if let Some(pb) = pb {
7320 let msg = if is_balanced {
7321 "Balance validation passed"
7322 } else {
7323 "Balance validation completed with errors"
7324 };
7325 pb.finish_with_message(msg);
7326 }
7327
7328 Ok(BalanceValidationResult {
7329 validated: true,
7330 is_balanced,
7331 entries_processed: stats.entries_processed,
7332 total_debits: stats.total_debits,
7333 total_credits: stats.total_credits,
7334 accounts_tracked: stats.accounts_tracked,
7335 companies_tracked: stats.companies_tracked,
7336 validation_errors: all_errors,
7337 has_unbalanced_entries: has_unbalanced,
7338 })
7339 }
7340
7341 fn inject_data_quality(
7346 &mut self,
7347 entries: &mut [JournalEntry],
7348 ) -> SynthResult<DataQualityStats> {
7349 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
7350
7351 let config = if self.config.data_quality.enabled {
7354 let dq = &self.config.data_quality;
7355 DataQualityConfig {
7356 enable_missing_values: dq.missing_values.enabled,
7357 missing_values: datasynth_generators::MissingValueConfig {
7358 global_rate: dq.effective_missing_rate(),
7359 ..Default::default()
7360 },
7361 enable_format_variations: dq.format_variations.enabled,
7362 format_variations: datasynth_generators::FormatVariationConfig {
7363 date_variation_rate: dq.format_variations.dates.rate,
7364 amount_variation_rate: dq.format_variations.amounts.rate,
7365 identifier_variation_rate: dq.format_variations.identifiers.rate,
7366 ..Default::default()
7367 },
7368 enable_duplicates: dq.duplicates.enabled,
7369 duplicates: datasynth_generators::DuplicateConfig {
7370 duplicate_rate: dq.effective_duplicate_rate(),
7371 ..Default::default()
7372 },
7373 enable_typos: dq.typos.enabled,
7374 typos: datasynth_generators::TypoConfig {
7375 char_error_rate: dq.effective_typo_rate(),
7376 ..Default::default()
7377 },
7378 enable_encoding_issues: dq.encoding_issues.enabled,
7379 encoding_issue_rate: dq.encoding_issues.rate,
7380 seed: self.seed.wrapping_add(77), track_statistics: true,
7382 }
7383 } else {
7384 DataQualityConfig::minimal()
7385 };
7386 let mut injector = DataQualityInjector::new(config);
7387
7388 injector.set_country_pack(self.primary_pack().clone());
7390
7391 let context = HashMap::new();
7393
7394 for entry in entries.iter_mut() {
7395 if let Some(text) = &entry.header.header_text {
7397 let processed = injector.process_text_field(
7398 "header_text",
7399 text,
7400 &entry.header.document_id.to_string(),
7401 &context,
7402 );
7403 match processed {
7404 Some(new_text) if new_text != *text => {
7405 entry.header.header_text = Some(new_text);
7406 }
7407 None => {
7408 entry.header.header_text = None; }
7410 _ => {}
7411 }
7412 }
7413
7414 if let Some(ref_text) = &entry.header.reference {
7416 let processed = injector.process_text_field(
7417 "reference",
7418 ref_text,
7419 &entry.header.document_id.to_string(),
7420 &context,
7421 );
7422 match processed {
7423 Some(new_text) if new_text != *ref_text => {
7424 entry.header.reference = Some(new_text);
7425 }
7426 None => {
7427 entry.header.reference = None;
7428 }
7429 _ => {}
7430 }
7431 }
7432
7433 let user_persona = entry.header.user_persona.clone();
7435 if let Some(processed) = injector.process_text_field(
7436 "user_persona",
7437 &user_persona,
7438 &entry.header.document_id.to_string(),
7439 &context,
7440 ) {
7441 if processed != user_persona {
7442 entry.header.user_persona = processed;
7443 }
7444 }
7445
7446 for line in &mut entry.lines {
7448 if let Some(ref text) = line.line_text {
7450 let processed = injector.process_text_field(
7451 "line_text",
7452 text,
7453 &entry.header.document_id.to_string(),
7454 &context,
7455 );
7456 match processed {
7457 Some(new_text) if new_text != *text => {
7458 line.line_text = Some(new_text);
7459 }
7460 None => {
7461 line.line_text = None;
7462 }
7463 _ => {}
7464 }
7465 }
7466
7467 if let Some(cc) = &line.cost_center {
7469 let processed = injector.process_text_field(
7470 "cost_center",
7471 cc,
7472 &entry.header.document_id.to_string(),
7473 &context,
7474 );
7475 match processed {
7476 Some(new_cc) if new_cc != *cc => {
7477 line.cost_center = Some(new_cc);
7478 }
7479 None => {
7480 line.cost_center = None;
7481 }
7482 _ => {}
7483 }
7484 }
7485 }
7486
7487 if let Some(pb) = &pb {
7488 pb.inc(1);
7489 }
7490 }
7491
7492 if let Some(pb) = pb {
7493 pb.finish_with_message("Data quality injection complete");
7494 }
7495
7496 Ok(injector.stats().clone())
7497 }
7498
7499 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
7510 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7511 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7512 let fiscal_year = start_date.year() as u16;
7513 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
7514
7515 let total_revenue: rust_decimal::Decimal = entries
7517 .iter()
7518 .flat_map(|e| e.lines.iter())
7519 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
7520 .map(|l| l.credit_amount)
7521 .sum();
7522
7523 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
7525
7526 let mut snapshot = AuditSnapshot::default();
7527
7528 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
7530 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
7531 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
7532 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
7533 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
7534 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
7535
7536 let accounts: Vec<String> = self
7538 .coa
7539 .as_ref()
7540 .map(|coa| {
7541 coa.get_postable_accounts()
7542 .iter()
7543 .map(|acc| acc.account_code().to_string())
7544 .collect()
7545 })
7546 .unwrap_or_default();
7547
7548 for (i, company) in self.config.companies.iter().enumerate() {
7550 let company_revenue = total_revenue
7552 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
7553
7554 let engagements_for_company =
7556 self.phase_config.audit_engagements / self.config.companies.len().max(1);
7557 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
7558 1
7559 } else {
7560 0
7561 };
7562
7563 for _eng_idx in 0..(engagements_for_company + extra) {
7564 let mut engagement = engagement_gen.generate_engagement(
7566 &company.code,
7567 &company.name,
7568 fiscal_year,
7569 period_end,
7570 company_revenue,
7571 None, );
7573
7574 if !self.master_data.employees.is_empty() {
7576 let emp_count = self.master_data.employees.len();
7577 let base = (i * 10 + _eng_idx) % emp_count;
7579 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
7580 .employee_id
7581 .clone();
7582 engagement.engagement_manager_id = self.master_data.employees
7583 [(base + 1) % emp_count]
7584 .employee_id
7585 .clone();
7586 let real_team: Vec<String> = engagement
7587 .team_member_ids
7588 .iter()
7589 .enumerate()
7590 .map(|(j, _)| {
7591 self.master_data.employees[(base + 2 + j) % emp_count]
7592 .employee_id
7593 .clone()
7594 })
7595 .collect();
7596 engagement.team_member_ids = real_team;
7597 }
7598
7599 if let Some(pb) = &pb {
7600 pb.inc(1);
7601 }
7602
7603 let team_members: Vec<String> = engagement.team_member_ids.clone();
7605
7606 let workpapers =
7608 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
7609
7610 for wp in &workpapers {
7611 if let Some(pb) = &pb {
7612 pb.inc(1);
7613 }
7614
7615 let evidence = evidence_gen.generate_evidence_for_workpaper(
7617 wp,
7618 &team_members,
7619 wp.preparer_date,
7620 );
7621
7622 for _ in &evidence {
7623 if let Some(pb) = &pb {
7624 pb.inc(1);
7625 }
7626 }
7627
7628 snapshot.evidence.extend(evidence);
7629 }
7630
7631 let risks =
7633 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
7634
7635 for _ in &risks {
7636 if let Some(pb) = &pb {
7637 pb.inc(1);
7638 }
7639 }
7640 snapshot.risk_assessments.extend(risks);
7641
7642 let findings = finding_gen.generate_findings_for_engagement(
7644 &engagement,
7645 &workpapers,
7646 &team_members,
7647 );
7648
7649 for _ in &findings {
7650 if let Some(pb) = &pb {
7651 pb.inc(1);
7652 }
7653 }
7654 snapshot.findings.extend(findings);
7655
7656 let judgments =
7658 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
7659
7660 for _ in &judgments {
7661 if let Some(pb) = &pb {
7662 pb.inc(1);
7663 }
7664 }
7665 snapshot.judgments.extend(judgments);
7666
7667 snapshot.workpapers.extend(workpapers);
7669 snapshot.engagements.push(engagement);
7670 }
7671 }
7672
7673 if let Some(pb) = pb {
7674 pb.finish_with_message(format!(
7675 "Audit data: {} engagements, {} workpapers, {} evidence",
7676 snapshot.engagements.len(),
7677 snapshot.workpapers.len(),
7678 snapshot.evidence.len()
7679 ));
7680 }
7681
7682 Ok(snapshot)
7683 }
7684
7685 fn export_graphs(
7692 &mut self,
7693 entries: &[JournalEntry],
7694 _coa: &Arc<ChartOfAccounts>,
7695 stats: &mut EnhancedGenerationStatistics,
7696 ) -> SynthResult<GraphExportSnapshot> {
7697 let pb = self.create_progress_bar(100, "Exporting Graphs");
7698
7699 let mut snapshot = GraphExportSnapshot::default();
7700
7701 let output_dir = self
7703 .output_path
7704 .clone()
7705 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7706 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7707
7708 for graph_type in &self.config.graph_export.graph_types {
7710 if let Some(pb) = &pb {
7711 pb.inc(10);
7712 }
7713
7714 let graph_config = TransactionGraphConfig {
7716 include_vendors: false,
7717 include_customers: false,
7718 create_debit_credit_edges: true,
7719 include_document_nodes: graph_type.include_document_nodes,
7720 min_edge_weight: graph_type.min_edge_weight,
7721 aggregate_parallel_edges: graph_type.aggregate_edges,
7722 framework: None,
7723 };
7724
7725 let mut builder = TransactionGraphBuilder::new(graph_config);
7726 builder.add_journal_entries(entries);
7727 let graph = builder.build();
7728
7729 stats.graph_node_count += graph.node_count();
7731 stats.graph_edge_count += graph.edge_count();
7732
7733 if let Some(pb) = &pb {
7734 pb.inc(40);
7735 }
7736
7737 for format in &self.config.graph_export.formats {
7739 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
7740
7741 if let Err(e) = std::fs::create_dir_all(&format_dir) {
7743 warn!("Failed to create graph output directory: {}", e);
7744 continue;
7745 }
7746
7747 match format {
7748 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
7749 let pyg_config = PyGExportConfig {
7750 common: datasynth_graph::CommonExportConfig {
7751 export_node_features: true,
7752 export_edge_features: true,
7753 export_node_labels: true,
7754 export_edge_labels: true,
7755 export_masks: true,
7756 train_ratio: self.config.graph_export.train_ratio,
7757 val_ratio: self.config.graph_export.validation_ratio,
7758 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7759 },
7760 one_hot_categoricals: false,
7761 };
7762
7763 let exporter = PyGExporter::new(pyg_config);
7764 match exporter.export(&graph, &format_dir) {
7765 Ok(metadata) => {
7766 snapshot.exports.insert(
7767 format!("{}_{}", graph_type.name, "pytorch_geometric"),
7768 GraphExportInfo {
7769 name: graph_type.name.clone(),
7770 format: "pytorch_geometric".to_string(),
7771 output_path: format_dir.clone(),
7772 node_count: metadata.num_nodes,
7773 edge_count: metadata.num_edges,
7774 },
7775 );
7776 snapshot.graph_count += 1;
7777 }
7778 Err(e) => {
7779 warn!("Failed to export PyTorch Geometric graph: {}", e);
7780 }
7781 }
7782 }
7783 datasynth_config::schema::GraphExportFormat::Neo4j => {
7784 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
7785
7786 let neo4j_config = Neo4jExportConfig {
7787 export_node_properties: true,
7788 export_edge_properties: true,
7789 export_features: true,
7790 generate_cypher: true,
7791 generate_admin_import: true,
7792 database_name: "synth".to_string(),
7793 cypher_batch_size: 1000,
7794 };
7795
7796 let exporter = Neo4jExporter::new(neo4j_config);
7797 match exporter.export(&graph, &format_dir) {
7798 Ok(metadata) => {
7799 snapshot.exports.insert(
7800 format!("{}_{}", graph_type.name, "neo4j"),
7801 GraphExportInfo {
7802 name: graph_type.name.clone(),
7803 format: "neo4j".to_string(),
7804 output_path: format_dir.clone(),
7805 node_count: metadata.num_nodes,
7806 edge_count: metadata.num_edges,
7807 },
7808 );
7809 snapshot.graph_count += 1;
7810 }
7811 Err(e) => {
7812 warn!("Failed to export Neo4j graph: {}", e);
7813 }
7814 }
7815 }
7816 datasynth_config::schema::GraphExportFormat::Dgl => {
7817 use datasynth_graph::{DGLExportConfig, DGLExporter};
7818
7819 let dgl_config = DGLExportConfig {
7820 common: datasynth_graph::CommonExportConfig {
7821 export_node_features: true,
7822 export_edge_features: true,
7823 export_node_labels: true,
7824 export_edge_labels: true,
7825 export_masks: true,
7826 train_ratio: self.config.graph_export.train_ratio,
7827 val_ratio: self.config.graph_export.validation_ratio,
7828 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7829 },
7830 heterogeneous: false,
7831 include_pickle_script: true, };
7833
7834 let exporter = DGLExporter::new(dgl_config);
7835 match exporter.export(&graph, &format_dir) {
7836 Ok(metadata) => {
7837 snapshot.exports.insert(
7838 format!("{}_{}", graph_type.name, "dgl"),
7839 GraphExportInfo {
7840 name: graph_type.name.clone(),
7841 format: "dgl".to_string(),
7842 output_path: format_dir.clone(),
7843 node_count: metadata.common.num_nodes,
7844 edge_count: metadata.common.num_edges,
7845 },
7846 );
7847 snapshot.graph_count += 1;
7848 }
7849 Err(e) => {
7850 warn!("Failed to export DGL graph: {}", e);
7851 }
7852 }
7853 }
7854 datasynth_config::schema::GraphExportFormat::RustGraph => {
7855 use datasynth_graph::{
7856 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
7857 };
7858
7859 let rustgraph_config = RustGraphExportConfig {
7860 include_features: true,
7861 include_temporal: true,
7862 include_labels: true,
7863 source_name: "datasynth".to_string(),
7864 batch_id: None,
7865 output_format: RustGraphOutputFormat::JsonLines,
7866 export_node_properties: true,
7867 export_edge_properties: true,
7868 pretty_print: false,
7869 };
7870
7871 let exporter = RustGraphExporter::new(rustgraph_config);
7872 match exporter.export(&graph, &format_dir) {
7873 Ok(metadata) => {
7874 snapshot.exports.insert(
7875 format!("{}_{}", graph_type.name, "rustgraph"),
7876 GraphExportInfo {
7877 name: graph_type.name.clone(),
7878 format: "rustgraph".to_string(),
7879 output_path: format_dir.clone(),
7880 node_count: metadata.num_nodes,
7881 edge_count: metadata.num_edges,
7882 },
7883 );
7884 snapshot.graph_count += 1;
7885 }
7886 Err(e) => {
7887 warn!("Failed to export RustGraph: {}", e);
7888 }
7889 }
7890 }
7891 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
7892 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
7894 }
7895 }
7896 }
7897
7898 if let Some(pb) = &pb {
7899 pb.inc(40);
7900 }
7901 }
7902
7903 stats.graph_export_count = snapshot.graph_count;
7904 snapshot.exported = snapshot.graph_count > 0;
7905
7906 if let Some(pb) = pb {
7907 pb.finish_with_message(format!(
7908 "Graphs exported: {} graphs ({} nodes, {} edges)",
7909 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
7910 ));
7911 }
7912
7913 Ok(snapshot)
7914 }
7915
7916 fn build_additional_graphs(
7921 &self,
7922 banking: &BankingSnapshot,
7923 intercompany: &IntercompanySnapshot,
7924 entries: &[JournalEntry],
7925 stats: &mut EnhancedGenerationStatistics,
7926 ) {
7927 let output_dir = self
7928 .output_path
7929 .clone()
7930 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7931 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7932
7933 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
7935 info!("Phase 10c: Building banking network graph");
7936 let config = BankingGraphConfig::default();
7937 let mut builder = BankingGraphBuilder::new(config);
7938 builder.add_customers(&banking.customers);
7939 builder.add_accounts(&banking.accounts, &banking.customers);
7940 builder.add_transactions(&banking.transactions);
7941 let graph = builder.build();
7942
7943 let node_count = graph.node_count();
7944 let edge_count = graph.edge_count();
7945 stats.graph_node_count += node_count;
7946 stats.graph_edge_count += edge_count;
7947
7948 for format in &self.config.graph_export.formats {
7950 if matches!(
7951 format,
7952 datasynth_config::schema::GraphExportFormat::PytorchGeometric
7953 ) {
7954 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
7955 if let Err(e) = std::fs::create_dir_all(&format_dir) {
7956 warn!("Failed to create banking graph output dir: {}", e);
7957 continue;
7958 }
7959 let pyg_config = PyGExportConfig::default();
7960 let exporter = PyGExporter::new(pyg_config);
7961 if let Err(e) = exporter.export(&graph, &format_dir) {
7962 warn!("Failed to export banking graph as PyG: {}", e);
7963 } else {
7964 info!(
7965 "Banking network graph exported: {} nodes, {} edges",
7966 node_count, edge_count
7967 );
7968 }
7969 }
7970 }
7971 }
7972
7973 let approval_entries: Vec<_> = entries
7975 .iter()
7976 .filter(|je| je.header.approval_workflow.is_some())
7977 .collect();
7978
7979 if !approval_entries.is_empty() {
7980 info!(
7981 "Phase 10c: Building approval network graph ({} entries with approvals)",
7982 approval_entries.len()
7983 );
7984 let config = ApprovalGraphConfig::default();
7985 let mut builder = ApprovalGraphBuilder::new(config);
7986
7987 for je in &approval_entries {
7988 if let Some(ref wf) = je.header.approval_workflow {
7989 for action in &wf.actions {
7990 let record = datasynth_core::models::ApprovalRecord {
7991 approval_id: format!(
7992 "APR-{}-{}",
7993 je.header.document_id, action.approval_level
7994 ),
7995 document_number: je.header.document_id.to_string(),
7996 document_type: "JE".to_string(),
7997 company_code: je.company_code().to_string(),
7998 requester_id: wf.preparer_id.clone(),
7999 requester_name: Some(wf.preparer_name.clone()),
8000 approver_id: action.actor_id.clone(),
8001 approver_name: action.actor_name.clone(),
8002 approval_date: je.posting_date(),
8003 action: format!("{:?}", action.action),
8004 amount: wf.amount,
8005 approval_limit: None,
8006 comments: action.comments.clone(),
8007 delegation_from: None,
8008 is_auto_approved: false,
8009 };
8010 builder.add_approval(&record);
8011 }
8012 }
8013 }
8014
8015 let graph = builder.build();
8016 let node_count = graph.node_count();
8017 let edge_count = graph.edge_count();
8018 stats.graph_node_count += node_count;
8019 stats.graph_edge_count += edge_count;
8020
8021 for format in &self.config.graph_export.formats {
8023 if matches!(
8024 format,
8025 datasynth_config::schema::GraphExportFormat::PytorchGeometric
8026 ) {
8027 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
8028 if let Err(e) = std::fs::create_dir_all(&format_dir) {
8029 warn!("Failed to create approval graph output dir: {}", e);
8030 continue;
8031 }
8032 let pyg_config = PyGExportConfig::default();
8033 let exporter = PyGExporter::new(pyg_config);
8034 if let Err(e) = exporter.export(&graph, &format_dir) {
8035 warn!("Failed to export approval graph as PyG: {}", e);
8036 } else {
8037 info!(
8038 "Approval network graph exported: {} nodes, {} edges",
8039 node_count, edge_count
8040 );
8041 }
8042 }
8043 }
8044 }
8045
8046 if self.config.companies.len() >= 2 {
8048 info!(
8049 "Phase 10c: Building entity relationship graph ({} companies)",
8050 self.config.companies.len()
8051 );
8052
8053 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8054 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
8055
8056 let parent_code = &self.config.companies[0].code;
8058 let mut companies: Vec<datasynth_core::models::Company> =
8059 Vec::with_capacity(self.config.companies.len());
8060
8061 let first = &self.config.companies[0];
8063 companies.push(datasynth_core::models::Company::parent(
8064 &first.code,
8065 &first.name,
8066 &first.country,
8067 &first.currency,
8068 ));
8069
8070 for cc in self.config.companies.iter().skip(1) {
8072 companies.push(datasynth_core::models::Company::subsidiary(
8073 &cc.code,
8074 &cc.name,
8075 &cc.country,
8076 &cc.currency,
8077 parent_code,
8078 rust_decimal::Decimal::from(100),
8079 ));
8080 }
8081
8082 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
8084 self.config
8085 .companies
8086 .iter()
8087 .skip(1)
8088 .enumerate()
8089 .map(|(i, cc)| {
8090 let mut rel =
8091 datasynth_core::models::intercompany::IntercompanyRelationship::new(
8092 format!("REL{:03}", i + 1),
8093 parent_code.clone(),
8094 cc.code.clone(),
8095 rust_decimal::Decimal::from(100),
8096 start_date,
8097 );
8098 rel.functional_currency = cc.currency.clone();
8099 rel
8100 })
8101 .collect();
8102
8103 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
8104 builder.add_companies(&companies);
8105 builder.add_ownership_relationships(&relationships);
8106
8107 for pair in &intercompany.matched_pairs {
8109 builder.add_intercompany_edge(
8110 &pair.seller_company,
8111 &pair.buyer_company,
8112 pair.amount,
8113 &format!("{:?}", pair.transaction_type),
8114 );
8115 }
8116
8117 let graph = builder.build();
8118 let node_count = graph.node_count();
8119 let edge_count = graph.edge_count();
8120 stats.graph_node_count += node_count;
8121 stats.graph_edge_count += edge_count;
8122
8123 for format in &self.config.graph_export.formats {
8125 if matches!(
8126 format,
8127 datasynth_config::schema::GraphExportFormat::PytorchGeometric
8128 ) {
8129 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
8130 if let Err(e) = std::fs::create_dir_all(&format_dir) {
8131 warn!("Failed to create entity graph output dir: {}", e);
8132 continue;
8133 }
8134 let pyg_config = PyGExportConfig::default();
8135 let exporter = PyGExporter::new(pyg_config);
8136 if let Err(e) = exporter.export(&graph, &format_dir) {
8137 warn!("Failed to export entity graph as PyG: {}", e);
8138 } else {
8139 info!(
8140 "Entity relationship graph exported: {} nodes, {} edges",
8141 node_count, edge_count
8142 );
8143 }
8144 }
8145 }
8146 } else {
8147 debug!(
8148 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
8149 self.config.companies.len()
8150 );
8151 }
8152 }
8153
8154 #[allow(clippy::too_many_arguments)]
8161 fn export_hypergraph(
8162 &self,
8163 coa: &Arc<ChartOfAccounts>,
8164 entries: &[JournalEntry],
8165 document_flows: &DocumentFlowSnapshot,
8166 sourcing: &SourcingSnapshot,
8167 hr: &HrSnapshot,
8168 manufacturing: &ManufacturingSnapshot,
8169 banking: &BankingSnapshot,
8170 audit: &AuditSnapshot,
8171 financial_reporting: &FinancialReportingSnapshot,
8172 ocpm: &OcpmSnapshot,
8173 stats: &mut EnhancedGenerationStatistics,
8174 ) -> SynthResult<HypergraphExportInfo> {
8175 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
8176 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
8177 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
8178 use datasynth_graph::models::hypergraph::AggregationStrategy;
8179
8180 let hg_settings = &self.config.graph_export.hypergraph;
8181
8182 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
8184 "truncate" => AggregationStrategy::Truncate,
8185 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
8186 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
8187 "importance_sample" => AggregationStrategy::ImportanceSample,
8188 _ => AggregationStrategy::PoolByCounterparty,
8189 };
8190
8191 let builder_config = HypergraphConfig {
8192 max_nodes: hg_settings.max_nodes,
8193 aggregation_strategy,
8194 include_coso: hg_settings.governance_layer.include_coso,
8195 include_controls: hg_settings.governance_layer.include_controls,
8196 include_sox: hg_settings.governance_layer.include_sox,
8197 include_vendors: hg_settings.governance_layer.include_vendors,
8198 include_customers: hg_settings.governance_layer.include_customers,
8199 include_employees: hg_settings.governance_layer.include_employees,
8200 include_p2p: hg_settings.process_layer.include_p2p,
8201 include_o2c: hg_settings.process_layer.include_o2c,
8202 include_s2c: hg_settings.process_layer.include_s2c,
8203 include_h2r: hg_settings.process_layer.include_h2r,
8204 include_mfg: hg_settings.process_layer.include_mfg,
8205 include_bank: hg_settings.process_layer.include_bank,
8206 include_audit: hg_settings.process_layer.include_audit,
8207 include_r2r: hg_settings.process_layer.include_r2r,
8208 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
8209 docs_per_counterparty_threshold: hg_settings
8210 .process_layer
8211 .docs_per_counterparty_threshold,
8212 include_accounts: hg_settings.accounting_layer.include_accounts,
8213 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
8214 include_cross_layer_edges: hg_settings.cross_layer.enabled,
8215 };
8216
8217 let mut builder = HypergraphBuilder::new(builder_config);
8218
8219 builder.add_coso_framework();
8221
8222 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
8225 let controls = InternalControl::standard_controls();
8226 builder.add_controls(&controls);
8227 }
8228
8229 builder.add_vendors(&self.master_data.vendors);
8231 builder.add_customers(&self.master_data.customers);
8232 builder.add_employees(&self.master_data.employees);
8233
8234 builder.add_p2p_documents(
8236 &document_flows.purchase_orders,
8237 &document_flows.goods_receipts,
8238 &document_flows.vendor_invoices,
8239 &document_flows.payments,
8240 );
8241 builder.add_o2c_documents(
8242 &document_flows.sales_orders,
8243 &document_flows.deliveries,
8244 &document_flows.customer_invoices,
8245 );
8246 builder.add_s2c_documents(
8247 &sourcing.sourcing_projects,
8248 &sourcing.qualifications,
8249 &sourcing.rfx_events,
8250 &sourcing.bids,
8251 &sourcing.bid_evaluations,
8252 &sourcing.contracts,
8253 );
8254 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
8255 builder.add_mfg_documents(
8256 &manufacturing.production_orders,
8257 &manufacturing.quality_inspections,
8258 &manufacturing.cycle_counts,
8259 );
8260 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
8261 builder.add_audit_documents(
8262 &audit.engagements,
8263 &audit.workpapers,
8264 &audit.findings,
8265 &audit.evidence,
8266 &audit.risk_assessments,
8267 &audit.judgments,
8268 );
8269 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
8270
8271 if let Some(ref event_log) = ocpm.event_log {
8273 builder.add_ocpm_events(event_log);
8274 }
8275
8276 builder.add_accounts(coa);
8278 builder.add_journal_entries_as_hyperedges(entries);
8279
8280 let hypergraph = builder.build();
8282
8283 let output_dir = self
8285 .output_path
8286 .clone()
8287 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
8288 let hg_dir = output_dir
8289 .join(&self.config.graph_export.output_subdirectory)
8290 .join(&hg_settings.output_subdirectory);
8291
8292 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
8294 "unified" => {
8295 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8296 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8297 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
8298 })?;
8299 (
8300 metadata.num_nodes,
8301 metadata.num_edges,
8302 metadata.num_hyperedges,
8303 )
8304 }
8305 _ => {
8306 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
8308 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8309 SynthError::generation(format!("Hypergraph export failed: {e}"))
8310 })?;
8311 (
8312 metadata.num_nodes,
8313 metadata.num_edges,
8314 metadata.num_hyperedges,
8315 )
8316 }
8317 };
8318
8319 #[cfg(feature = "streaming")]
8321 if let Some(ref target_url) = hg_settings.stream_target {
8322 use crate::stream_client::{StreamClient, StreamConfig};
8323 use std::io::Write as _;
8324
8325 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
8326 let stream_config = StreamConfig {
8327 target_url: target_url.clone(),
8328 batch_size: hg_settings.stream_batch_size,
8329 api_key,
8330 ..StreamConfig::default()
8331 };
8332
8333 match StreamClient::new(stream_config) {
8334 Ok(mut client) => {
8335 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8336 match exporter.export_to_writer(&hypergraph, &mut client) {
8337 Ok(_) => {
8338 if let Err(e) = client.flush() {
8339 warn!("Failed to flush stream client: {}", e);
8340 } else {
8341 info!("Streamed {} records to {}", client.total_sent(), target_url);
8342 }
8343 }
8344 Err(e) => {
8345 warn!("Streaming export failed: {}", e);
8346 }
8347 }
8348 }
8349 Err(e) => {
8350 warn!("Failed to create stream client: {}", e);
8351 }
8352 }
8353 }
8354
8355 stats.graph_node_count += num_nodes;
8357 stats.graph_edge_count += num_edges;
8358 stats.graph_export_count += 1;
8359
8360 Ok(HypergraphExportInfo {
8361 node_count: num_nodes,
8362 edge_count: num_edges,
8363 hyperedge_count: num_hyperedges,
8364 output_path: hg_dir,
8365 })
8366 }
8367
8368 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
8373 let pb = self.create_progress_bar(100, "Generating Banking Data");
8374
8375 let orchestrator = BankingOrchestratorBuilder::new()
8377 .config(self.config.banking.clone())
8378 .seed(self.seed + 9000)
8379 .country_pack(self.primary_pack().clone())
8380 .build();
8381
8382 if let Some(pb) = &pb {
8383 pb.inc(10);
8384 }
8385
8386 let result = orchestrator.generate();
8388
8389 if let Some(pb) = &pb {
8390 pb.inc(90);
8391 pb.finish_with_message(format!(
8392 "Banking: {} customers, {} transactions",
8393 result.customers.len(),
8394 result.transactions.len()
8395 ));
8396 }
8397
8398 let mut banking_customers = result.customers;
8403 let core_customers = &self.master_data.customers;
8404 if !core_customers.is_empty() {
8405 for (i, bc) in banking_customers.iter_mut().enumerate() {
8406 let core = &core_customers[i % core_customers.len()];
8407 bc.name = CustomerName::business(&core.name);
8408 bc.residence_country = core.country.clone();
8409 bc.enterprise_customer_id = Some(core.customer_id.clone());
8410 }
8411 debug!(
8412 "Cross-referenced {} banking customers with {} core customers",
8413 banking_customers.len(),
8414 core_customers.len()
8415 );
8416 }
8417
8418 Ok(BankingSnapshot {
8419 customers: banking_customers,
8420 accounts: result.accounts,
8421 transactions: result.transactions,
8422 transaction_labels: result.transaction_labels,
8423 customer_labels: result.customer_labels,
8424 account_labels: result.account_labels,
8425 relationship_labels: result.relationship_labels,
8426 narratives: result.narratives,
8427 suspicious_count: result.stats.suspicious_count,
8428 scenario_count: result.scenarios.len(),
8429 })
8430 }
8431
8432 fn calculate_total_transactions(&self) -> u64 {
8434 let months = self.config.global.period_months as f64;
8435 self.config
8436 .companies
8437 .iter()
8438 .map(|c| {
8439 let annual = c.annual_transaction_volume.count() as f64;
8440 let weighted = annual * c.volume_weight;
8441 (weighted * months / 12.0) as u64
8442 })
8443 .sum()
8444 }
8445
8446 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
8448 if !self.phase_config.show_progress {
8449 return None;
8450 }
8451
8452 let pb = if let Some(mp) = &self.multi_progress {
8453 mp.add(ProgressBar::new(total))
8454 } else {
8455 ProgressBar::new(total)
8456 };
8457
8458 pb.set_style(
8459 ProgressStyle::default_bar()
8460 .template(&format!(
8461 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
8462 ))
8463 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
8464 .progress_chars("#>-"),
8465 );
8466
8467 Some(pb)
8468 }
8469
8470 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
8472 self.coa.clone()
8473 }
8474
/// Returns a shared reference to the orchestrator's master data snapshot.
pub fn get_master_data(&self) -> &MasterDataSnapshot {
    &self.master_data
}
8479
8480 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
8482 use super::lineage::LineageGraphBuilder;
8483
8484 let mut builder = LineageGraphBuilder::new();
8485
8486 builder.add_config_section("config:global", "Global Config");
8488 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
8489 builder.add_config_section("config:transactions", "Transaction Config");
8490
8491 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
8493 builder.add_generator_phase("phase:je", "Journal Entry Generation");
8494
8495 builder.configured_by("phase:coa", "config:chart_of_accounts");
8497 builder.configured_by("phase:je", "config:transactions");
8498
8499 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
8501 builder.produced_by("output:je", "phase:je");
8502
8503 if self.phase_config.generate_master_data {
8505 builder.add_config_section("config:master_data", "Master Data Config");
8506 builder.add_generator_phase("phase:master_data", "Master Data Generation");
8507 builder.configured_by("phase:master_data", "config:master_data");
8508 builder.input_to("phase:master_data", "phase:je");
8509 }
8510
8511 if self.phase_config.generate_document_flows {
8512 builder.add_config_section("config:document_flows", "Document Flow Config");
8513 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
8514 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
8515 builder.configured_by("phase:p2p", "config:document_flows");
8516 builder.configured_by("phase:o2c", "config:document_flows");
8517
8518 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
8519 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
8520 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
8521 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
8522 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
8523
8524 builder.produced_by("output:po", "phase:p2p");
8525 builder.produced_by("output:gr", "phase:p2p");
8526 builder.produced_by("output:vi", "phase:p2p");
8527 builder.produced_by("output:so", "phase:o2c");
8528 builder.produced_by("output:ci", "phase:o2c");
8529 }
8530
8531 if self.phase_config.inject_anomalies {
8532 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
8533 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
8534 builder.configured_by("phase:anomaly", "config:fraud");
8535 builder.add_output_file(
8536 "output:labels",
8537 "Anomaly Labels",
8538 "labels/anomaly_labels.csv",
8539 );
8540 builder.produced_by("output:labels", "phase:anomaly");
8541 }
8542
8543 if self.phase_config.generate_audit {
8544 builder.add_config_section("config:audit", "Audit Config");
8545 builder.add_generator_phase("phase:audit", "Audit Data Generation");
8546 builder.configured_by("phase:audit", "config:audit");
8547 }
8548
8549 if self.phase_config.generate_banking {
8550 builder.add_config_section("config:banking", "Banking Config");
8551 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
8552 builder.configured_by("phase:banking", "config:banking");
8553 }
8554
8555 if self.config.llm.enabled {
8556 builder.add_config_section("config:llm", "LLM Enrichment Config");
8557 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
8558 builder.configured_by("phase:llm_enrichment", "config:llm");
8559 }
8560
8561 if self.config.diffusion.enabled {
8562 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
8563 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
8564 builder.configured_by("phase:diffusion", "config:diffusion");
8565 }
8566
8567 if self.config.causal.enabled {
8568 builder.add_config_section("config:causal", "Causal Generation Config");
8569 builder.add_generator_phase("phase:causal", "Causal Overlay");
8570 builder.configured_by("phase:causal", "config:causal");
8571 }
8572
8573 builder.build()
8574 }
8575}
8576
8577fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
8579 match format {
8580 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
8581 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
8582 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
8583 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
8584 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
8585 }
8586}
8587
8588#[cfg(test)]
8589#[allow(clippy::unwrap_used)]
8590mod tests {
8591 use super::*;
8592 use datasynth_config::schema::*;
8593
/// Builds the baseline configuration shared by the tests in this module.
///
/// It describes a single USD company ("1000", "Test Company") in the
/// manufacturing sector, a one-month period starting 2024-01-01, seed 42,
/// no parallelism and a small chart of accounts. Every other section uses
/// its `Default` value.
fn create_test_config() -> GeneratorConfig {
    GeneratorConfig {
        global: GlobalConfig {
            industry: IndustrySector::Manufacturing,
            start_date: "2024-01-01".to_string(),
            period_months: 1,
            seed: Some(42),
            parallel: false,
            group_currency: "USD".to_string(),
            worker_threads: 0,
            memory_limit_mb: 0,
            fiscal_year_months: None,
        },
        companies: vec![CompanyConfig {
            code: "1000".to_string(),
            name: "Test Company".to_string(),
            currency: "USD".to_string(),
            country: "US".to_string(),
            annual_transaction_volume: TransactionVolume::TenK,
            volume_weight: 1.0,
            fiscal_year_variant: "K4".to_string(),
        }],
        chart_of_accounts: ChartOfAccountsConfig {
            complexity: CoAComplexity::Small,
            industry_specific: true,
            custom_accounts: None,
            min_hierarchy_depth: 2,
            max_hierarchy_depth: 4,
        },
        transactions: TransactionConfig::default(),
        output: OutputConfig::default(),
        fraud: FraudConfig::default(),
        internal_controls: InternalControlsConfig::default(),
        business_processes: BusinessProcessConfig::default(),
        user_personas: UserPersonaConfig::default(),
        templates: TemplateConfig::default(),
        approval: ApprovalConfig::default(),
        departments: DepartmentConfig::default(),
        master_data: MasterDataConfig::default(),
        document_flows: DocumentFlowConfig::default(),
        intercompany: IntercompanyConfig::default(),
        balance: BalanceConfig::default(),
        ocpm: OcpmConfig::default(),
        audit: AuditGenerationConfig::default(),
        banking: datasynth_banking::BankingConfig::default(),
        data_quality: DataQualitySchemaConfig::default(),
        scenario: ScenarioConfig::default(),
        temporal: TemporalDriftConfig::default(),
        graph_export: GraphExportConfig::default(),
        streaming: StreamingSchemaConfig::default(),
        rate_limit: RateLimitSchemaConfig::default(),
        temporal_attributes: TemporalAttributeSchemaConfig::default(),
        relationships: RelationshipSchemaConfig::default(),
        accounting_standards: AccountingStandardsConfig::default(),
        audit_standards: AuditStandardsConfig::default(),
        distributions: Default::default(),
        temporal_patterns: Default::default(),
        vendor_network: VendorNetworkSchemaConfig::default(),
        customer_segmentation: CustomerSegmentationSchemaConfig::default(),
        relationship_strength: RelationshipStrengthSchemaConfig::default(),
        cross_process_links: CrossProcessLinksSchemaConfig::default(),
        organizational_events: OrganizationalEventsSchemaConfig::default(),
        behavioral_drift: BehavioralDriftSchemaConfig::default(),
        market_drift: MarketDriftSchemaConfig::default(),
        drift_labeling: DriftLabelingSchemaConfig::default(),
        anomaly_injection: Default::default(),
        industry_specific: Default::default(),
        fingerprint_privacy: Default::default(),
        quality_gates: Default::default(),
        compliance: Default::default(),
        webhooks: Default::default(),
        llm: Default::default(),
        diffusion: Default::default(),
        causal: Default::default(),
        source_to_pay: Default::default(),
        financial_reporting: Default::default(),
        hr: Default::default(),
        manufacturing: Default::default(),
        sales_quotes: Default::default(),
        tax: Default::default(),
        treasury: Default::default(),
        project_accounting: Default::default(),
        esg: Default::default(),
        country_packs: None,
        scenarios: Default::default(),
        session: Default::default(),
    }
}
8682
/// Constructing an orchestrator with default phases succeeds for the baseline config.
#[test]
fn test_enhanced_orchestrator_creation() {
    let created = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(created.is_ok());
}
8689
/// With only journal entry generation enabled, generation succeeds and yields entries.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let outcome = orch.generate();

    assert!(outcome.is_ok());
    assert!(!outcome.unwrap().journal_entries.is_empty());
}
8709
/// With only master data enabled, vendors, customers and materials are produced.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let master = &generated.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
8734
/// With master data and document flows enabled, both P2P and O2C chains and
/// their leading documents are produced.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let flows = &generated.document_flows;

    // Chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document collections are populated as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
8768
/// With anomaly injection enabled, entries are generated and a label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();

    assert!(!generated.journal_entries.is_empty());
    assert!(generated.anomaly_labels.summary.is_some());
}
8791
/// Runs master data, document flows, journal entries and balance validation
/// together and checks that every stage left output behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();

    // Master data, document flows and journal entries.
    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    let flows = &generated.document_flows;
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger invoices.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    let validation = &generated.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
8833
/// Subledger AP/AR invoices mirror the vendor/customer invoices of the
/// document flows one-to-one, and the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let vendor_invoices = &generated.document_flows.vendor_invoices;
    let customer_invoices = &generated.document_flows.customer_invoices;
    let ap_invoices = &generated.subledger.ap_invoices;
    let ar_invoices = &generated.subledger.ar_invoices;

    // Source documents exist.
    assert!(!vendor_invoices.is_empty());
    assert!(!customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!ap_invoices.is_empty());
    assert!(!ar_invoices.is_empty());

    // One subledger record per source invoice.
    assert_eq!(ap_invoices.len(), vendor_invoices.len());
    assert_eq!(ar_invoices.len(), customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(generated.statistics.ap_invoice_count, ap_invoices.len());
    assert_eq!(generated.statistics.ar_invoice_count, ar_invoices.len());
}
8889
/// Balance validation runs over generated entries, finds none unbalanced and
/// reports equal debit and credit totals.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let validation = &generated.balance_validation;

    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    assert!(!validation.has_unbalanced_entries);

    assert_eq!(validation.total_debits, validation.total_credits);
}
8919
/// The reported statistics match the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let stats = &generated.statistics;
    let master = &generated.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
8958
/// `PhaseConfig::default()` enables the core phases, leaves anomaly injection
/// off and uses positive vendor/customer counts.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    let enabled_by_default = [
        defaults.generate_master_data,
        defaults.generate_document_flows,
        defaults.generate_journal_entries,
        defaults.validate_balances,
        defaults.show_progress,
    ];
    assert!(enabled_by_default.iter().all(|&flag| flag));

    assert!(!defaults.inject_anomalies);

    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
8971
/// A freshly constructed orchestrator exposes no chart of accounts.
#[test]
fn test_get_coa_before_generation() {
    let orch = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    let coa = orch.get_coa();

    assert!(coa.is_none());
}
8980
/// After a successful generation run the chart of accounts is available.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the side effect on the orchestrator matters here.
    let _ = orch.generate().unwrap();

    let coa = orch.get_coa();
    assert!(coa.is_some());
}
8999
/// A master-data-only run produces vendors in the generation result.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut orch = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let generated = orch.generate().unwrap();
    let vendors = &generated.master_data.vendors;

    assert!(!vendors.is_empty());
}
9023
/// `with_progress(false)` must leave `phase_config.show_progress` set to `false`.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = base.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
9034
/// Adds a second company (code "2000", EUR, DE) to the test config and checks
/// that master data statistics cover both companies.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Five vendors and five customers are requested per company, so with the
    // two companies asserted below at least ten of each are expected.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);

    // `assert_eq!` reports the actual company count on failure, which a bare
    // `assert!(a == b)` does not.
    assert_eq!(result.statistics.companies_count, 2);
}
9070
/// With master data generation off but document flows requested, the run must
/// still succeed and produce no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        // No master data is generated ...
        generate_master_data: false,
        // ... although document flows are asked for.
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let flows = &generated.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
9090
/// `statistics.total_line_items` must equal the sum of `line_count()` over
/// all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    // Recount the line items independently of the reported statistic.
    let mut expected_lines: u64 = 0;
    for entry in generated.journal_entries.iter() {
        expected_lines += entry.line_count() as u64;
    }

    assert_eq!(generated.statistics.total_line_items, expected_lines);
}
9114
/// Enables audit generation with two engagements and checks that every audit
/// collection is populated and that the statistics counters match the
/// collection lengths.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Exactly the requested number of engagements; the dependent collections
    // only need to be non-empty.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Reported counters must mirror the actual collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
9171
/// The config from `create_test_config` has LLM, diffusion and causal phases
/// disabled; a run with it must leave all of their statistics at zero/`None`
/// and produce no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let config = create_test_config();

    // Precondition: none of the optional phases is enabled in the test config.
    assert!(!config.llm.enabled);
    assert!(!config.diffusion.enabled);
    assert!(!config.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let generated = orchestrator.generate().unwrap();
    let stats = &generated.statistics;

    // LLM enrichment left no trace.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    // Neither did diffusion enhancement.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    // Nor the causal overlay.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    // Counterfactuals were not requested either.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(generated.counterfactual_pairs.is_empty());
}
9203
/// With `generate_counterfactuals` enabled, one counterfactual pair is
/// expected per journal entry, the statistics counter must agree, and all
/// `pair_id`s must be distinct.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // NOTE(review): when no journal entries are produced the checks below are
    // skipped and the test passes vacuously — confirm that this is intended.
    if result.journal_entries.is_empty() {
        return;
    }

    let entry_count = result.journal_entries.len();
    assert_eq!(result.counterfactual_pairs.len(), entry_count);
    assert_eq!(result.statistics.counterfactual_pair_count, entry_count);

    // Uniqueness only needs the ids by reference; borrowing avoids cloning
    // every `pair_id` just to count the distinct ones.
    let ids: std::collections::HashSet<_> = result
        .counterfactual_pairs
        .iter()
        .map(|p| &p.pair_id)
        .collect();
    assert_eq!(ids.len(), result.counterfactual_pairs.len());
}
9239
/// Enables LLM enrichment capped at three vendors and checks that the number
/// of enriched vendors is at least one and does not exceed the cap.
#[test]
fn test_llm_enrichment_enabled() {
    let mut config = create_test_config();
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 3;

    // Master data only: five vendors per company, three of everything else.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    let enriched = generated.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
9267
/// Enables diffusion enhancement with a sample size of 20 and checks that
/// exactly 20 samples are reported in the statistics.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut config = create_test_config();
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 50;
    config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let generated = orchestrator.generate().unwrap();

    // The reported sample count must equal the configured `sample_size`.
    let stats = &generated.statistics;
    assert_eq!(stats.diffusion_samples_generated, 20);
}
9290
/// Enables the causal overlay ("fraud_detection" template, 100 samples,
/// validation on) and checks the sample count and that a validation verdict
/// is recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut config = create_test_config();
    config.causal.enabled = true;
    config.causal.template = "fraud_detection".to_string();
    config.causal.sample_size = 100;
    config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let generated = orchestrator.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so some result (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
9316
/// Runs the causal overlay with the "revenue_cycle" template, 50 samples and
/// validation off; the sample count must match and no validation verdict may
/// be recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut config = create_test_config();
    config.causal.enabled = true;
    config.causal.template = "revenue_cycle".to_string();
    config.causal.sample_size = 50;
    config.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let generated = orchestrator.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was switched off, so the verdict stays unset.
    assert!(stats.causal_validation_passed.is_none());
}
9342
/// Turns on LLM enrichment, diffusion enhancement and the causal overlay in
/// one run and checks that each of them reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();

    // LLM enrichment, capped at two vendors.
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement: 20 steps, 10 samples.
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;

    // Causal overlay: 50 samples, with validation.
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let generated = orchestrator.generate().unwrap();
    let stats = &generated.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
9378
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and checks
/// that the LLM, diffusion and causal fields survive unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Serialize to a JSON string and parse it straight back.
    let encoded = serde_json::to_string(&original).unwrap();
    let round_tripped: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(round_tripped.llm_enrichment_ms, 42);
    assert_eq!(round_tripped.llm_vendors_enriched, 10);
    assert_eq!(round_tripped.diffusion_enhancement_ms, 100);
    assert_eq!(round_tripped.diffusion_samples_generated, 50);
    assert_eq!(round_tripped.causal_generation_ms, 200);
    assert_eq!(round_tripped.causal_samples_generated, 100);
    assert_eq!(round_tripped.causal_validation_passed, Some(true));
}
9405
/// Deserializes a JSON payload that lacks the LLM, diffusion and causal
/// fields and checks that those fields come back as zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload containing none of the `llm_*`, `diffusion_*` or `causal_*` keys.
    let legacy_payload = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_payload).unwrap();

    // Every field absent from the payload must fall back to its empty value.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
9455}