1use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33 BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42 SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
48use datasynth_fingerprint::{
49 io::FingerprintReader,
50 models::Fingerprint,
51 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
52};
53use datasynth_generators::{
54 AnomalyInjector,
56 AnomalyInjectorConfig,
57 AssetGenerator,
58 AuditEngagementGenerator,
60 BalanceTrackerConfig,
61 BankReconciliationGenerator,
63 BidEvaluationGenerator,
65 BidGenerator,
66 CatalogGenerator,
67 ChartOfAccountsGenerator,
69 ContractGenerator,
70 CustomerGenerator,
71 DataQualityConfig,
72 DataQualityInjector,
74 DataQualityStats,
75 DocumentFlowJeConfig,
77 DocumentFlowJeGenerator,
78 DocumentFlowLinker,
80 EmployeeGenerator,
81 EsgAnomalyLabel,
83 EvidenceGenerator,
84 FinancialStatementGenerator,
86 FindingGenerator,
87 JournalEntryGenerator,
88 JudgmentGenerator,
89 LatePaymentDistribution,
90 MaterialGenerator,
91 O2CDocumentChain,
92 O2CGenerator,
93 O2CGeneratorConfig,
94 O2CPaymentBehavior,
95 P2PDocumentChain,
96 P2PGenerator,
98 P2PGeneratorConfig,
99 P2PPaymentBehavior,
100 PaymentReference,
101 QualificationGenerator,
102 RfxGenerator,
103 RiskAssessmentGenerator,
104 RunningBalanceTracker,
106 ScorecardGenerator,
107 SourcingProjectGenerator,
108 SpendAnalysisGenerator,
109 ValidationError,
110 VendorGenerator,
112 WorkpaperGenerator,
113};
114use datasynth_graph::{
115 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
116 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
117};
118use datasynth_ocpm::{
119 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
120 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
121 OcpmUuidFactory, P2pDocuments, S2cDocuments,
122};
123
124use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
125use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
126use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
127use datasynth_core::llm::MockLlmProvider;
128use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
129use datasynth_core::models::documents::PaymentMethod;
130use datasynth_core::models::IndustrySector;
131use datasynth_generators::llm_enrichment::VendorLlmEnricher;
132
133fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
139 let payment_behavior = &schema_config.payment_behavior;
140 let late_dist = &payment_behavior.late_payment_days_distribution;
141
142 P2PGeneratorConfig {
143 three_way_match_rate: schema_config.three_way_match_rate,
144 partial_delivery_rate: schema_config.partial_delivery_rate,
145 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
147 max_price_variance_percent: schema_config.max_price_variance_percent,
148 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
149 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
150 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
151 payment_method_distribution: vec![
152 (PaymentMethod::BankTransfer, 0.60),
153 (PaymentMethod::Check, 0.25),
154 (PaymentMethod::Wire, 0.10),
155 (PaymentMethod::CreditCard, 0.05),
156 ],
157 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
159 late_payment_rate: payment_behavior.late_payment_rate,
160 late_payment_distribution: LatePaymentDistribution {
161 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
162 late_8_to_14: late_dist.late_8_to_14,
163 very_late_15_to_30: late_dist.very_late_15_to_30,
164 severely_late_31_to_60: late_dist.severely_late_31_to_60,
165 extremely_late_over_60: late_dist.extremely_late_over_60,
166 },
167 partial_payment_rate: payment_behavior.partial_payment_rate,
168 payment_correction_rate: payment_behavior.payment_correction_rate,
169 },
170 }
171}
172
173fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
175 let payment_behavior = &schema_config.payment_behavior;
176
177 O2CGeneratorConfig {
178 credit_check_failure_rate: schema_config.credit_check_failure_rate,
179 partial_shipment_rate: schema_config.partial_shipment_rate,
180 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
181 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
182 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
183 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
185 returns_rate: schema_config.return_rate,
186 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
187 payment_method_distribution: vec![
188 (PaymentMethod::BankTransfer, 0.50),
189 (PaymentMethod::Check, 0.30),
190 (PaymentMethod::Wire, 0.15),
191 (PaymentMethod::CreditCard, 0.05),
192 ],
193 payment_behavior: O2CPaymentBehavior {
194 partial_payment_rate: payment_behavior.partial_payments.rate,
195 short_payment_rate: payment_behavior.short_payments.rate,
196 max_short_percent: payment_behavior.short_payments.max_short_percent,
197 on_account_rate: payment_behavior.on_account_payments.rate,
198 payment_correction_rate: payment_behavior.payment_corrections.rate,
199 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
200 },
201 }
202}
203
/// Switches and volume knobs for the individual generation phases.
///
/// The defaults quoted below are the values set by this type's `Default` implementation.
/// How each flag is honored is decided inside the phase methods of the orchestrator,
/// most of which are outside this view.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Toggle for master-data generation (default `true`).
    pub generate_master_data: bool,
    /// Toggle for document-flow generation (default `true`).
    pub generate_document_flows: bool,
    /// Toggle for OCPM event generation (default `false`).
    pub generate_ocpm_events: bool,
    /// Toggle for journal-entry generation (default `true`).
    pub generate_journal_entries: bool,
    /// Toggle for anomaly injection (default `false`).
    pub inject_anomalies: bool,
    /// Toggle for data-quality injection (default `false`).
    pub inject_data_quality: bool,
    /// Toggle for balance validation (default `true`).
    pub validate_balances: bool,
    /// Whether progress reporting is enabled (default `true`); also set by `with_progress`.
    pub show_progress: bool,
    /// Vendor volume per company (default `50`).
    pub vendors_per_company: usize,
    /// Customer volume per company (default `100`).
    pub customers_per_company: usize,
    /// Material volume per company (default `200`).
    pub materials_per_company: usize,
    /// Asset volume per company (default `50`).
    pub assets_per_company: usize,
    /// Employee volume per company (default `100`).
    pub employees_per_company: usize,
    /// Number of P2P chains (default `100`).
    pub p2p_chains: usize,
    /// Number of O2C chains (default `100`).
    pub o2c_chains: usize,
    /// Toggle for audit data generation (default `false`).
    pub generate_audit: bool,
    /// Number of audit engagements (default `5`).
    pub audit_engagements: usize,
    /// Workpapers per engagement (default `20`).
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper (default `5`).
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement (default `15`).
    pub risks_per_engagement: usize,
    /// Findings per engagement (default `8`).
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement (default `10`).
    pub judgments_per_engagement: usize,
    /// Toggle for banking data generation (default `false`).
    pub generate_banking: bool,
    /// Toggle for graph export (default `false`).
    pub generate_graph_export: bool,
    /// Toggle for sourcing data generation (default `false`).
    pub generate_sourcing: bool,
    /// Toggle for bank reconciliation generation (default `false`).
    pub generate_bank_reconciliation: bool,
    /// Toggle for financial statement generation (default `false`).
    pub generate_financial_statements: bool,
    /// Toggle for accounting-standards data generation (default `false`).
    pub generate_accounting_standards: bool,
    /// Toggle for manufacturing data generation (default `false`).
    pub generate_manufacturing: bool,
    /// Toggle for sales quote / KPI / budget generation (default `false`).
    pub generate_sales_kpi_budgets: bool,
    /// Toggle for tax data generation (default `false`).
    pub generate_tax: bool,
    /// Toggle for ESG data generation (default `false`).
    pub generate_esg: bool,
    /// Toggle for intercompany data generation (default `false`).
    pub generate_intercompany: bool,
}
274
275impl Default for PhaseConfig {
276 fn default() -> Self {
277 Self {
278 generate_master_data: true,
279 generate_document_flows: true,
280 generate_ocpm_events: false, generate_journal_entries: true,
282 inject_anomalies: false,
283 inject_data_quality: false, validate_balances: true,
285 show_progress: true,
286 vendors_per_company: 50,
287 customers_per_company: 100,
288 materials_per_company: 200,
289 assets_per_company: 50,
290 employees_per_company: 100,
291 p2p_chains: 100,
292 o2c_chains: 100,
293 generate_audit: false, audit_engagements: 5,
295 workpapers_per_engagement: 20,
296 evidence_per_workpaper: 5,
297 risks_per_engagement: 15,
298 findings_per_engagement: 8,
299 judgments_per_engagement: 10,
300 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, }
312 }
313}
314
315#[derive(Debug, Clone, Default)]
317pub struct MasterDataSnapshot {
318 pub vendors: Vec<Vendor>,
320 pub customers: Vec<Customer>,
322 pub materials: Vec<Material>,
324 pub assets: Vec<FixedAsset>,
326 pub employees: Vec<Employee>,
328}
329
/// Size figures and location of a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Node count of the export.
    pub node_count: usize,
    /// Edge count of the export.
    pub edge_count: usize,
    /// Hyperedge count of the export.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
342
343#[derive(Debug, Clone, Default)]
345pub struct DocumentFlowSnapshot {
346 pub p2p_chains: Vec<P2PDocumentChain>,
348 pub o2c_chains: Vec<O2CDocumentChain>,
350 pub purchase_orders: Vec<documents::PurchaseOrder>,
352 pub goods_receipts: Vec<documents::GoodsReceipt>,
354 pub vendor_invoices: Vec<documents::VendorInvoice>,
356 pub sales_orders: Vec<documents::SalesOrder>,
358 pub deliveries: Vec<documents::Delivery>,
360 pub customer_invoices: Vec<documents::CustomerInvoice>,
362 pub payments: Vec<documents::Payment>,
364}
365
366#[derive(Debug, Clone, Default)]
368pub struct SubledgerSnapshot {
369 pub ap_invoices: Vec<APInvoice>,
371 pub ar_invoices: Vec<ARInvoice>,
373 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
375 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
377 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
379}
380
381#[derive(Debug, Clone, Default)]
383pub struct OcpmSnapshot {
384 pub event_log: Option<OcpmEventLog>,
386 pub event_count: usize,
388 pub object_count: usize,
390 pub case_count: usize,
392}
393
394#[derive(Debug, Clone, Default)]
396pub struct AuditSnapshot {
397 pub engagements: Vec<AuditEngagement>,
399 pub workpapers: Vec<Workpaper>,
401 pub evidence: Vec<AuditEvidence>,
403 pub risk_assessments: Vec<RiskAssessment>,
405 pub findings: Vec<AuditFinding>,
407 pub judgments: Vec<ProfessionalJudgment>,
409}
410
411#[derive(Debug, Clone, Default)]
413pub struct BankingSnapshot {
414 pub customers: Vec<BankingCustomer>,
416 pub accounts: Vec<BankAccount>,
418 pub transactions: Vec<BankTransaction>,
420 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
422 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
424 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
426 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
428 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
430 pub suspicious_count: usize,
432 pub scenario_count: usize,
434}
435
436#[derive(Debug, Clone, Default, Serialize)]
438pub struct GraphExportSnapshot {
439 pub exported: bool,
441 pub graph_count: usize,
443 pub exports: HashMap<String, GraphExportInfo>,
445}
446
447#[derive(Debug, Clone, Serialize)]
449pub struct GraphExportInfo {
450 pub name: String,
452 pub format: String,
454 pub output_path: PathBuf,
456 pub node_count: usize,
458 pub edge_count: usize,
460}
461
462#[derive(Debug, Clone, Default)]
464pub struct SourcingSnapshot {
465 pub spend_analyses: Vec<SpendAnalysis>,
467 pub sourcing_projects: Vec<SourcingProject>,
469 pub qualifications: Vec<SupplierQualification>,
471 pub rfx_events: Vec<RfxEvent>,
473 pub bids: Vec<SupplierBid>,
475 pub bid_evaluations: Vec<BidEvaluation>,
477 pub contracts: Vec<ProcurementContract>,
479 pub catalog_items: Vec<CatalogItem>,
481 pub scorecards: Vec<SupplierScorecard>,
483}
484
485#[derive(Debug, Clone, Serialize, Deserialize)]
487pub struct PeriodTrialBalance {
488 pub fiscal_year: u16,
490 pub fiscal_period: u8,
492 pub period_start: NaiveDate,
494 pub period_end: NaiveDate,
496 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
498}
499
500#[derive(Debug, Clone, Default)]
502pub struct FinancialReportingSnapshot {
503 pub financial_statements: Vec<FinancialStatement>,
505 pub bank_reconciliations: Vec<BankReconciliation>,
507 pub trial_balances: Vec<PeriodTrialBalance>,
509}
510
511#[derive(Debug, Clone, Default)]
513pub struct HrSnapshot {
514 pub payroll_runs: Vec<PayrollRun>,
516 pub payroll_line_items: Vec<PayrollLineItem>,
518 pub time_entries: Vec<TimeEntry>,
520 pub expense_reports: Vec<ExpenseReport>,
522 pub payroll_run_count: usize,
524 pub payroll_line_item_count: usize,
526 pub time_entry_count: usize,
528 pub expense_report_count: usize,
530}
531
532#[derive(Debug, Clone, Default)]
534pub struct AccountingStandardsSnapshot {
535 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
537 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
539 pub revenue_contract_count: usize,
541 pub impairment_test_count: usize,
543}
544
545#[derive(Debug, Clone, Default)]
547pub struct ManufacturingSnapshot {
548 pub production_orders: Vec<ProductionOrder>,
550 pub quality_inspections: Vec<QualityInspection>,
552 pub cycle_counts: Vec<CycleCount>,
554 pub production_order_count: usize,
556 pub quality_inspection_count: usize,
558 pub cycle_count_count: usize,
560}
561
562#[derive(Debug, Clone, Default)]
564pub struct SalesKpiBudgetsSnapshot {
565 pub sales_quotes: Vec<SalesQuote>,
567 pub kpis: Vec<ManagementKpi>,
569 pub budgets: Vec<Budget>,
571 pub sales_quote_count: usize,
573 pub kpi_count: usize,
575 pub budget_line_count: usize,
577}
578
579#[derive(Debug, Clone, Default)]
581pub struct AnomalyLabels {
582 pub labels: Vec<LabeledAnomaly>,
584 pub summary: Option<AnomalySummary>,
586 pub by_type: HashMap<String, usize>,
588}
589
590#[derive(Debug, Clone, Default)]
592pub struct BalanceValidationResult {
593 pub validated: bool,
595 pub is_balanced: bool,
597 pub entries_processed: u64,
599 pub total_debits: rust_decimal::Decimal,
601 pub total_credits: rust_decimal::Decimal,
603 pub accounts_tracked: usize,
605 pub companies_tracked: usize,
607 pub validation_errors: Vec<ValidationError>,
609 pub has_unbalanced_entries: bool,
611}
612
613#[derive(Debug, Clone, Default)]
615pub struct TaxSnapshot {
616 pub jurisdictions: Vec<TaxJurisdiction>,
618 pub codes: Vec<TaxCode>,
620 pub tax_lines: Vec<TaxLine>,
622 pub tax_returns: Vec<TaxReturn>,
624 pub tax_provisions: Vec<TaxProvision>,
626 pub withholding_records: Vec<WithholdingTaxRecord>,
628 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
630 pub jurisdiction_count: usize,
632 pub code_count: usize,
634}
635
636#[derive(Debug, Clone, Default, Serialize, Deserialize)]
638pub struct IntercompanySnapshot {
639 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
641 pub seller_journal_entries: Vec<JournalEntry>,
643 pub buyer_journal_entries: Vec<JournalEntry>,
645 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
647 pub matched_pair_count: usize,
649 pub elimination_entry_count: usize,
651 pub match_rate: f64,
653}
654
655#[derive(Debug, Clone, Default)]
657pub struct EsgSnapshot {
658 pub emissions: Vec<EmissionRecord>,
660 pub energy: Vec<EnergyConsumption>,
662 pub water: Vec<WaterUsage>,
664 pub waste: Vec<WasteRecord>,
666 pub diversity: Vec<WorkforceDiversityMetric>,
668 pub pay_equity: Vec<PayEquityMetric>,
670 pub safety_incidents: Vec<SafetyIncident>,
672 pub safety_metrics: Vec<SafetyMetric>,
674 pub governance: Vec<GovernanceMetric>,
676 pub supplier_assessments: Vec<SupplierEsgAssessment>,
678 pub materiality: Vec<MaterialityAssessment>,
680 pub disclosures: Vec<EsgDisclosure>,
682 pub climate_scenarios: Vec<ClimateScenario>,
684 pub anomaly_labels: Vec<EsgAnomalyLabel>,
686 pub emission_count: usize,
688 pub disclosure_count: usize,
690}
691
692#[derive(Debug, Clone, Default)]
694pub struct TreasurySnapshot {
695 pub cash_positions: Vec<CashPosition>,
697 pub cash_forecasts: Vec<CashForecast>,
699 pub cash_pools: Vec<CashPool>,
701 pub cash_pool_sweeps: Vec<CashPoolSweep>,
703 pub hedging_instruments: Vec<HedgingInstrument>,
705 pub hedge_relationships: Vec<HedgeRelationship>,
707 pub debt_instruments: Vec<DebtInstrument>,
709 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
711}
712
713#[derive(Debug, Clone, Default)]
715pub struct ProjectAccountingSnapshot {
716 pub projects: Vec<Project>,
718 pub cost_lines: Vec<ProjectCostLine>,
720 pub revenue_records: Vec<ProjectRevenue>,
722 pub earned_value_metrics: Vec<EarnedValueMetric>,
724 pub change_orders: Vec<ChangeOrder>,
726 pub milestones: Vec<ProjectMilestone>,
728}
729
730#[derive(Debug)]
732pub struct EnhancedGenerationResult {
733 pub chart_of_accounts: ChartOfAccounts,
735 pub master_data: MasterDataSnapshot,
737 pub document_flows: DocumentFlowSnapshot,
739 pub subledger: SubledgerSnapshot,
741 pub ocpm: OcpmSnapshot,
743 pub audit: AuditSnapshot,
745 pub banking: BankingSnapshot,
747 pub graph_export: GraphExportSnapshot,
749 pub sourcing: SourcingSnapshot,
751 pub financial_reporting: FinancialReportingSnapshot,
753 pub hr: HrSnapshot,
755 pub accounting_standards: AccountingStandardsSnapshot,
757 pub manufacturing: ManufacturingSnapshot,
759 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
761 pub tax: TaxSnapshot,
763 pub esg: EsgSnapshot,
765 pub treasury: TreasurySnapshot,
767 pub project_accounting: ProjectAccountingSnapshot,
769 pub intercompany: IntercompanySnapshot,
771 pub journal_entries: Vec<JournalEntry>,
773 pub anomaly_labels: AnomalyLabels,
775 pub balance_validation: BalanceValidationResult,
777 pub data_quality_stats: DataQualityStats,
779 pub statistics: EnhancedGenerationStatistics,
781 pub lineage: Option<super::lineage::LineageGraph>,
783 pub gate_result: Option<datasynth_eval::gates::GateResult>,
785 pub internal_controls: Vec<InternalControl>,
787 pub opening_balances: Vec<GeneratedOpeningBalance>,
789 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
791}
792
793#[derive(Debug, Clone, Default, Serialize, Deserialize)]
795pub struct EnhancedGenerationStatistics {
796 pub total_entries: u64,
798 pub total_line_items: u64,
800 pub accounts_count: usize,
802 pub companies_count: usize,
804 pub period_months: u32,
806 pub vendor_count: usize,
808 pub customer_count: usize,
809 pub material_count: usize,
810 pub asset_count: usize,
811 pub employee_count: usize,
812 pub p2p_chain_count: usize,
814 pub o2c_chain_count: usize,
815 pub ap_invoice_count: usize,
817 pub ar_invoice_count: usize,
818 pub ocpm_event_count: usize,
820 pub ocpm_object_count: usize,
821 pub ocpm_case_count: usize,
822 pub audit_engagement_count: usize,
824 pub audit_workpaper_count: usize,
825 pub audit_evidence_count: usize,
826 pub audit_risk_count: usize,
827 pub audit_finding_count: usize,
828 pub audit_judgment_count: usize,
829 pub anomalies_injected: usize,
831 pub data_quality_issues: usize,
833 pub banking_customer_count: usize,
835 pub banking_account_count: usize,
836 pub banking_transaction_count: usize,
837 pub banking_suspicious_count: usize,
838 pub graph_export_count: usize,
840 pub graph_node_count: usize,
841 pub graph_edge_count: usize,
842 #[serde(default)]
844 pub llm_enrichment_ms: u64,
845 #[serde(default)]
847 pub llm_vendors_enriched: usize,
848 #[serde(default)]
850 pub diffusion_enhancement_ms: u64,
851 #[serde(default)]
853 pub diffusion_samples_generated: usize,
854 #[serde(default)]
856 pub causal_generation_ms: u64,
857 #[serde(default)]
859 pub causal_samples_generated: usize,
860 #[serde(default)]
862 pub causal_validation_passed: Option<bool>,
863 #[serde(default)]
865 pub sourcing_project_count: usize,
866 #[serde(default)]
867 pub rfx_event_count: usize,
868 #[serde(default)]
869 pub bid_count: usize,
870 #[serde(default)]
871 pub contract_count: usize,
872 #[serde(default)]
873 pub catalog_item_count: usize,
874 #[serde(default)]
875 pub scorecard_count: usize,
876 #[serde(default)]
878 pub financial_statement_count: usize,
879 #[serde(default)]
880 pub bank_reconciliation_count: usize,
881 #[serde(default)]
883 pub payroll_run_count: usize,
884 #[serde(default)]
885 pub time_entry_count: usize,
886 #[serde(default)]
887 pub expense_report_count: usize,
888 #[serde(default)]
890 pub revenue_contract_count: usize,
891 #[serde(default)]
892 pub impairment_test_count: usize,
893 #[serde(default)]
895 pub production_order_count: usize,
896 #[serde(default)]
897 pub quality_inspection_count: usize,
898 #[serde(default)]
899 pub cycle_count_count: usize,
900 #[serde(default)]
902 pub sales_quote_count: usize,
903 #[serde(default)]
904 pub kpi_count: usize,
905 #[serde(default)]
906 pub budget_line_count: usize,
907 #[serde(default)]
909 pub tax_jurisdiction_count: usize,
910 #[serde(default)]
911 pub tax_code_count: usize,
912 #[serde(default)]
914 pub esg_emission_count: usize,
915 #[serde(default)]
916 pub esg_disclosure_count: usize,
917 #[serde(default)]
919 pub ic_matched_pair_count: usize,
920 #[serde(default)]
921 pub ic_elimination_count: usize,
922 #[serde(default)]
924 pub ic_transaction_count: usize,
925 #[serde(default)]
927 pub fa_subledger_count: usize,
928 #[serde(default)]
930 pub inventory_subledger_count: usize,
931 #[serde(default)]
933 pub treasury_debt_instrument_count: usize,
934 #[serde(default)]
936 pub treasury_hedging_instrument_count: usize,
937 #[serde(default)]
939 pub project_count: usize,
940 #[serde(default)]
942 pub project_change_order_count: usize,
943 #[serde(default)]
945 pub tax_provision_count: usize,
946 #[serde(default)]
948 pub opening_balance_count: usize,
949 #[serde(default)]
951 pub subledger_reconciliation_count: usize,
952 #[serde(default)]
954 pub tax_line_count: usize,
955 #[serde(default)]
957 pub project_cost_line_count: usize,
958 #[serde(default)]
960 pub cash_position_count: usize,
961}
962
963pub struct EnhancedOrchestrator {
965 config: GeneratorConfig,
966 phase_config: PhaseConfig,
967 coa: Option<Arc<ChartOfAccounts>>,
968 master_data: MasterDataSnapshot,
969 seed: u64,
970 multi_progress: Option<MultiProgress>,
971 resource_guard: ResourceGuard,
973 output_path: Option<PathBuf>,
975 copula_generators: Vec<CopulaGeneratorSpec>,
977 country_pack_registry: datasynth_core::CountryPackRegistry,
979}
980
981impl EnhancedOrchestrator {
982 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
984 datasynth_config::validate_config(&config)?;
985
986 let seed = config.global.seed.unwrap_or_else(rand::random);
987
988 let resource_guard = Self::build_resource_guard(&config, None);
990
991 let country_pack_registry = match &config.country_packs {
993 Some(cp) => {
994 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
995 .map_err(|e| SynthError::config(e.to_string()))?
996 }
997 None => datasynth_core::CountryPackRegistry::builtin_only()
998 .map_err(|e| SynthError::config(e.to_string()))?,
999 };
1000
1001 Ok(Self {
1002 config,
1003 phase_config,
1004 coa: None,
1005 master_data: MasterDataSnapshot::default(),
1006 seed,
1007 multi_progress: None,
1008 resource_guard,
1009 output_path: None,
1010 copula_generators: Vec::new(),
1011 country_pack_registry,
1012 })
1013 }
1014
1015 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1017 Self::new(config, PhaseConfig::default())
1018 }
1019
1020 pub fn with_progress(mut self, show: bool) -> Self {
1022 self.phase_config.show_progress = show;
1023 if show {
1024 self.multi_progress = Some(MultiProgress::new());
1025 }
1026 self
1027 }
1028
1029 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1031 let path = path.into();
1032 self.output_path = Some(path.clone());
1033 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1035 self
1036 }
1037
1038 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1040 &self.country_pack_registry
1041 }
1042
1043 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1045 self.country_pack_registry.get_by_str(country)
1046 }
1047
1048 fn primary_country_code(&self) -> &str {
1051 self.config
1052 .companies
1053 .first()
1054 .map(|c| c.country.as_str())
1055 .unwrap_or("US")
1056 }
1057
1058 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1060 self.country_pack_for(self.primary_country_code())
1061 }
1062
1063 pub fn has_copulas(&self) -> bool {
1068 !self.copula_generators.is_empty()
1069 }
1070
1071 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1077 &self.copula_generators
1078 }
1079
1080 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1084 &mut self.copula_generators
1085 }
1086
1087 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1091 self.copula_generators
1092 .iter_mut()
1093 .find(|c| c.name == copula_name)
1094 .map(|c| c.generator.sample())
1095 }
1096
1097 pub fn from_fingerprint(
1120 fingerprint_path: &std::path::Path,
1121 phase_config: PhaseConfig,
1122 scale: f64,
1123 ) -> SynthResult<Self> {
1124 info!("Loading fingerprint from: {}", fingerprint_path.display());
1125
1126 let reader = FingerprintReader::new();
1128 let fingerprint = reader
1129 .read_from_file(fingerprint_path)
1130 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1131
1132 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1133 }
1134
1135 pub fn from_fingerprint_data(
1142 fingerprint: Fingerprint,
1143 phase_config: PhaseConfig,
1144 scale: f64,
1145 ) -> SynthResult<Self> {
1146 info!(
1147 "Synthesizing config from fingerprint (version: {}, tables: {})",
1148 fingerprint.manifest.version,
1149 fingerprint.schema.tables.len()
1150 );
1151
1152 let seed: u64 = rand::random();
1154
1155 let options = SynthesisOptions {
1157 scale,
1158 seed: Some(seed),
1159 preserve_correlations: true,
1160 inject_anomalies: true,
1161 };
1162 let synthesizer = ConfigSynthesizer::with_options(options);
1163
1164 let synthesis_result = synthesizer
1166 .synthesize_full(&fingerprint, seed)
1167 .map_err(|e| {
1168 SynthError::config(format!(
1169 "Failed to synthesize config from fingerprint: {}",
1170 e
1171 ))
1172 })?;
1173
1174 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1176 Self::base_config_for_industry(industry)
1177 } else {
1178 Self::base_config_for_industry("manufacturing")
1179 };
1180
1181 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1183
1184 info!(
1186 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1187 fingerprint.schema.tables.len(),
1188 scale,
1189 synthesis_result.copula_generators.len()
1190 );
1191
1192 if !synthesis_result.copula_generators.is_empty() {
1193 for spec in &synthesis_result.copula_generators {
1194 info!(
1195 " Copula '{}' for table '{}': {} columns",
1196 spec.name,
1197 spec.table,
1198 spec.columns.len()
1199 );
1200 }
1201 }
1202
1203 let mut orchestrator = Self::new(config, phase_config)?;
1205
1206 orchestrator.copula_generators = synthesis_result.copula_generators;
1208
1209 Ok(orchestrator)
1210 }
1211
1212 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1214 use datasynth_config::presets::create_preset;
1215 use datasynth_config::TransactionVolume;
1216 use datasynth_core::models::{CoAComplexity, IndustrySector};
1217
1218 let sector = match industry.to_lowercase().as_str() {
1219 "manufacturing" => IndustrySector::Manufacturing,
1220 "retail" => IndustrySector::Retail,
1221 "financial" | "financial_services" => IndustrySector::FinancialServices,
1222 "healthcare" => IndustrySector::Healthcare,
1223 "technology" | "tech" => IndustrySector::Technology,
1224 _ => IndustrySector::Manufacturing,
1225 };
1226
1227 create_preset(
1229 sector,
1230 1, 12, CoAComplexity::Medium,
1233 TransactionVolume::TenK,
1234 )
1235 }
1236
1237 fn apply_config_patch(
1239 mut config: GeneratorConfig,
1240 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1241 ) -> GeneratorConfig {
1242 use datasynth_fingerprint::synthesis::ConfigValue;
1243
1244 for (key, value) in patch.values() {
1245 match (key.as_str(), value) {
1246 ("transactions.count", ConfigValue::Integer(n)) => {
1249 info!(
1250 "Fingerprint suggests {} transactions (apply via company volumes)",
1251 n
1252 );
1253 }
1254 ("global.period_months", ConfigValue::Integer(n)) => {
1255 config.global.period_months = *n as u32;
1256 }
1257 ("global.start_date", ConfigValue::String(s)) => {
1258 config.global.start_date = s.clone();
1259 }
1260 ("global.seed", ConfigValue::Integer(n)) => {
1261 config.global.seed = Some(*n as u64);
1262 }
1263 ("fraud.enabled", ConfigValue::Bool(b)) => {
1264 config.fraud.enabled = *b;
1265 }
1266 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1267 config.fraud.fraud_rate = *f;
1268 }
1269 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1270 config.data_quality.enabled = *b;
1271 }
1272 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1274 config.fraud.enabled = *b;
1275 }
1276 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1277 config.fraud.fraud_rate = *f;
1278 }
1279 _ => {
1280 debug!("Ignoring unknown config patch key: {}", key);
1281 }
1282 }
1283 }
1284
1285 config
1286 }
1287
1288 fn build_resource_guard(
1290 config: &GeneratorConfig,
1291 output_path: Option<PathBuf>,
1292 ) -> ResourceGuard {
1293 let mut builder = ResourceGuardBuilder::new();
1294
1295 if config.global.memory_limit_mb > 0 {
1297 builder = builder.memory_limit(config.global.memory_limit_mb);
1298 }
1299
1300 if let Some(path) = output_path {
1302 builder = builder.output_path(path).min_free_disk(100); }
1304
1305 builder = builder.conservative();
1307
1308 builder.build()
1309 }
1310
1311 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1316 self.resource_guard.check()
1317 }
1318
1319 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1321 let level = self.resource_guard.check()?;
1322
1323 if level != DegradationLevel::Normal {
1324 warn!(
1325 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1326 phase,
1327 level,
1328 self.resource_guard.current_memory_mb(),
1329 self.resource_guard.available_disk_mb()
1330 );
1331 }
1332
1333 Ok(level)
1334 }
1335
1336 fn get_degradation_actions(&self) -> DegradationActions {
1338 self.resource_guard.get_actions()
1339 }
1340
1341 fn check_memory_limit(&self) -> SynthResult<()> {
1343 self.check_resources()?;
1344 Ok(())
1345 }
1346
    /// Runs the complete enhanced generation workflow and returns everything it produced.
    ///
    /// The phases are executed strictly in the order written below; journal entries
    /// from several phases are accumulated into one `entries` vector which is then
    /// passed through anomaly injection, balance validation, subledger
    /// reconciliation and data-quality injection before the remaining exports run.
    ///
    /// # Errors
    ///
    /// Returns `SynthError::resource` when the initial resource check already
    /// reports `DegradationLevel::Emergency`, and propagates the error of any
    /// phase that is invoked with `?`.
    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
        info!("Starting enhanced generation workflow");
        info!(
            "Config: industry={:?}, period_months={}, companies={}",
            self.config.global.industry,
            self.config.global.period_months,
            self.config.companies.len()
        );

        // Refuse to start at all when the guard already reports an emergency.
        let initial_level = self.check_resources_with_log("initial")?;
        if initial_level == DegradationLevel::Emergency {
            return Err(SynthError::resource(
                "Insufficient resources to start generation",
            ));
        }

        let mut stats = EnhancedGenerationStatistics {
            companies_count: self.config.companies.len(),
            period_months: self.config.global.period_months,
            ..Default::default()
        };

        // Chart of accounts (logged as "Phase 1").
        let coa = self.phase_chart_of_accounts(&mut stats)?;

        // Master data (logged as "Phase 2").
        self.phase_master_data(&mut stats)?;

        // Document flows plus subledger records; also yields the FA acquisition JEs.
        let (mut document_flows, subledger, fa_journal_entries) =
            self.phase_document_flows(&mut stats)?;

        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;

        // Journal entries derived from document flows plus standalone entries.
        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;

        if !fa_journal_entries.is_empty() {
            debug!(
                "Appending {} FA acquisition JEs to main entries",
                fa_journal_entries.len()
            );
            entries.extend(fa_journal_entries);
        }

        // Snapshot of the degradation actions taken at this point; it is reused
        // later for both anomaly injection and data-quality injection.
        let actions = self.get_degradation_actions();

        let sourcing = self.phase_sourcing_data(&mut stats)?;

        // Back-fill contract ids on purchase orders that have none, using the
        // first sourcing contract whose vendor id matches the purchase order's.
        if !sourcing.contracts.is_empty() {
            let mut linked_count = 0usize;
            for chain in &mut document_flows.p2p_chains {
                if chain.purchase_order.contract_id.is_none() {
                    if let Some(contract) = sourcing
                        .contracts
                        .iter()
                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
                    {
                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
                        linked_count += 1;
                    }
                }
            }
            if linked_count > 0 {
                debug!(
                    "Linked {} purchase orders to S2C contracts by vendor match",
                    linked_count
                );
            }
        }

        let intercompany = self.phase_intercompany(&mut stats)?;

        // Intercompany JEs are cloned (not moved) because `intercompany` itself
        // is still needed for the additional graphs and the final result.
        if !intercompany.seller_journal_entries.is_empty()
            || !intercompany.buyer_journal_entries.is_empty()
        {
            let ic_je_count = intercompany.seller_journal_entries.len()
                + intercompany.buyer_journal_entries.len();
            entries.extend(intercompany.seller_journal_entries.iter().cloned());
            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
            debug!(
                "Appended {} IC journal entries to main entries",
                ic_je_count
            );
        }

        let hr = self.phase_hr_data(&mut stats)?;

        if !hr.payroll_runs.is_empty() {
            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
            entries.extend(payroll_jes);
        }

        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;

        if !manufacturing_snap.production_orders.is_empty() {
            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
            debug!("Generated {} JEs from production orders", mfg_jes.len());
            entries.extend(mfg_jes);
        }

        // Entry and line-item totals are set here, after every phase that
        // appends journal entries has run; they are left untouched when
        // `entries` is empty.
        if !entries.is_empty() {
            stats.total_entries = entries.len() as u64;
            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
            debug!(
                "Final entry count: {}, line items: {} (after all JE-generating phases)",
                stats.total_entries, stats.total_line_items
            );
        }

        // Anomalies are injected before balances are validated, so validation
        // sees the anomalous entries; data-quality injection runs after both
        // validation and subledger reconciliation.
        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;

        let balance_validation = self.phase_balance_validation(&entries)?;

        let subledger_reconciliation =
            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;

        let data_quality_stats =
            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;

        let audit = self.phase_audit_data(&entries, &mut stats)?;

        let banking = self.phase_banking_data(&mut stats)?;

        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;

        // The next three phases return nothing and are not invoked with `?`,
        // so they cannot abort the workflow from here.
        self.phase_llm_enrichment(&mut stats);

        self.phase_diffusion_enhancement(&mut stats);

        self.phase_causal_overlay(&mut stats);

        let financial_reporting =
            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;

        let accounting_standards = self.phase_accounting_standards(&mut stats)?;

        // OCPM events are generated only now because they take the sourcing,
        // HR, manufacturing, banking, audit and financial-reporting snapshots.
        let ocpm = self.phase_ocpm_events(
            &document_flows,
            &sourcing,
            &hr,
            &manufacturing_snap,
            &banking,
            &audit,
            &financial_reporting,
            &mut stats,
        )?;

        let sales_kpi_budgets =
            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;

        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;

        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;

        let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;

        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;

        // The hypergraph export consumes most of the snapshots built above.
        self.phase_hypergraph_export(
            &coa,
            &entries,
            &document_flows,
            &sourcing,
            &hr,
            &manufacturing_snap,
            &banking,
            &audit,
            &financial_reporting,
            &ocpm,
            &mut stats,
        )?;

        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
        }

        // Informational notes about config sections that are enabled; nothing
        // else in this function acts on these flags.
        if self.config.streaming.enabled {
            info!("Note: streaming config is enabled but batch mode does not use it");
        }
        if self.config.vendor_network.enabled {
            debug!("Vendor network config available; relationship graph generation is partial");
        }
        if self.config.customer_segmentation.enabled {
            debug!("Customer segmentation config available; segment-aware generation is partial");
        }

        let resource_stats = self.resource_guard.stats();
        info!(
            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
            resource_stats.disk.estimated_bytes_written,
            resource_stats.degradation_level
        );

        let lineage = self.build_lineage_graph();

        // Optional quality gates: build an evaluation from what this run knows
        // and hand it to the gate engine for the configured profile.
        let gate_result = if self.config.quality_gates.enabled {
            let profile_name = &self.config.quality_gates.profile;
            match datasynth_eval::gates::get_profile(profile_name) {
                Some(profile) => {
                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();

                    // Balance details are filled in only when validation ran.
                    if balance_validation.validated {
                        eval.coherence.balance =
                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
                                equation_balanced: balance_validation.is_balanced,
                                max_imbalance: (balance_validation.total_debits
                                    - balance_validation.total_credits)
                                    .abs(),
                                periods_evaluated: 1,
                                periods_imbalanced: if balance_validation.is_balanced {
                                    0
                                } else {
                                    1
                                },
                                period_results: Vec::new(),
                                companies_evaluated: self.config.companies.len(),
                            });
                    }

                    // NOTE(review): `passes` is taken from `is_balanced` even when
                    // `validated` is false; the outcome then depends on the default
                    // of `BalanceValidationResult` — confirm this is intended.
                    eval.coherence.passes = balance_validation.is_balanced;
                    if !balance_validation.is_balanced {
                        eval.coherence
                            .failures
                            .push("Balance sheet equation not satisfied".to_string());
                    }

                    // NOTE(review): the statistical and quality scores below are fixed
                    // constants keyed only on the entry count, not measured metrics —
                    // presumably placeholders; confirm.
                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
                    eval.statistical.passes = !entries.is_empty();

                    eval.quality.overall_score = 0.9;
                    eval.quality.passes = true;

                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
                    info!(
                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
                        profile_name, result.gates_passed, result.gates_total, result.summary
                    );
                    Some(result)
                }
                None => {
                    // An unknown profile is not an error: gates are skipped with a warning.
                    warn!(
                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
                        profile_name
                    );
                    None
                }
            }
        } else {
            None
        };

        let internal_controls = if self.config.internal_controls.enabled {
            InternalControl::standard_controls()
        } else {
            Vec::new()
        };

        // The chart of accounts and master data are cloned into the result;
        // every other snapshot is moved.
        Ok(EnhancedGenerationResult {
            chart_of_accounts: (*coa).clone(),
            master_data: self.master_data.clone(),
            document_flows,
            subledger,
            ocpm,
            audit,
            banking,
            graph_export,
            sourcing,
            financial_reporting,
            hr,
            accounting_standards,
            manufacturing: manufacturing_snap,
            sales_kpi_budgets,
            tax,
            esg: esg_snap,
            treasury,
            project_accounting,
            intercompany,
            journal_entries: entries,
            anomaly_labels,
            balance_validation,
            data_quality_stats,
            statistics: stats,
            lineage: Some(lineage),
            gate_result,
            internal_controls,
            opening_balances,
            subledger_reconciliation,
        })
    }
1691
1692 fn phase_chart_of_accounts(
1698 &mut self,
1699 stats: &mut EnhancedGenerationStatistics,
1700 ) -> SynthResult<Arc<ChartOfAccounts>> {
1701 info!("Phase 1: Generating Chart of Accounts");
1702 let coa = self.generate_coa()?;
1703 stats.accounts_count = coa.account_count();
1704 info!(
1705 "Chart of Accounts generated: {} accounts",
1706 stats.accounts_count
1707 );
1708 self.check_resources_with_log("post-coa")?;
1709 Ok(coa)
1710 }
1711
1712 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1714 if self.phase_config.generate_master_data {
1715 info!("Phase 2: Generating Master Data");
1716 self.generate_master_data()?;
1717 stats.vendor_count = self.master_data.vendors.len();
1718 stats.customer_count = self.master_data.customers.len();
1719 stats.material_count = self.master_data.materials.len();
1720 stats.asset_count = self.master_data.assets.len();
1721 stats.employee_count = self.master_data.employees.len();
1722 info!(
1723 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1724 stats.vendor_count, stats.customer_count, stats.material_count,
1725 stats.asset_count, stats.employee_count
1726 );
1727 self.check_resources_with_log("post-master-data")?;
1728 } else {
1729 debug!("Phase 2: Skipped (master data generation disabled)");
1730 }
1731 Ok(())
1732 }
1733
1734 fn phase_document_flows(
1736 &mut self,
1737 stats: &mut EnhancedGenerationStatistics,
1738 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1739 let mut document_flows = DocumentFlowSnapshot::default();
1740 let mut subledger = SubledgerSnapshot::default();
1741
1742 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1743 info!("Phase 3: Generating Document Flows");
1744 self.generate_document_flows(&mut document_flows)?;
1745 stats.p2p_chain_count = document_flows.p2p_chains.len();
1746 stats.o2c_chain_count = document_flows.o2c_chains.len();
1747 info!(
1748 "Document flows generated: {} P2P chains, {} O2C chains",
1749 stats.p2p_chain_count, stats.o2c_chain_count
1750 );
1751
1752 debug!("Phase 3b: Linking document flows to subledgers");
1754 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1755 stats.ap_invoice_count = subledger.ap_invoices.len();
1756 stats.ar_invoice_count = subledger.ar_invoices.len();
1757 debug!(
1758 "Subledgers linked: {} AP invoices, {} AR invoices",
1759 stats.ap_invoice_count, stats.ar_invoice_count
1760 );
1761
1762 self.check_resources_with_log("post-document-flows")?;
1763 } else {
1764 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1765 }
1766
1767 let mut fa_journal_entries = Vec::new();
1769 if !self.master_data.assets.is_empty() {
1770 debug!("Generating FA subledger records");
1771 let company_code = self
1772 .config
1773 .companies
1774 .first()
1775 .map(|c| c.code.as_str())
1776 .unwrap_or("1000");
1777 let currency = self
1778 .config
1779 .companies
1780 .first()
1781 .map(|c| c.currency.as_str())
1782 .unwrap_or("USD");
1783
1784 let mut fa_gen = datasynth_generators::FAGenerator::new(
1785 datasynth_generators::FAGeneratorConfig::default(),
1786 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1787 );
1788
1789 for asset in &self.master_data.assets {
1790 let (record, je) = fa_gen.generate_asset_acquisition(
1791 company_code,
1792 &format!("{:?}", asset.asset_class),
1793 &asset.description,
1794 asset.acquisition_date,
1795 currency,
1796 asset.cost_center.as_deref(),
1797 );
1798 subledger.fa_records.push(record);
1799 fa_journal_entries.push(je);
1800 }
1801
1802 stats.fa_subledger_count = subledger.fa_records.len();
1803 debug!(
1804 "FA subledger records generated: {} (with {} acquisition JEs)",
1805 stats.fa_subledger_count,
1806 fa_journal_entries.len()
1807 );
1808 }
1809
1810 if !self.master_data.materials.is_empty() {
1812 debug!("Generating Inventory subledger records");
1813 let first_company = self.config.companies.first();
1814 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1815 let inv_currency = first_company
1816 .map(|c| c.currency.clone())
1817 .unwrap_or_else(|| "USD".to_string());
1818
1819 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1820 datasynth_generators::InventoryGeneratorConfig::default(),
1821 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1822 inv_currency.clone(),
1823 );
1824
1825 for (i, material) in self.master_data.materials.iter().enumerate() {
1826 let plant = format!("PLANT{:02}", (i % 3) + 1);
1827 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1828 let initial_qty = rust_decimal::Decimal::from(
1829 material
1830 .safety_stock
1831 .to_string()
1832 .parse::<i64>()
1833 .unwrap_or(100),
1834 );
1835
1836 let position = inv_gen.generate_position(
1837 company_code,
1838 &plant,
1839 &storage_loc,
1840 &material.material_id,
1841 &material.description,
1842 initial_qty,
1843 Some(material.standard_cost),
1844 &inv_currency,
1845 );
1846 subledger.inventory_positions.push(position);
1847 }
1848
1849 stats.inventory_subledger_count = subledger.inventory_positions.len();
1850 debug!(
1851 "Inventory subledger records generated: {}",
1852 stats.inventory_subledger_count
1853 );
1854 }
1855
1856 Ok((document_flows, subledger, fa_journal_entries))
1857 }
1858
1859 #[allow(clippy::too_many_arguments)]
1861 fn phase_ocpm_events(
1862 &mut self,
1863 document_flows: &DocumentFlowSnapshot,
1864 sourcing: &SourcingSnapshot,
1865 hr: &HrSnapshot,
1866 manufacturing: &ManufacturingSnapshot,
1867 banking: &BankingSnapshot,
1868 audit: &AuditSnapshot,
1869 financial_reporting: &FinancialReportingSnapshot,
1870 stats: &mut EnhancedGenerationStatistics,
1871 ) -> SynthResult<OcpmSnapshot> {
1872 if self.phase_config.generate_ocpm_events {
1873 info!("Phase 3c: Generating OCPM Events");
1874 let ocpm_snapshot = self.generate_ocpm_events(
1875 document_flows,
1876 sourcing,
1877 hr,
1878 manufacturing,
1879 banking,
1880 audit,
1881 financial_reporting,
1882 )?;
1883 stats.ocpm_event_count = ocpm_snapshot.event_count;
1884 stats.ocpm_object_count = ocpm_snapshot.object_count;
1885 stats.ocpm_case_count = ocpm_snapshot.case_count;
1886 info!(
1887 "OCPM events generated: {} events, {} objects, {} cases",
1888 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1889 );
1890 self.check_resources_with_log("post-ocpm")?;
1891 Ok(ocpm_snapshot)
1892 } else {
1893 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1894 Ok(OcpmSnapshot::default())
1895 }
1896 }
1897
1898 fn phase_journal_entries(
1900 &mut self,
1901 coa: &Arc<ChartOfAccounts>,
1902 document_flows: &DocumentFlowSnapshot,
1903 _stats: &mut EnhancedGenerationStatistics,
1904 ) -> SynthResult<Vec<JournalEntry>> {
1905 let mut entries = Vec::new();
1906
1907 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1909 debug!("Phase 4a: Generating JEs from document flows");
1910 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1911 debug!("Generated {} JEs from document flows", flow_entries.len());
1912 entries.extend(flow_entries);
1913 }
1914
1915 if self.phase_config.generate_journal_entries {
1917 info!("Phase 4: Generating Journal Entries");
1918 let je_entries = self.generate_journal_entries(coa)?;
1919 info!("Generated {} standalone journal entries", je_entries.len());
1920 entries.extend(je_entries);
1921 } else {
1922 debug!("Phase 4: Skipped (journal entry generation disabled)");
1923 }
1924
1925 if !entries.is_empty() {
1926 self.check_resources_with_log("post-journal-entries")?;
1929 }
1930
1931 Ok(entries)
1932 }
1933
1934 fn phase_anomaly_injection(
1936 &mut self,
1937 entries: &mut [JournalEntry],
1938 actions: &DegradationActions,
1939 stats: &mut EnhancedGenerationStatistics,
1940 ) -> SynthResult<AnomalyLabels> {
1941 if self.phase_config.inject_anomalies
1942 && !entries.is_empty()
1943 && !actions.skip_anomaly_injection
1944 {
1945 info!("Phase 5: Injecting Anomalies");
1946 let result = self.inject_anomalies(entries)?;
1947 stats.anomalies_injected = result.labels.len();
1948 info!("Injected {} anomalies", stats.anomalies_injected);
1949 self.check_resources_with_log("post-anomaly-injection")?;
1950 Ok(result)
1951 } else if actions.skip_anomaly_injection {
1952 warn!("Phase 5: Skipped due to resource degradation");
1953 Ok(AnomalyLabels::default())
1954 } else {
1955 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1956 Ok(AnomalyLabels::default())
1957 }
1958 }
1959
1960 fn phase_balance_validation(
1962 &mut self,
1963 entries: &[JournalEntry],
1964 ) -> SynthResult<BalanceValidationResult> {
1965 if self.phase_config.validate_balances && !entries.is_empty() {
1966 debug!("Phase 6: Validating Balances");
1967 let balance_validation = self.validate_journal_entries(entries)?;
1968 if balance_validation.is_balanced {
1969 debug!("Balance validation passed");
1970 } else {
1971 warn!(
1972 "Balance validation found {} errors",
1973 balance_validation.validation_errors.len()
1974 );
1975 }
1976 Ok(balance_validation)
1977 } else {
1978 Ok(BalanceValidationResult::default())
1979 }
1980 }
1981
1982 fn phase_data_quality_injection(
1984 &mut self,
1985 entries: &mut [JournalEntry],
1986 actions: &DegradationActions,
1987 stats: &mut EnhancedGenerationStatistics,
1988 ) -> SynthResult<DataQualityStats> {
1989 if self.phase_config.inject_data_quality
1990 && !entries.is_empty()
1991 && !actions.skip_data_quality
1992 {
1993 info!("Phase 7: Injecting Data Quality Variations");
1994 let dq_stats = self.inject_data_quality(entries)?;
1995 stats.data_quality_issues = dq_stats.records_with_issues;
1996 info!("Injected {} data quality issues", stats.data_quality_issues);
1997 self.check_resources_with_log("post-data-quality")?;
1998 Ok(dq_stats)
1999 } else if actions.skip_data_quality {
2000 warn!("Phase 7: Skipped due to resource degradation");
2001 Ok(DataQualityStats::default())
2002 } else {
2003 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2004 Ok(DataQualityStats::default())
2005 }
2006 }
2007
2008 fn phase_audit_data(
2010 &mut self,
2011 entries: &[JournalEntry],
2012 stats: &mut EnhancedGenerationStatistics,
2013 ) -> SynthResult<AuditSnapshot> {
2014 if self.phase_config.generate_audit {
2015 info!("Phase 8: Generating Audit Data");
2016 let audit_snapshot = self.generate_audit_data(entries)?;
2017 stats.audit_engagement_count = audit_snapshot.engagements.len();
2018 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2019 stats.audit_evidence_count = audit_snapshot.evidence.len();
2020 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2021 stats.audit_finding_count = audit_snapshot.findings.len();
2022 stats.audit_judgment_count = audit_snapshot.judgments.len();
2023 info!(
2024 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2025 stats.audit_engagement_count, stats.audit_workpaper_count,
2026 stats.audit_evidence_count, stats.audit_risk_count,
2027 stats.audit_finding_count, stats.audit_judgment_count
2028 );
2029 self.check_resources_with_log("post-audit")?;
2030 Ok(audit_snapshot)
2031 } else {
2032 debug!("Phase 8: Skipped (audit generation disabled)");
2033 Ok(AuditSnapshot::default())
2034 }
2035 }
2036
2037 fn phase_banking_data(
2039 &mut self,
2040 stats: &mut EnhancedGenerationStatistics,
2041 ) -> SynthResult<BankingSnapshot> {
2042 if self.phase_config.generate_banking && self.config.banking.enabled {
2043 info!("Phase 9: Generating Banking KYC/AML Data");
2044 let banking_snapshot = self.generate_banking_data()?;
2045 stats.banking_customer_count = banking_snapshot.customers.len();
2046 stats.banking_account_count = banking_snapshot.accounts.len();
2047 stats.banking_transaction_count = banking_snapshot.transactions.len();
2048 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2049 info!(
2050 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2051 stats.banking_customer_count, stats.banking_account_count,
2052 stats.banking_transaction_count, stats.banking_suspicious_count
2053 );
2054 self.check_resources_with_log("post-banking")?;
2055 Ok(banking_snapshot)
2056 } else {
2057 debug!("Phase 9: Skipped (banking generation disabled)");
2058 Ok(BankingSnapshot::default())
2059 }
2060 }
2061
2062 fn phase_graph_export(
2064 &mut self,
2065 entries: &[JournalEntry],
2066 coa: &Arc<ChartOfAccounts>,
2067 stats: &mut EnhancedGenerationStatistics,
2068 ) -> SynthResult<GraphExportSnapshot> {
2069 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2070 && !entries.is_empty()
2071 {
2072 info!("Phase 10: Exporting Accounting Network Graphs");
2073 match self.export_graphs(entries, coa, stats) {
2074 Ok(snapshot) => {
2075 info!(
2076 "Graph export complete: {} graphs ({} nodes, {} edges)",
2077 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2078 );
2079 Ok(snapshot)
2080 }
2081 Err(e) => {
2082 warn!("Phase 10: Graph export failed: {}", e);
2083 Ok(GraphExportSnapshot::default())
2084 }
2085 }
2086 } else {
2087 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2088 Ok(GraphExportSnapshot::default())
2089 }
2090 }
2091
2092 #[allow(clippy::too_many_arguments)]
2094 fn phase_hypergraph_export(
2095 &self,
2096 coa: &Arc<ChartOfAccounts>,
2097 entries: &[JournalEntry],
2098 document_flows: &DocumentFlowSnapshot,
2099 sourcing: &SourcingSnapshot,
2100 hr: &HrSnapshot,
2101 manufacturing: &ManufacturingSnapshot,
2102 banking: &BankingSnapshot,
2103 audit: &AuditSnapshot,
2104 financial_reporting: &FinancialReportingSnapshot,
2105 ocpm: &OcpmSnapshot,
2106 stats: &mut EnhancedGenerationStatistics,
2107 ) -> SynthResult<()> {
2108 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2109 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2110 match self.export_hypergraph(
2111 coa,
2112 entries,
2113 document_flows,
2114 sourcing,
2115 hr,
2116 manufacturing,
2117 banking,
2118 audit,
2119 financial_reporting,
2120 ocpm,
2121 stats,
2122 ) {
2123 Ok(info) => {
2124 info!(
2125 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2126 info.node_count, info.edge_count, info.hyperedge_count
2127 );
2128 }
2129 Err(e) => {
2130 warn!("Phase 10b: Hypergraph export failed: {}", e);
2131 }
2132 }
2133 } else {
2134 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2135 }
2136 Ok(())
2137 }
2138
2139 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2145 if !self.config.llm.enabled {
2146 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2147 return;
2148 }
2149
2150 info!("Phase 11: Starting LLM Enrichment");
2151 let start = std::time::Instant::now();
2152
2153 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2154 let provider = Arc::new(MockLlmProvider::new(self.seed));
2155 let enricher = VendorLlmEnricher::new(provider);
2156
2157 let industry = format!("{:?}", self.config.global.industry);
2158 let max_enrichments = self
2159 .config
2160 .llm
2161 .max_vendor_enrichments
2162 .min(self.master_data.vendors.len());
2163
2164 let mut enriched_count = 0usize;
2165 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2166 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2167 Ok(name) => {
2168 vendor.name = name;
2169 enriched_count += 1;
2170 }
2171 Err(e) => {
2172 warn!(
2173 "LLM vendor enrichment failed for {}: {}",
2174 vendor.vendor_id, e
2175 );
2176 }
2177 }
2178 }
2179
2180 enriched_count
2181 }));
2182
2183 match result {
2184 Ok(enriched_count) => {
2185 stats.llm_vendors_enriched = enriched_count;
2186 let elapsed = start.elapsed();
2187 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2188 info!(
2189 "Phase 11 complete: {} vendors enriched in {}ms",
2190 enriched_count, stats.llm_enrichment_ms
2191 );
2192 }
2193 Err(_) => {
2194 let elapsed = start.elapsed();
2195 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2196 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2197 }
2198 }
2199 }
2200
2201 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2207 if !self.config.diffusion.enabled {
2208 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2209 return;
2210 }
2211
2212 info!("Phase 12: Starting Diffusion Enhancement");
2213 let start = std::time::Instant::now();
2214
2215 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2216 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2219
2220 let diffusion_config = DiffusionConfig {
2221 n_steps: self.config.diffusion.n_steps,
2222 seed: self.seed,
2223 ..Default::default()
2224 };
2225
2226 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2227
2228 let n_samples = self.config.diffusion.sample_size;
2229 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2231
2232 samples.len()
2233 }));
2234
2235 match result {
2236 Ok(sample_count) => {
2237 stats.diffusion_samples_generated = sample_count;
2238 let elapsed = start.elapsed();
2239 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2240 info!(
2241 "Phase 12 complete: {} diffusion samples generated in {}ms",
2242 sample_count, stats.diffusion_enhancement_ms
2243 );
2244 }
2245 Err(_) => {
2246 let elapsed = start.elapsed();
2247 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2248 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2249 }
2250 }
2251 }
2252
2253 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2260 if !self.config.causal.enabled {
2261 debug!("Phase 13: Skipped (causal generation disabled)");
2262 return;
2263 }
2264
2265 info!("Phase 13: Starting Causal Overlay");
2266 let start = std::time::Instant::now();
2267
2268 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2269 let graph = match self.config.causal.template.as_str() {
2271 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2272 _ => CausalGraph::fraud_detection_template(),
2273 };
2274
2275 let scm = StructuralCausalModel::new(graph.clone())
2276 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2277
2278 let n_samples = self.config.causal.sample_size;
2279 let samples = scm
2280 .generate(n_samples, self.seed)
2281 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2282
2283 let validation_passed = if self.config.causal.validate {
2285 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2286 if report.valid {
2287 info!(
2288 "Causal validation passed: all {} checks OK",
2289 report.checks.len()
2290 );
2291 } else {
2292 warn!(
2293 "Causal validation: {} violations detected: {:?}",
2294 report.violations.len(),
2295 report.violations
2296 );
2297 }
2298 Some(report.valid)
2299 } else {
2300 None
2301 };
2302
2303 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2304 }));
2305
2306 match result {
2307 Ok(Ok((sample_count, validation_passed))) => {
2308 stats.causal_samples_generated = sample_count;
2309 stats.causal_validation_passed = validation_passed;
2310 let elapsed = start.elapsed();
2311 stats.causal_generation_ms = elapsed.as_millis() as u64;
2312 info!(
2313 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2314 sample_count, stats.causal_generation_ms, validation_passed,
2315 );
2316 }
2317 Ok(Err(e)) => {
2318 let elapsed = start.elapsed();
2319 stats.causal_generation_ms = elapsed.as_millis() as u64;
2320 warn!("Phase 13: Causal generation failed: {}", e);
2321 }
2322 Err(_) => {
2323 let elapsed = start.elapsed();
2324 stats.causal_generation_ms = elapsed.as_millis() as u64;
2325 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2326 }
2327 }
2328 }
2329
2330 fn phase_sourcing_data(
2332 &mut self,
2333 stats: &mut EnhancedGenerationStatistics,
2334 ) -> SynthResult<SourcingSnapshot> {
2335 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2336 debug!("Phase 14: Skipped (sourcing generation disabled)");
2337 return Ok(SourcingSnapshot::default());
2338 }
2339
2340 info!("Phase 14: Generating S2C Sourcing Data");
2341 let seed = self.seed;
2342
2343 let vendor_ids: Vec<String> = self
2345 .master_data
2346 .vendors
2347 .iter()
2348 .map(|v| v.vendor_id.clone())
2349 .collect();
2350 if vendor_ids.is_empty() {
2351 debug!("Phase 14: Skipped (no vendors available)");
2352 return Ok(SourcingSnapshot::default());
2353 }
2354
2355 let categories: Vec<(String, String)> = vec![
2356 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2357 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2358 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2359 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2360 ("CAT-LOG".to_string(), "Logistics".to_string()),
2361 ];
2362 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2363 .iter()
2364 .map(|(id, name)| {
2365 (
2366 id.clone(),
2367 name.clone(),
2368 rust_decimal::Decimal::from(100_000),
2369 )
2370 })
2371 .collect();
2372
2373 let company_code = self
2374 .config
2375 .companies
2376 .first()
2377 .map(|c| c.code.as_str())
2378 .unwrap_or("1000");
2379 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2380 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2381 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2382 let fiscal_year = start_date.year() as u16;
2383 let owner_ids: Vec<String> = self
2384 .master_data
2385 .employees
2386 .iter()
2387 .take(5)
2388 .map(|e| e.employee_id.clone())
2389 .collect();
2390 let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2391
2392 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2394 let spend_analyses =
2395 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2396
2397 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2399 let sourcing_projects = if owner_ids.is_empty() {
2400 Vec::new()
2401 } else {
2402 project_gen.generate(
2403 company_code,
2404 &categories_with_spend,
2405 &owner_ids,
2406 start_date,
2407 self.config.global.period_months,
2408 )
2409 };
2410 stats.sourcing_project_count = sourcing_projects.len();
2411
2412 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2414 let mut qual_gen = QualificationGenerator::new(seed + 2);
2415 let qualifications = qual_gen.generate(
2416 company_code,
2417 &qual_vendor_ids,
2418 sourcing_projects.first().map(|p| p.project_id.as_str()),
2419 owner_id,
2420 start_date,
2421 );
2422
2423 let mut rfx_gen = RfxGenerator::new(seed + 3);
2425 let rfx_events: Vec<RfxEvent> = sourcing_projects
2426 .iter()
2427 .map(|proj| {
2428 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2429 rfx_gen.generate(
2430 company_code,
2431 &proj.project_id,
2432 &proj.category_id,
2433 &qualified_vids,
2434 owner_id,
2435 start_date,
2436 50000.0,
2437 )
2438 })
2439 .collect();
2440 stats.rfx_event_count = rfx_events.len();
2441
2442 let mut bid_gen = BidGenerator::new(seed + 4);
2444 let mut all_bids = Vec::new();
2445 for rfx in &rfx_events {
2446 let bidder_count = vendor_ids.len().clamp(2, 5);
2447 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2448 let bids = bid_gen.generate(rfx, &responding, start_date);
2449 all_bids.extend(bids);
2450 }
2451 stats.bid_count = all_bids.len();
2452
2453 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2455 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2456 .iter()
2457 .map(|rfx| {
2458 let rfx_bids: Vec<SupplierBid> = all_bids
2459 .iter()
2460 .filter(|b| b.rfx_id == rfx.rfx_id)
2461 .cloned()
2462 .collect();
2463 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2464 })
2465 .collect();
2466
2467 let mut contract_gen = ContractGenerator::new(seed + 6);
2469 let contracts: Vec<ProcurementContract> = bid_evaluations
2470 .iter()
2471 .zip(rfx_events.iter())
2472 .filter_map(|(eval, rfx)| {
2473 eval.ranked_bids.first().and_then(|winner| {
2474 all_bids
2475 .iter()
2476 .find(|b| b.bid_id == winner.bid_id)
2477 .map(|winning_bid| {
2478 contract_gen.generate_from_bid(
2479 winning_bid,
2480 Some(&rfx.sourcing_project_id),
2481 &rfx.category_id,
2482 owner_id,
2483 start_date,
2484 )
2485 })
2486 })
2487 })
2488 .collect();
2489 stats.contract_count = contracts.len();
2490
2491 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2493 let catalog_items = catalog_gen.generate(&contracts);
2494 stats.catalog_item_count = catalog_items.len();
2495
2496 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2498 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2499 .iter()
2500 .fold(
2501 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2502 |mut acc, c| {
2503 acc.entry(c.vendor_id.clone()).or_default().push(c);
2504 acc
2505 },
2506 )
2507 .into_iter()
2508 .collect();
2509 let scorecards = scorecard_gen.generate(
2510 company_code,
2511 &vendor_contracts,
2512 start_date,
2513 end_date,
2514 owner_id,
2515 );
2516 stats.scorecard_count = scorecards.len();
2517
2518 let mut sourcing_projects = sourcing_projects;
2521 for project in &mut sourcing_projects {
2522 project.rfx_ids = rfx_events
2524 .iter()
2525 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2526 .map(|rfx| rfx.rfx_id.clone())
2527 .collect();
2528
2529 project.contract_id = contracts
2531 .iter()
2532 .find(|c| {
2533 c.sourcing_project_id
2534 .as_deref()
2535 .is_some_and(|sp| sp == project.project_id)
2536 })
2537 .map(|c| c.contract_id.clone());
2538
2539 project.spend_analysis_id = spend_analyses
2541 .iter()
2542 .find(|sa| sa.category_id == project.category_id)
2543 .map(|sa| sa.category_id.clone());
2544 }
2545
2546 info!(
2547 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2548 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2549 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2550 );
2551 self.check_resources_with_log("post-sourcing")?;
2552
2553 Ok(SourcingSnapshot {
2554 spend_analyses,
2555 sourcing_projects,
2556 qualifications,
2557 rfx_events,
2558 bids: all_bids,
2559 bid_evaluations,
2560 contracts,
2561 catalog_items,
2562 scorecards,
2563 })
2564 }
2565
2566 fn phase_intercompany(
2568 &mut self,
2569 stats: &mut EnhancedGenerationStatistics,
2570 ) -> SynthResult<IntercompanySnapshot> {
2571 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2573 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2574 return Ok(IntercompanySnapshot::default());
2575 }
2576
2577 if self.config.companies.len() < 2 {
2579 debug!(
2580 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2581 self.config.companies.len()
2582 );
2583 return Ok(IntercompanySnapshot::default());
2584 }
2585
2586 info!("Phase 14b: Generating Intercompany Transactions");
2587
2588 let seed = self.seed;
2589 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2590 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2591 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2592
2593 let parent_code = self.config.companies[0].code.clone();
2596 let mut ownership_structure =
2597 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2598
2599 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2600 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2601 format!("REL{:03}", i + 1),
2602 parent_code.clone(),
2603 company.code.clone(),
2604 rust_decimal::Decimal::from(100), start_date,
2606 );
2607 ownership_structure.add_relationship(relationship);
2608 }
2609
2610 let tp_method = match self.config.intercompany.transfer_pricing_method {
2612 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2613 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2614 }
2615 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2616 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2617 }
2618 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2619 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2620 }
2621 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2622 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2623 }
2624 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2625 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2626 }
2627 };
2628
2629 let ic_currency = self
2631 .config
2632 .companies
2633 .first()
2634 .map(|c| c.currency.clone())
2635 .unwrap_or_else(|| "USD".to_string());
2636 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2637 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2638 transfer_pricing_method: tp_method,
2639 markup_percent: rust_decimal::Decimal::from_f64_retain(
2640 self.config.intercompany.markup_percent,
2641 )
2642 .unwrap_or(rust_decimal::Decimal::from(5)),
2643 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2644 default_currency: ic_currency,
2645 ..Default::default()
2646 };
2647
2648 let mut ic_generator = datasynth_generators::ICGenerator::new(
2650 ic_gen_config,
2651 ownership_structure.clone(),
2652 seed + 50,
2653 );
2654
2655 let transactions_per_day = 3;
2658 let matched_pairs = ic_generator.generate_transactions_for_period(
2659 start_date,
2660 end_date,
2661 transactions_per_day,
2662 );
2663
2664 let mut seller_entries = Vec::new();
2666 let mut buyer_entries = Vec::new();
2667 let fiscal_year = start_date.year();
2668
2669 for pair in &matched_pairs {
2670 let fiscal_period = pair.posting_date.month();
2671 let (seller_je, buyer_je) =
2672 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2673 seller_entries.push(seller_je);
2674 buyer_entries.push(buyer_je);
2675 }
2676
2677 let matching_config = datasynth_generators::ICMatchingConfig {
2679 base_currency: self
2680 .config
2681 .companies
2682 .first()
2683 .map(|c| c.currency.clone())
2684 .unwrap_or_else(|| "USD".to_string()),
2685 ..Default::default()
2686 };
2687 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2688 matching_engine.load_matched_pairs(&matched_pairs);
2689 let matching_result = matching_engine.run_matching(end_date);
2690
2691 let mut elimination_entries = Vec::new();
2693 if self.config.intercompany.generate_eliminations {
2694 let elim_config = datasynth_generators::EliminationConfig {
2695 consolidation_entity: "GROUP".to_string(),
2696 base_currency: self
2697 .config
2698 .companies
2699 .first()
2700 .map(|c| c.currency.clone())
2701 .unwrap_or_else(|| "USD".to_string()),
2702 ..Default::default()
2703 };
2704
2705 let mut elim_generator =
2706 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2707
2708 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2709 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2710 matching_result
2711 .matched_balances
2712 .iter()
2713 .chain(matching_result.unmatched_balances.iter())
2714 .cloned()
2715 .collect();
2716
2717 let journal = elim_generator.generate_eliminations(
2718 &fiscal_period,
2719 end_date,
2720 &all_balances,
2721 &matched_pairs,
2722 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
2725
2726 elimination_entries = journal.entries.clone();
2727 }
2728
2729 let matched_pair_count = matched_pairs.len();
2730 let elimination_entry_count = elimination_entries.len();
2731 let match_rate = matching_result.match_rate;
2732
2733 stats.ic_matched_pair_count = matched_pair_count;
2734 stats.ic_elimination_count = elimination_entry_count;
2735 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2736
2737 info!(
2738 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2739 matched_pair_count,
2740 stats.ic_transaction_count,
2741 seller_entries.len(),
2742 buyer_entries.len(),
2743 elimination_entry_count,
2744 match_rate * 100.0
2745 );
2746 self.check_resources_with_log("post-intercompany")?;
2747
2748 Ok(IntercompanySnapshot {
2749 matched_pairs,
2750 seller_journal_entries: seller_entries,
2751 buyer_journal_entries: buyer_entries,
2752 elimination_entries,
2753 matched_pair_count,
2754 elimination_entry_count,
2755 match_rate,
2756 })
2757 }
2758
2759 fn phase_financial_reporting(
2761 &mut self,
2762 document_flows: &DocumentFlowSnapshot,
2763 journal_entries: &[JournalEntry],
2764 coa: &Arc<ChartOfAccounts>,
2765 stats: &mut EnhancedGenerationStatistics,
2766 ) -> SynthResult<FinancialReportingSnapshot> {
2767 let fs_enabled = self.phase_config.generate_financial_statements
2768 || self.config.financial_reporting.enabled;
2769 let br_enabled = self.phase_config.generate_bank_reconciliation;
2770
2771 if !fs_enabled && !br_enabled {
2772 debug!("Phase 15: Skipped (financial reporting disabled)");
2773 return Ok(FinancialReportingSnapshot::default());
2774 }
2775
2776 info!("Phase 15: Generating Financial Reporting Data");
2777
2778 let seed = self.seed;
2779 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2780 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2781
2782 let mut financial_statements = Vec::new();
2783 let mut bank_reconciliations = Vec::new();
2784 let mut trial_balances = Vec::new();
2785
2786 if fs_enabled {
2794 let company_code = self
2795 .config
2796 .companies
2797 .first()
2798 .map(|c| c.code.as_str())
2799 .unwrap_or("1000");
2800 let currency = self
2801 .config
2802 .companies
2803 .first()
2804 .map(|c| c.currency.as_str())
2805 .unwrap_or("USD");
2806 let has_journal_entries = !journal_entries.is_empty();
2807
2808 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2811
2812 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2814 None;
2815
2816 for period in 0..self.config.global.period_months {
2818 let period_start = start_date + chrono::Months::new(period);
2819 let period_end =
2820 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2821 let fiscal_year = period_end.year() as u16;
2822 let fiscal_period = period_end.month() as u8;
2823
2824 if has_journal_entries {
2825 let tb_entries = Self::build_cumulative_trial_balance(
2828 journal_entries,
2829 coa,
2830 company_code,
2831 start_date,
2832 period_end,
2833 fiscal_year,
2834 fiscal_period,
2835 );
2836
2837 let prior_ref = prior_cumulative_tb.as_deref();
2840 let stmts = fs_gen.generate(
2841 company_code,
2842 currency,
2843 &tb_entries,
2844 period_start,
2845 period_end,
2846 fiscal_year,
2847 fiscal_period,
2848 prior_ref,
2849 "SYS-AUTOCLOSE",
2850 );
2851
2852 for stmt in stmts {
2854 if stmt.statement_type == StatementType::CashFlowStatement {
2855 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2857 let cf_items = Self::build_cash_flow_from_trial_balances(
2858 &tb_entries,
2859 prior_ref,
2860 net_income,
2861 );
2862 financial_statements.push(FinancialStatement {
2863 cash_flow_items: cf_items,
2864 ..stmt
2865 });
2866 } else {
2867 financial_statements.push(stmt);
2868 }
2869 }
2870
2871 trial_balances.push(PeriodTrialBalance {
2873 fiscal_year,
2874 fiscal_period,
2875 period_start,
2876 period_end,
2877 entries: tb_entries.clone(),
2878 });
2879
2880 prior_cumulative_tb = Some(tb_entries);
2882 } else {
2883 let tb_entries = Self::build_trial_balance_from_entries(
2886 journal_entries,
2887 coa,
2888 company_code,
2889 fiscal_year,
2890 fiscal_period,
2891 );
2892
2893 let stmts = fs_gen.generate(
2894 company_code,
2895 currency,
2896 &tb_entries,
2897 period_start,
2898 period_end,
2899 fiscal_year,
2900 fiscal_period,
2901 None,
2902 "SYS-AUTOCLOSE",
2903 );
2904 financial_statements.extend(stmts);
2905
2906 if !tb_entries.is_empty() {
2908 trial_balances.push(PeriodTrialBalance {
2909 fiscal_year,
2910 fiscal_period,
2911 period_start,
2912 period_end,
2913 entries: tb_entries,
2914 });
2915 }
2916 }
2917 }
2918 stats.financial_statement_count = financial_statements.len();
2919 info!(
2920 "Financial statements generated: {} statements (JE-derived: {})",
2921 stats.financial_statement_count, has_journal_entries
2922 );
2923 }
2924
2925 if br_enabled && !document_flows.payments.is_empty() {
2927 let employee_ids: Vec<String> = self
2928 .master_data
2929 .employees
2930 .iter()
2931 .map(|e| e.employee_id.clone())
2932 .collect();
2933 let mut br_gen =
2934 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2935
2936 for company in &self.config.companies {
2938 let company_payments: Vec<PaymentReference> = document_flows
2939 .payments
2940 .iter()
2941 .filter(|p| p.header.company_code == company.code)
2942 .map(|p| PaymentReference {
2943 id: p.header.document_id.clone(),
2944 amount: if p.is_vendor { p.amount } else { -p.amount },
2945 date: p.header.document_date,
2946 reference: p
2947 .check_number
2948 .clone()
2949 .or_else(|| p.wire_reference.clone())
2950 .unwrap_or_else(|| p.header.document_id.clone()),
2951 })
2952 .collect();
2953
2954 if company_payments.is_empty() {
2955 continue;
2956 }
2957
2958 let bank_account_id = format!("{}-MAIN", company.code);
2959
2960 for period in 0..self.config.global.period_months {
2962 let period_start = start_date + chrono::Months::new(period);
2963 let period_end =
2964 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2965
2966 let period_payments: Vec<PaymentReference> = company_payments
2967 .iter()
2968 .filter(|p| p.date >= period_start && p.date <= period_end)
2969 .cloned()
2970 .collect();
2971
2972 let recon = br_gen.generate(
2973 &company.code,
2974 &bank_account_id,
2975 period_start,
2976 period_end,
2977 &company.currency,
2978 &period_payments,
2979 );
2980 bank_reconciliations.push(recon);
2981 }
2982 }
2983 info!(
2984 "Bank reconciliations generated: {} reconciliations",
2985 bank_reconciliations.len()
2986 );
2987 }
2988
2989 stats.bank_reconciliation_count = bank_reconciliations.len();
2990 self.check_resources_with_log("post-financial-reporting")?;
2991
2992 if !trial_balances.is_empty() {
2993 info!(
2994 "Period-close trial balances captured: {} periods",
2995 trial_balances.len()
2996 );
2997 }
2998
2999 Ok(FinancialReportingSnapshot {
3000 financial_statements,
3001 bank_reconciliations,
3002 trial_balances,
3003 })
3004 }
3005
3006 fn build_trial_balance_from_entries(
3012 journal_entries: &[JournalEntry],
3013 coa: &ChartOfAccounts,
3014 company_code: &str,
3015 fiscal_year: u16,
3016 fiscal_period: u8,
3017 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3018 use rust_decimal::Decimal;
3019
3020 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3022 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3023
3024 for je in journal_entries {
3025 if je.header.company_code != company_code
3027 || je.header.fiscal_year != fiscal_year
3028 || je.header.fiscal_period != fiscal_period
3029 {
3030 continue;
3031 }
3032
3033 for line in &je.lines {
3034 let acct = &line.gl_account;
3035 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3036 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3037 }
3038 }
3039
3040 let mut all_accounts: Vec<&String> = account_debits
3042 .keys()
3043 .chain(account_credits.keys())
3044 .collect::<std::collections::HashSet<_>>()
3045 .into_iter()
3046 .collect();
3047 all_accounts.sort();
3048
3049 let mut entries = Vec::new();
3050
3051 for acct_number in all_accounts {
3052 let debit = account_debits
3053 .get(acct_number)
3054 .copied()
3055 .unwrap_or(Decimal::ZERO);
3056 let credit = account_credits
3057 .get(acct_number)
3058 .copied()
3059 .unwrap_or(Decimal::ZERO);
3060
3061 if debit.is_zero() && credit.is_zero() {
3062 continue;
3063 }
3064
3065 let account_name = coa
3067 .get_account(acct_number)
3068 .map(|gl| gl.short_description.clone())
3069 .unwrap_or_else(|| format!("Account {}", acct_number));
3070
3071 let category = Self::category_from_account_code(acct_number);
3076
3077 entries.push(datasynth_generators::TrialBalanceEntry {
3078 account_code: acct_number.clone(),
3079 account_name,
3080 category,
3081 debit_balance: debit,
3082 credit_balance: credit,
3083 });
3084 }
3085
3086 entries
3087 }
3088
3089 fn build_cumulative_trial_balance(
3096 journal_entries: &[JournalEntry],
3097 coa: &ChartOfAccounts,
3098 company_code: &str,
3099 start_date: NaiveDate,
3100 period_end: NaiveDate,
3101 fiscal_year: u16,
3102 fiscal_period: u8,
3103 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3104 use rust_decimal::Decimal;
3105
3106 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3108 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3109
3110 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3112 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3113
3114 for je in journal_entries {
3115 if je.header.company_code != company_code {
3116 continue;
3117 }
3118
3119 for line in &je.lines {
3120 let acct = &line.gl_account;
3121 let category = Self::category_from_account_code(acct);
3122 let is_bs_account = matches!(
3123 category.as_str(),
3124 "Cash"
3125 | "Receivables"
3126 | "Inventory"
3127 | "FixedAssets"
3128 | "Payables"
3129 | "AccruedLiabilities"
3130 | "LongTermDebt"
3131 | "Equity"
3132 );
3133
3134 if is_bs_account {
3135 if je.header.document_date <= period_end
3137 && je.header.document_date >= start_date
3138 {
3139 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3140 line.debit_amount;
3141 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3142 line.credit_amount;
3143 }
3144 } else {
3145 if je.header.fiscal_year == fiscal_year
3147 && je.header.fiscal_period == fiscal_period
3148 {
3149 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3150 line.debit_amount;
3151 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3152 line.credit_amount;
3153 }
3154 }
3155 }
3156 }
3157
3158 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3160 all_accounts.extend(bs_debits.keys().cloned());
3161 all_accounts.extend(bs_credits.keys().cloned());
3162 all_accounts.extend(is_debits.keys().cloned());
3163 all_accounts.extend(is_credits.keys().cloned());
3164
3165 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3166 sorted_accounts.sort();
3167
3168 let mut entries = Vec::new();
3169
3170 for acct_number in &sorted_accounts {
3171 let category = Self::category_from_account_code(acct_number);
3172 let is_bs_account = matches!(
3173 category.as_str(),
3174 "Cash"
3175 | "Receivables"
3176 | "Inventory"
3177 | "FixedAssets"
3178 | "Payables"
3179 | "AccruedLiabilities"
3180 | "LongTermDebt"
3181 | "Equity"
3182 );
3183
3184 let (debit, credit) = if is_bs_account {
3185 (
3186 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3187 bs_credits
3188 .get(acct_number)
3189 .copied()
3190 .unwrap_or(Decimal::ZERO),
3191 )
3192 } else {
3193 (
3194 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3195 is_credits
3196 .get(acct_number)
3197 .copied()
3198 .unwrap_or(Decimal::ZERO),
3199 )
3200 };
3201
3202 if debit.is_zero() && credit.is_zero() {
3203 continue;
3204 }
3205
3206 let account_name = coa
3207 .get_account(acct_number)
3208 .map(|gl| gl.short_description.clone())
3209 .unwrap_or_else(|| format!("Account {}", acct_number));
3210
3211 entries.push(datasynth_generators::TrialBalanceEntry {
3212 account_code: acct_number.clone(),
3213 account_name,
3214 category,
3215 debit_balance: debit,
3216 credit_balance: credit,
3217 });
3218 }
3219
3220 entries
3221 }
3222
3223 fn build_cash_flow_from_trial_balances(
3228 current_tb: &[datasynth_generators::TrialBalanceEntry],
3229 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3230 net_income: rust_decimal::Decimal,
3231 ) -> Vec<CashFlowItem> {
3232 use rust_decimal::Decimal;
3233
3234 let aggregate =
3236 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3237 let mut map: HashMap<String, Decimal> = HashMap::new();
3238 for entry in tb {
3239 let net = entry.debit_balance - entry.credit_balance;
3240 *map.entry(entry.category.clone()).or_default() += net;
3241 }
3242 map
3243 };
3244
3245 let current = aggregate(current_tb);
3246 let prior = prior_tb.map(aggregate);
3247
3248 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3250 *map.get(key).unwrap_or(&Decimal::ZERO)
3251 };
3252
3253 let change = |key: &str| -> Decimal {
3255 let curr = get(¤t, key);
3256 match &prior {
3257 Some(p) => curr - get(p, key),
3258 None => curr,
3259 }
3260 };
3261
3262 let fixed_asset_change = change("FixedAssets");
3265 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3266 -fixed_asset_change
3267 } else {
3268 Decimal::ZERO
3269 };
3270
3271 let ar_change = change("Receivables");
3273 let inventory_change = change("Inventory");
3274 let ap_change = change("Payables");
3276 let accrued_change = change("AccruedLiabilities");
3277
3278 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3279 + (-ap_change)
3280 + (-accrued_change);
3281
3282 let capex = if fixed_asset_change > Decimal::ZERO {
3284 -fixed_asset_change
3285 } else {
3286 Decimal::ZERO
3287 };
3288 let investing_cf = capex;
3289
3290 let debt_change = -change("LongTermDebt");
3292 let equity_change = -change("Equity");
3293 let financing_cf = debt_change + equity_change;
3294
3295 let net_change = operating_cf + investing_cf + financing_cf;
3296
3297 vec![
3298 CashFlowItem {
3299 item_code: "CF-NI".to_string(),
3300 label: "Net Income".to_string(),
3301 category: CashFlowCategory::Operating,
3302 amount: net_income,
3303 amount_prior: None,
3304 sort_order: 1,
3305 is_total: false,
3306 },
3307 CashFlowItem {
3308 item_code: "CF-DEP".to_string(),
3309 label: "Depreciation & Amortization".to_string(),
3310 category: CashFlowCategory::Operating,
3311 amount: depreciation_addback,
3312 amount_prior: None,
3313 sort_order: 2,
3314 is_total: false,
3315 },
3316 CashFlowItem {
3317 item_code: "CF-AR".to_string(),
3318 label: "Change in Accounts Receivable".to_string(),
3319 category: CashFlowCategory::Operating,
3320 amount: -ar_change,
3321 amount_prior: None,
3322 sort_order: 3,
3323 is_total: false,
3324 },
3325 CashFlowItem {
3326 item_code: "CF-AP".to_string(),
3327 label: "Change in Accounts Payable".to_string(),
3328 category: CashFlowCategory::Operating,
3329 amount: -ap_change,
3330 amount_prior: None,
3331 sort_order: 4,
3332 is_total: false,
3333 },
3334 CashFlowItem {
3335 item_code: "CF-INV".to_string(),
3336 label: "Change in Inventory".to_string(),
3337 category: CashFlowCategory::Operating,
3338 amount: -inventory_change,
3339 amount_prior: None,
3340 sort_order: 5,
3341 is_total: false,
3342 },
3343 CashFlowItem {
3344 item_code: "CF-OP".to_string(),
3345 label: "Net Cash from Operating Activities".to_string(),
3346 category: CashFlowCategory::Operating,
3347 amount: operating_cf,
3348 amount_prior: None,
3349 sort_order: 6,
3350 is_total: true,
3351 },
3352 CashFlowItem {
3353 item_code: "CF-CAPEX".to_string(),
3354 label: "Capital Expenditures".to_string(),
3355 category: CashFlowCategory::Investing,
3356 amount: capex,
3357 amount_prior: None,
3358 sort_order: 7,
3359 is_total: false,
3360 },
3361 CashFlowItem {
3362 item_code: "CF-INV-T".to_string(),
3363 label: "Net Cash from Investing Activities".to_string(),
3364 category: CashFlowCategory::Investing,
3365 amount: investing_cf,
3366 amount_prior: None,
3367 sort_order: 8,
3368 is_total: true,
3369 },
3370 CashFlowItem {
3371 item_code: "CF-DEBT".to_string(),
3372 label: "Net Borrowings / (Repayments)".to_string(),
3373 category: CashFlowCategory::Financing,
3374 amount: debt_change,
3375 amount_prior: None,
3376 sort_order: 9,
3377 is_total: false,
3378 },
3379 CashFlowItem {
3380 item_code: "CF-EQ".to_string(),
3381 label: "Equity Changes".to_string(),
3382 category: CashFlowCategory::Financing,
3383 amount: equity_change,
3384 amount_prior: None,
3385 sort_order: 10,
3386 is_total: false,
3387 },
3388 CashFlowItem {
3389 item_code: "CF-FIN-T".to_string(),
3390 label: "Net Cash from Financing Activities".to_string(),
3391 category: CashFlowCategory::Financing,
3392 amount: financing_cf,
3393 amount_prior: None,
3394 sort_order: 11,
3395 is_total: true,
3396 },
3397 CashFlowItem {
3398 item_code: "CF-NET".to_string(),
3399 label: "Net Change in Cash".to_string(),
3400 category: CashFlowCategory::Operating,
3401 amount: net_change,
3402 amount_prior: None,
3403 sort_order: 12,
3404 is_total: true,
3405 },
3406 ]
3407 }
3408
3409 fn calculate_net_income_from_tb(
3413 tb: &[datasynth_generators::TrialBalanceEntry],
3414 ) -> rust_decimal::Decimal {
3415 use rust_decimal::Decimal;
3416
3417 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3418 for entry in tb {
3419 let net = entry.debit_balance - entry.credit_balance;
3420 *aggregated.entry(entry.category.clone()).or_default() += net;
3421 }
3422
3423 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3424 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3425 let opex = *aggregated
3426 .get("OperatingExpenses")
3427 .unwrap_or(&Decimal::ZERO);
3428 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3429 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3430
3431 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3434 let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3435 let tax = operating_income * tax_rate;
3436 operating_income - tax
3437 }
3438
/// Maps a GL account code to a trial-balance category using its two leading
/// characters.
///
/// | prefix | category             |
/// |--------|----------------------|
/// | 10     | `Cash`               |
/// | 11     | `Receivables`        |
/// | 12–14  | `Inventory`          |
/// | 15–19  | `FixedAssets`        |
/// | 20     | `Payables`           |
/// | 21–24  | `AccruedLiabilities` |
/// | 25–29  | `LongTermDebt`       |
/// | 30–39  | `Equity`             |
/// | 40–44  | `Revenue`            |
/// | 50–52  | `CostOfSales`        |
/// | 60–69  | `OperatingExpenses`  |
/// | 70–74  | `OtherIncome`        |
/// | 80–89  | `OtherExpenses`      |
///
/// Anything else (unlisted prefixes, codes shorter than two characters,
/// non-digit prefixes) falls back to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    const FALLBACK: &str = "OperatingExpenses";

    // Only prefixes made of two ASCII digits can match a category, so the
    // prefix is read as a number in 0..=99 and matched by range.
    let category = match code.as_bytes() {
        [tens @ b'0'..=b'9', ones @ b'0'..=b'9', ..] => {
            match (tens - b'0') * 10 + (ones - b'0') {
                10 => "Cash",
                11 => "Receivables",
                12..=14 => "Inventory",
                15..=19 => "FixedAssets",
                20 => "Payables",
                21..=24 => "AccruedLiabilities",
                25..=29 => "LongTermDebt",
                30..=39 => "Equity",
                40..=44 => "Revenue",
                50..=52 => "CostOfSales",
                60..=69 => "OperatingExpenses",
                70..=74 => "OtherIncome",
                80..=89 => "OtherExpenses",
                _ => FALLBACK,
            }
        }
        _ => FALLBACK,
    };

    category.to_owned()
}
3467
3468 fn phase_hr_data(
3470 &mut self,
3471 stats: &mut EnhancedGenerationStatistics,
3472 ) -> SynthResult<HrSnapshot> {
3473 if !self.config.hr.enabled {
3474 debug!("Phase 16: Skipped (HR generation disabled)");
3475 return Ok(HrSnapshot::default());
3476 }
3477
3478 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3479
3480 let seed = self.seed;
3481 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3482 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3483 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3484 let company_code = self
3485 .config
3486 .companies
3487 .first()
3488 .map(|c| c.code.as_str())
3489 .unwrap_or("1000");
3490 let currency = self
3491 .config
3492 .companies
3493 .first()
3494 .map(|c| c.currency.as_str())
3495 .unwrap_or("USD");
3496
3497 let employee_ids: Vec<String> = self
3498 .master_data
3499 .employees
3500 .iter()
3501 .map(|e| e.employee_id.clone())
3502 .collect();
3503
3504 if employee_ids.is_empty() {
3505 debug!("Phase 16: Skipped (no employees available)");
3506 return Ok(HrSnapshot::default());
3507 }
3508
3509 let cost_center_ids: Vec<String> = self
3512 .master_data
3513 .employees
3514 .iter()
3515 .filter_map(|e| e.cost_center.clone())
3516 .collect::<std::collections::HashSet<_>>()
3517 .into_iter()
3518 .collect();
3519
3520 let mut snapshot = HrSnapshot::default();
3521
3522 if self.config.hr.payroll.enabled {
3524 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3525 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3526
3527 let payroll_pack = self.primary_pack();
3529
3530 payroll_gen.set_country_pack(payroll_pack.clone());
3533
3534 let employees_with_salary: Vec<(
3535 String,
3536 rust_decimal::Decimal,
3537 Option<String>,
3538 Option<String>,
3539 )> = self
3540 .master_data
3541 .employees
3542 .iter()
3543 .map(|e| {
3544 (
3545 e.employee_id.clone(),
3546 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3548 e.department_id.clone(),
3549 )
3550 })
3551 .collect();
3552
3553 for month in 0..self.config.global.period_months {
3554 let period_start = start_date + chrono::Months::new(month);
3555 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3556 let (run, items) = payroll_gen.generate(
3557 company_code,
3558 &employees_with_salary,
3559 period_start,
3560 period_end,
3561 currency,
3562 );
3563 snapshot.payroll_runs.push(run);
3564 snapshot.payroll_run_count += 1;
3565 snapshot.payroll_line_item_count += items.len();
3566 snapshot.payroll_line_items.extend(items);
3567 }
3568 }
3569
3570 if self.config.hr.time_attendance.enabled {
3572 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3573 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3574 let entries = time_gen.generate(
3575 &employee_ids,
3576 start_date,
3577 end_date,
3578 &self.config.hr.time_attendance,
3579 );
3580 snapshot.time_entry_count = entries.len();
3581 snapshot.time_entries = entries;
3582 }
3583
3584 if self.config.hr.expenses.enabled {
3586 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3587 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3588 let company_currency = self
3589 .config
3590 .companies
3591 .first()
3592 .map(|c| c.currency.as_str())
3593 .unwrap_or("USD");
3594 let reports = expense_gen.generate_with_currency(
3595 &employee_ids,
3596 start_date,
3597 end_date,
3598 &self.config.hr.expenses,
3599 company_currency,
3600 );
3601 snapshot.expense_report_count = reports.len();
3602 snapshot.expense_reports = reports;
3603 }
3604
3605 stats.payroll_run_count = snapshot.payroll_run_count;
3606 stats.time_entry_count = snapshot.time_entry_count;
3607 stats.expense_report_count = snapshot.expense_report_count;
3608
3609 info!(
3610 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3611 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3612 snapshot.time_entry_count, snapshot.expense_report_count
3613 );
3614 self.check_resources_with_log("post-hr")?;
3615
3616 Ok(snapshot)
3617 }
3618
3619 fn phase_accounting_standards(
3621 &mut self,
3622 stats: &mut EnhancedGenerationStatistics,
3623 ) -> SynthResult<AccountingStandardsSnapshot> {
3624 if !self.phase_config.generate_accounting_standards
3625 || !self.config.accounting_standards.enabled
3626 {
3627 debug!("Phase 17: Skipped (accounting standards generation disabled)");
3628 return Ok(AccountingStandardsSnapshot::default());
3629 }
3630 info!("Phase 17: Generating Accounting Standards Data");
3631
3632 let seed = self.seed;
3633 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3634 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3635 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3636 let company_code = self
3637 .config
3638 .companies
3639 .first()
3640 .map(|c| c.code.as_str())
3641 .unwrap_or("1000");
3642 let currency = self
3643 .config
3644 .companies
3645 .first()
3646 .map(|c| c.currency.as_str())
3647 .unwrap_or("USD");
3648
3649 let framework = match self.config.accounting_standards.framework {
3654 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3655 datasynth_standards::framework::AccountingFramework::UsGaap
3656 }
3657 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3658 datasynth_standards::framework::AccountingFramework::Ifrs
3659 }
3660 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3661 datasynth_standards::framework::AccountingFramework::DualReporting
3662 }
3663 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3664 datasynth_standards::framework::AccountingFramework::FrenchGaap
3665 }
3666 None => {
3667 let pack = self.primary_pack();
3669 let pack_fw = pack.accounting.framework.as_str();
3670 match pack_fw {
3671 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3672 "dual_reporting" => {
3673 datasynth_standards::framework::AccountingFramework::DualReporting
3674 }
3675 "french_gaap" => {
3676 datasynth_standards::framework::AccountingFramework::FrenchGaap
3677 }
3678 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3680 }
3681 }
3682 };
3683
3684 let mut snapshot = AccountingStandardsSnapshot::default();
3685
3686 if self.config.accounting_standards.revenue_recognition.enabled {
3688 let customer_ids: Vec<String> = self
3689 .master_data
3690 .customers
3691 .iter()
3692 .map(|c| c.customer_id.clone())
3693 .collect();
3694
3695 if !customer_ids.is_empty() {
3696 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3697 let contracts = rev_gen.generate(
3698 company_code,
3699 &customer_ids,
3700 start_date,
3701 end_date,
3702 currency,
3703 &self.config.accounting_standards.revenue_recognition,
3704 framework,
3705 );
3706 snapshot.revenue_contract_count = contracts.len();
3707 snapshot.contracts = contracts;
3708 }
3709 }
3710
3711 if self.config.accounting_standards.impairment.enabled {
3713 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3714 .master_data
3715 .assets
3716 .iter()
3717 .map(|a| {
3718 (
3719 a.asset_id.clone(),
3720 a.description.clone(),
3721 a.acquisition_cost,
3722 )
3723 })
3724 .collect();
3725
3726 if !asset_data.is_empty() {
3727 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3728 let tests = imp_gen.generate(
3729 company_code,
3730 &asset_data,
3731 end_date,
3732 &self.config.accounting_standards.impairment,
3733 framework,
3734 );
3735 snapshot.impairment_test_count = tests.len();
3736 snapshot.impairment_tests = tests;
3737 }
3738 }
3739
3740 stats.revenue_contract_count = snapshot.revenue_contract_count;
3741 stats.impairment_test_count = snapshot.impairment_test_count;
3742
3743 info!(
3744 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3745 snapshot.revenue_contract_count, snapshot.impairment_test_count
3746 );
3747 self.check_resources_with_log("post-accounting-standards")?;
3748
3749 Ok(snapshot)
3750 }
3751
3752 fn phase_manufacturing(
3754 &mut self,
3755 stats: &mut EnhancedGenerationStatistics,
3756 ) -> SynthResult<ManufacturingSnapshot> {
3757 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3758 debug!("Phase 18: Skipped (manufacturing generation disabled)");
3759 return Ok(ManufacturingSnapshot::default());
3760 }
3761 info!("Phase 18: Generating Manufacturing Data");
3762
3763 let seed = self.seed;
3764 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3765 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3766 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3767 let company_code = self
3768 .config
3769 .companies
3770 .first()
3771 .map(|c| c.code.as_str())
3772 .unwrap_or("1000");
3773
3774 let material_data: Vec<(String, String)> = self
3775 .master_data
3776 .materials
3777 .iter()
3778 .map(|m| (m.material_id.clone(), m.description.clone()))
3779 .collect();
3780
3781 if material_data.is_empty() {
3782 debug!("Phase 18: Skipped (no materials available)");
3783 return Ok(ManufacturingSnapshot::default());
3784 }
3785
3786 let mut snapshot = ManufacturingSnapshot::default();
3787
3788 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3790 let production_orders = prod_gen.generate(
3791 company_code,
3792 &material_data,
3793 start_date,
3794 end_date,
3795 &self.config.manufacturing.production_orders,
3796 &self.config.manufacturing.costing,
3797 &self.config.manufacturing.routing,
3798 );
3799 snapshot.production_order_count = production_orders.len();
3800
3801 let inspection_data: Vec<(String, String, String)> = production_orders
3803 .iter()
3804 .map(|po| {
3805 (
3806 po.order_id.clone(),
3807 po.material_id.clone(),
3808 po.material_description.clone(),
3809 )
3810 })
3811 .collect();
3812
3813 snapshot.production_orders = production_orders;
3814
3815 if !inspection_data.is_empty() {
3816 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3817 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3818 snapshot.quality_inspection_count = inspections.len();
3819 snapshot.quality_inspections = inspections;
3820 }
3821
3822 let storage_locations: Vec<(String, String)> = material_data
3824 .iter()
3825 .enumerate()
3826 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3827 .collect();
3828
3829 let employee_ids: Vec<String> = self
3830 .master_data
3831 .employees
3832 .iter()
3833 .map(|e| e.employee_id.clone())
3834 .collect();
3835 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3836 .with_employee_pool(employee_ids);
3837 let mut cycle_count_total = 0usize;
3838 for month in 0..self.config.global.period_months {
3839 let count_date = start_date + chrono::Months::new(month);
3840 let items_per_count = storage_locations.len().clamp(10, 50);
3841 let cc = cc_gen.generate(
3842 company_code,
3843 &storage_locations,
3844 count_date,
3845 items_per_count,
3846 );
3847 snapshot.cycle_counts.push(cc);
3848 cycle_count_total += 1;
3849 }
3850 snapshot.cycle_count_count = cycle_count_total;
3851
3852 stats.production_order_count = snapshot.production_order_count;
3853 stats.quality_inspection_count = snapshot.quality_inspection_count;
3854 stats.cycle_count_count = snapshot.cycle_count_count;
3855
3856 info!(
3857 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3858 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3859 );
3860 self.check_resources_with_log("post-manufacturing")?;
3861
3862 Ok(snapshot)
3863 }
3864
3865 fn phase_sales_kpi_budgets(
3867 &mut self,
3868 coa: &Arc<ChartOfAccounts>,
3869 financial_reporting: &FinancialReportingSnapshot,
3870 stats: &mut EnhancedGenerationStatistics,
3871 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3872 if !self.phase_config.generate_sales_kpi_budgets {
3873 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3874 return Ok(SalesKpiBudgetsSnapshot::default());
3875 }
3876 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3877
3878 let seed = self.seed;
3879 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3880 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3881 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3882 let company_code = self
3883 .config
3884 .companies
3885 .first()
3886 .map(|c| c.code.as_str())
3887 .unwrap_or("1000");
3888
3889 let mut snapshot = SalesKpiBudgetsSnapshot::default();
3890
3891 if self.config.sales_quotes.enabled {
3893 let customer_data: Vec<(String, String)> = self
3894 .master_data
3895 .customers
3896 .iter()
3897 .map(|c| (c.customer_id.clone(), c.name.clone()))
3898 .collect();
3899 let material_data: Vec<(String, String)> = self
3900 .master_data
3901 .materials
3902 .iter()
3903 .map(|m| (m.material_id.clone(), m.description.clone()))
3904 .collect();
3905
3906 if !customer_data.is_empty() && !material_data.is_empty() {
3907 let employee_ids: Vec<String> = self
3908 .master_data
3909 .employees
3910 .iter()
3911 .map(|e| e.employee_id.clone())
3912 .collect();
3913 let customer_ids: Vec<String> = self
3914 .master_data
3915 .customers
3916 .iter()
3917 .map(|c| c.customer_id.clone())
3918 .collect();
3919 let company_currency = self
3920 .config
3921 .companies
3922 .first()
3923 .map(|c| c.currency.as_str())
3924 .unwrap_or("USD");
3925
3926 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3927 .with_pools(employee_ids, customer_ids);
3928 let quotes = quote_gen.generate_with_currency(
3929 company_code,
3930 &customer_data,
3931 &material_data,
3932 start_date,
3933 end_date,
3934 &self.config.sales_quotes,
3935 company_currency,
3936 );
3937 snapshot.sales_quote_count = quotes.len();
3938 snapshot.sales_quotes = quotes;
3939 }
3940 }
3941
3942 if self.config.financial_reporting.management_kpis.enabled {
3944 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3945 let mut kpis = kpi_gen.generate(
3946 company_code,
3947 start_date,
3948 end_date,
3949 &self.config.financial_reporting.management_kpis,
3950 );
3951
3952 {
3954 use rust_decimal::Decimal;
3955
3956 if let Some(income_stmt) =
3957 financial_reporting.financial_statements.iter().find(|fs| {
3958 fs.statement_type == StatementType::IncomeStatement
3959 && fs.company_code == company_code
3960 })
3961 {
3962 let total_revenue: Decimal = income_stmt
3964 .line_items
3965 .iter()
3966 .filter(|li| li.section.contains("Revenue") && !li.is_total)
3967 .map(|li| li.amount)
3968 .sum();
3969 let total_cogs: Decimal = income_stmt
3970 .line_items
3971 .iter()
3972 .filter(|li| {
3973 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
3974 && !li.is_total
3975 })
3976 .map(|li| li.amount.abs())
3977 .sum();
3978 let total_opex: Decimal = income_stmt
3979 .line_items
3980 .iter()
3981 .filter(|li| {
3982 li.section.contains("Expense")
3983 && !li.is_total
3984 && !li.section.contains("Cost")
3985 })
3986 .map(|li| li.amount.abs())
3987 .sum();
3988
3989 if total_revenue > Decimal::ZERO {
3990 let hundred = Decimal::from(100);
3991 let gross_margin_pct =
3992 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
3993 let operating_income = total_revenue - total_cogs - total_opex;
3994 let op_margin_pct =
3995 (operating_income * hundred / total_revenue).round_dp(2);
3996
3997 for kpi in &mut kpis {
3999 if kpi.name == "Gross Margin" {
4000 kpi.value = gross_margin_pct;
4001 } else if kpi.name == "Operating Margin" {
4002 kpi.value = op_margin_pct;
4003 }
4004 }
4005 }
4006 }
4007
4008 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4010 fs.statement_type == StatementType::BalanceSheet
4011 && fs.company_code == company_code
4012 }) {
4013 let current_assets: Decimal = bs
4014 .line_items
4015 .iter()
4016 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4017 .map(|li| li.amount)
4018 .sum();
4019 let current_liabilities: Decimal = bs
4020 .line_items
4021 .iter()
4022 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4023 .map(|li| li.amount.abs())
4024 .sum();
4025
4026 if current_liabilities > Decimal::ZERO {
4027 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4028 for kpi in &mut kpis {
4029 if kpi.name == "Current Ratio" {
4030 kpi.value = current_ratio;
4031 }
4032 }
4033 }
4034 }
4035 }
4036
4037 snapshot.kpi_count = kpis.len();
4038 snapshot.kpis = kpis;
4039 }
4040
4041 if self.config.financial_reporting.budgets.enabled {
4043 let account_data: Vec<(String, String)> = coa
4044 .accounts
4045 .iter()
4046 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4047 .collect();
4048
4049 if !account_data.is_empty() {
4050 let fiscal_year = start_date.year() as u32;
4051 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4052 let budget = budget_gen.generate(
4053 company_code,
4054 fiscal_year,
4055 &account_data,
4056 &self.config.financial_reporting.budgets,
4057 );
4058 snapshot.budget_line_count = budget.line_items.len();
4059 snapshot.budgets.push(budget);
4060 }
4061 }
4062
4063 stats.sales_quote_count = snapshot.sales_quote_count;
4064 stats.kpi_count = snapshot.kpi_count;
4065 stats.budget_line_count = snapshot.budget_line_count;
4066
4067 info!(
4068 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4069 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4070 );
4071 self.check_resources_with_log("post-sales-kpi-budgets")?;
4072
4073 Ok(snapshot)
4074 }
4075
4076 fn phase_tax_generation(
4078 &mut self,
4079 document_flows: &DocumentFlowSnapshot,
4080 stats: &mut EnhancedGenerationStatistics,
4081 ) -> SynthResult<TaxSnapshot> {
4082 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4083 debug!("Phase 20: Skipped (tax generation disabled)");
4084 return Ok(TaxSnapshot::default());
4085 }
4086 info!("Phase 20: Generating Tax Data");
4087
4088 let seed = self.seed;
4089 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4090 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4091 let fiscal_year = start_date.year();
4092 let company_code = self
4093 .config
4094 .companies
4095 .first()
4096 .map(|c| c.code.as_str())
4097 .unwrap_or("1000");
4098
4099 let mut gen =
4100 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4101
4102 let pack = self.primary_pack().clone();
4103 let (jurisdictions, codes) =
4104 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4105
4106 let mut provisions = Vec::new();
4108 if self.config.tax.provisions.enabled {
4109 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4110 for company in &self.config.companies {
4111 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4112 let statutory_rate = rust_decimal::Decimal::new(
4113 (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4114 2,
4115 );
4116 let provision = provision_gen.generate(
4117 &company.code,
4118 start_date,
4119 pre_tax_income,
4120 statutory_rate,
4121 );
4122 provisions.push(provision);
4123 }
4124 }
4125
4126 let mut tax_lines = Vec::new();
4128 if !codes.is_empty() {
4129 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4130 datasynth_generators::TaxLineGeneratorConfig::default(),
4131 codes.clone(),
4132 seed + 72,
4133 );
4134
4135 let buyer_country = self
4138 .config
4139 .companies
4140 .first()
4141 .map(|c| c.country.as_str())
4142 .unwrap_or("US");
4143 for vi in &document_flows.vendor_invoices {
4144 let lines = tax_line_gen.generate_for_document(
4145 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4146 &vi.header.document_id,
4147 buyer_country, buyer_country,
4149 vi.payable_amount,
4150 vi.header.document_date,
4151 None,
4152 );
4153 tax_lines.extend(lines);
4154 }
4155
4156 for ci in &document_flows.customer_invoices {
4158 let lines = tax_line_gen.generate_for_document(
4159 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4160 &ci.header.document_id,
4161 buyer_country, buyer_country,
4163 ci.total_gross_amount,
4164 ci.header.document_date,
4165 None,
4166 );
4167 tax_lines.extend(lines);
4168 }
4169 }
4170
4171 let snapshot = TaxSnapshot {
4172 jurisdiction_count: jurisdictions.len(),
4173 code_count: codes.len(),
4174 jurisdictions,
4175 codes,
4176 tax_provisions: provisions,
4177 tax_lines,
4178 tax_returns: Vec::new(),
4179 withholding_records: Vec::new(),
4180 tax_anomaly_labels: Vec::new(),
4181 };
4182
4183 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4184 stats.tax_code_count = snapshot.code_count;
4185 stats.tax_provision_count = snapshot.tax_provisions.len();
4186 stats.tax_line_count = snapshot.tax_lines.len();
4187
4188 info!(
4189 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4190 snapshot.jurisdiction_count,
4191 snapshot.code_count,
4192 snapshot.tax_provisions.len()
4193 );
4194 self.check_resources_with_log("post-tax")?;
4195
4196 Ok(snapshot)
4197 }
4198
4199 fn phase_esg_generation(
4201 &mut self,
4202 document_flows: &DocumentFlowSnapshot,
4203 stats: &mut EnhancedGenerationStatistics,
4204 ) -> SynthResult<EsgSnapshot> {
4205 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4206 debug!("Phase 21: Skipped (ESG generation disabled)");
4207 return Ok(EsgSnapshot::default());
4208 }
4209 info!("Phase 21: Generating ESG Data");
4210
4211 let seed = self.seed;
4212 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4213 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4214 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4215 let entity_id = self
4216 .config
4217 .companies
4218 .first()
4219 .map(|c| c.code.as_str())
4220 .unwrap_or("1000");
4221
4222 let esg_cfg = &self.config.esg;
4223 let mut snapshot = EsgSnapshot::default();
4224
4225 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4227 esg_cfg.environmental.energy.clone(),
4228 seed + 80,
4229 );
4230 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4231
4232 let facility_count = esg_cfg.environmental.energy.facility_count;
4234 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4235 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4236
4237 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4239 seed + 82,
4240 esg_cfg.environmental.waste.diversion_target,
4241 facility_count,
4242 );
4243 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4244
4245 let mut emission_gen =
4247 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4248
4249 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4251 .iter()
4252 .map(|e| datasynth_generators::EnergyInput {
4253 facility_id: e.facility_id.clone(),
4254 energy_type: match e.energy_source {
4255 EnergySourceType::NaturalGas => {
4256 datasynth_generators::EnergyInputType::NaturalGas
4257 }
4258 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4259 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4260 _ => datasynth_generators::EnergyInputType::Electricity,
4261 },
4262 consumption_kwh: e.consumption_kwh,
4263 period: e.period,
4264 })
4265 .collect();
4266
4267 let mut emissions = Vec::new();
4268 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4269 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4270
4271 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4273 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4274 for payment in &document_flows.payments {
4275 if payment.is_vendor {
4276 *totals
4277 .entry(payment.business_partner_id.clone())
4278 .or_default() += payment.amount;
4279 }
4280 }
4281 totals
4282 };
4283 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4284 .master_data
4285 .vendors
4286 .iter()
4287 .map(|v| {
4288 let spend = vendor_payment_totals
4289 .get(&v.vendor_id)
4290 .copied()
4291 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4292 datasynth_generators::VendorSpendInput {
4293 vendor_id: v.vendor_id.clone(),
4294 category: format!("{:?}", v.vendor_type).to_lowercase(),
4295 spend,
4296 country: v.country.clone(),
4297 }
4298 })
4299 .collect();
4300 if !vendor_spend.is_empty() {
4301 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4302 entity_id,
4303 &vendor_spend,
4304 start_date,
4305 end_date,
4306 ));
4307 }
4308
4309 let headcount = self.master_data.employees.len() as u32;
4311 if headcount > 0 {
4312 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4313 emissions.extend(emission_gen.generate_scope3_business_travel(
4314 entity_id,
4315 travel_spend,
4316 start_date,
4317 ));
4318 emissions
4319 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4320 }
4321
4322 snapshot.emission_count = emissions.len();
4323 snapshot.emissions = emissions;
4324 snapshot.energy = energy_records;
4325
4326 let mut workforce_gen =
4328 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4329 let total_headcount = headcount.max(100);
4330 snapshot.diversity =
4331 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4332 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4333 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4334 entity_id,
4335 facility_count,
4336 start_date,
4337 end_date,
4338 );
4339
4340 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4343 entity_id,
4344 &snapshot.safety_incidents,
4345 total_hours,
4346 start_date,
4347 );
4348 snapshot.safety_metrics = vec![safety_metric];
4349
4350 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4352 seed + 85,
4353 esg_cfg.governance.board_size,
4354 esg_cfg.governance.independence_target,
4355 );
4356 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4357
4358 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4360 esg_cfg.supply_chain_esg.clone(),
4361 seed + 86,
4362 );
4363 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4364 .master_data
4365 .vendors
4366 .iter()
4367 .map(|v| datasynth_generators::VendorInput {
4368 vendor_id: v.vendor_id.clone(),
4369 country: v.country.clone(),
4370 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4371 quality_score: None,
4372 })
4373 .collect();
4374 snapshot.supplier_assessments =
4375 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4376
4377 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4379 seed + 87,
4380 esg_cfg.reporting.clone(),
4381 esg_cfg.climate_scenarios.clone(),
4382 );
4383 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4384 snapshot.disclosures = disclosure_gen.generate_disclosures(
4385 entity_id,
4386 &snapshot.materiality,
4387 start_date,
4388 end_date,
4389 );
4390 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4391 snapshot.disclosure_count = snapshot.disclosures.len();
4392
4393 if esg_cfg.anomaly_rate > 0.0 {
4395 let mut anomaly_injector =
4396 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4397 let mut labels = Vec::new();
4398 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4399 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4400 labels.extend(
4401 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4402 );
4403 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4404 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4405 snapshot.anomaly_labels = labels;
4406 }
4407
4408 stats.esg_emission_count = snapshot.emission_count;
4409 stats.esg_disclosure_count = snapshot.disclosure_count;
4410
4411 info!(
4412 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4413 snapshot.emission_count,
4414 snapshot.disclosure_count,
4415 snapshot.supplier_assessments.len()
4416 );
4417 self.check_resources_with_log("post-esg")?;
4418
4419 Ok(snapshot)
4420 }
4421
4422 fn phase_treasury_data(
4424 &mut self,
4425 document_flows: &DocumentFlowSnapshot,
4426 stats: &mut EnhancedGenerationStatistics,
4427 ) -> SynthResult<TreasurySnapshot> {
4428 if !self.config.treasury.enabled {
4429 debug!("Phase 22: Skipped (treasury generation disabled)");
4430 return Ok(TreasurySnapshot::default());
4431 }
4432 info!("Phase 22: Generating Treasury Data");
4433
4434 let seed = self.seed;
4435 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4436 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4437 let currency = self
4438 .config
4439 .companies
4440 .first()
4441 .map(|c| c.currency.as_str())
4442 .unwrap_or("USD");
4443 let entity_id = self
4444 .config
4445 .companies
4446 .first()
4447 .map(|c| c.code.as_str())
4448 .unwrap_or("1000");
4449
4450 let mut snapshot = TreasurySnapshot::default();
4451
4452 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4454 self.config.treasury.debt.clone(),
4455 seed + 90,
4456 );
4457 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4458
4459 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4461 self.config.treasury.hedging.clone(),
4462 seed + 91,
4463 );
4464 for debt in &snapshot.debt_instruments {
4465 if debt.rate_type == InterestRateType::Variable {
4466 let swap = hedge_gen.generate_ir_swap(
4467 currency,
4468 debt.principal,
4469 debt.origination_date,
4470 debt.maturity_date,
4471 );
4472 snapshot.hedging_instruments.push(swap);
4473 }
4474 }
4475
4476 {
4479 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4480 for payment in &document_flows.payments {
4481 if payment.currency != currency {
4482 let entry = fx_map
4483 .entry(payment.currency.clone())
4484 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4485 entry.0 += payment.amount;
4486 if payment.header.document_date > entry.1 {
4488 entry.1 = payment.header.document_date;
4489 }
4490 }
4491 }
4492 if !fx_map.is_empty() {
4493 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4494 .into_iter()
4495 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4496 datasynth_generators::treasury::FxExposure {
4497 currency_pair: format!("{}/{}", foreign_ccy, currency),
4498 foreign_currency: foreign_ccy,
4499 net_amount,
4500 settlement_date,
4501 description: "AP payment FX exposure".to_string(),
4502 }
4503 })
4504 .collect();
4505 let (fx_instruments, fx_relationships) =
4506 hedge_gen.generate(start_date, &fx_exposures);
4507 snapshot.hedging_instruments.extend(fx_instruments);
4508 snapshot.hedge_relationships.extend(fx_relationships);
4509 }
4510 }
4511
4512 if self.config.treasury.anomaly_rate > 0.0 {
4514 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4515 seed + 92,
4516 self.config.treasury.anomaly_rate,
4517 );
4518 let mut labels = Vec::new();
4519 labels.extend(
4520 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4521 );
4522 snapshot.treasury_anomaly_labels = labels;
4523 }
4524
4525 if self.config.treasury.cash_positioning.enabled {
4527 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4528
4529 for payment in &document_flows.payments {
4531 cash_flows.push(datasynth_generators::treasury::CashFlow {
4532 date: payment.header.document_date,
4533 account_id: format!("{}-MAIN", entity_id),
4534 amount: payment.amount,
4535 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4536 });
4537 }
4538
4539 for chain in &document_flows.o2c_chains {
4541 if let Some(ref receipt) = chain.customer_receipt {
4542 cash_flows.push(datasynth_generators::treasury::CashFlow {
4543 date: receipt.header.document_date,
4544 account_id: format!("{}-MAIN", entity_id),
4545 amount: receipt.amount,
4546 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4547 });
4548 }
4549 }
4550
4551 if !cash_flows.is_empty() {
4552 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4553 self.config.treasury.cash_positioning.clone(),
4554 seed + 93,
4555 );
4556 let account_id = format!("{}-MAIN", entity_id);
4557 snapshot.cash_positions = cash_gen.generate(
4558 entity_id,
4559 &account_id,
4560 currency,
4561 &cash_flows,
4562 start_date,
4563 start_date + chrono::Months::new(self.config.global.period_months),
4564 rust_decimal::Decimal::new(1_000_000, 0), );
4566 }
4567 }
4568
4569 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4570 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4571 stats.cash_position_count = snapshot.cash_positions.len();
4572
4573 info!(
4574 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4575 snapshot.debt_instruments.len(),
4576 snapshot.hedging_instruments.len(),
4577 snapshot.cash_positions.len()
4578 );
4579 self.check_resources_with_log("post-treasury")?;
4580
4581 Ok(snapshot)
4582 }
4583
4584 fn phase_project_accounting(
4586 &mut self,
4587 document_flows: &DocumentFlowSnapshot,
4588 hr: &HrSnapshot,
4589 stats: &mut EnhancedGenerationStatistics,
4590 ) -> SynthResult<ProjectAccountingSnapshot> {
4591 if !self.config.project_accounting.enabled {
4592 debug!("Phase 23: Skipped (project accounting disabled)");
4593 return Ok(ProjectAccountingSnapshot::default());
4594 }
4595 info!("Phase 23: Generating Project Accounting Data");
4596
4597 let seed = self.seed;
4598 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4599 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4600 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4601 let company_code = self
4602 .config
4603 .companies
4604 .first()
4605 .map(|c| c.code.as_str())
4606 .unwrap_or("1000");
4607
4608 let mut snapshot = ProjectAccountingSnapshot::default();
4609
4610 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4612 self.config.project_accounting.clone(),
4613 seed + 95,
4614 );
4615 let pool = project_gen.generate(company_code, start_date, end_date);
4616 snapshot.projects = pool.projects.clone();
4617
4618 {
4620 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4621 Vec::new();
4622
4623 for te in &hr.time_entries {
4625 let total_hours = te.hours_regular + te.hours_overtime;
4626 if total_hours > 0.0 {
4627 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4628 id: te.entry_id.clone(),
4629 entity_id: company_code.to_string(),
4630 date: te.date,
4631 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4632 .unwrap_or(rust_decimal::Decimal::ZERO),
4633 source_type: CostSourceType::TimeEntry,
4634 hours: Some(
4635 rust_decimal::Decimal::from_f64_retain(total_hours)
4636 .unwrap_or(rust_decimal::Decimal::ZERO),
4637 ),
4638 });
4639 }
4640 }
4641
4642 for er in &hr.expense_reports {
4644 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4645 id: er.report_id.clone(),
4646 entity_id: company_code.to_string(),
4647 date: er.submission_date,
4648 amount: er.total_amount,
4649 source_type: CostSourceType::ExpenseReport,
4650 hours: None,
4651 });
4652 }
4653
4654 for po in &document_flows.purchase_orders {
4656 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4657 id: po.header.document_id.clone(),
4658 entity_id: company_code.to_string(),
4659 date: po.header.document_date,
4660 amount: po.total_net_amount,
4661 source_type: CostSourceType::PurchaseOrder,
4662 hours: None,
4663 });
4664 }
4665
4666 for vi in &document_flows.vendor_invoices {
4668 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4669 id: vi.header.document_id.clone(),
4670 entity_id: company_code.to_string(),
4671 date: vi.header.document_date,
4672 amount: vi.payable_amount,
4673 source_type: CostSourceType::VendorInvoice,
4674 hours: None,
4675 });
4676 }
4677
4678 if !source_docs.is_empty() && !pool.projects.is_empty() {
4679 let mut cost_gen =
4680 datasynth_generators::project_accounting::ProjectCostGenerator::new(
4681 self.config.project_accounting.cost_allocation.clone(),
4682 seed + 99,
4683 );
4684 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4685 }
4686 }
4687
4688 if self.config.project_accounting.change_orders.enabled {
4690 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4691 self.config.project_accounting.change_orders.clone(),
4692 seed + 96,
4693 );
4694 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4695 }
4696
4697 if self.config.project_accounting.milestones.enabled {
4699 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4700 self.config.project_accounting.milestones.clone(),
4701 seed + 97,
4702 );
4703 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4704 }
4705
4706 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4708 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4709 self.config.project_accounting.earned_value.clone(),
4710 seed + 98,
4711 );
4712 snapshot.earned_value_metrics =
4713 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4714 }
4715
4716 stats.project_count = snapshot.projects.len();
4717 stats.project_change_order_count = snapshot.change_orders.len();
4718 stats.project_cost_line_count = snapshot.cost_lines.len();
4719
4720 info!(
4721 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4722 snapshot.projects.len(),
4723 snapshot.change_orders.len(),
4724 snapshot.milestones.len(),
4725 snapshot.earned_value_metrics.len()
4726 );
4727 self.check_resources_with_log("post-project-accounting")?;
4728
4729 Ok(snapshot)
4730 }
4731
4732 fn phase_opening_balances(
4734 &mut self,
4735 coa: &Arc<ChartOfAccounts>,
4736 stats: &mut EnhancedGenerationStatistics,
4737 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4738 if !self.config.balance.generate_opening_balances {
4739 debug!("Phase 3b: Skipped (opening balance generation disabled)");
4740 return Ok(Vec::new());
4741 }
4742 info!("Phase 3b: Generating Opening Balances");
4743
4744 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4745 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4746 let fiscal_year = start_date.year();
4747
4748 let industry = match self.config.global.industry {
4749 IndustrySector::Manufacturing => IndustryType::Manufacturing,
4750 IndustrySector::Retail => IndustryType::Retail,
4751 IndustrySector::FinancialServices => IndustryType::Financial,
4752 IndustrySector::Healthcare => IndustryType::Healthcare,
4753 IndustrySector::Technology => IndustryType::Technology,
4754 _ => IndustryType::Manufacturing,
4755 };
4756
4757 let config = datasynth_generators::OpeningBalanceConfig {
4758 industry,
4759 ..Default::default()
4760 };
4761 let mut gen =
4762 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4763
4764 let mut results = Vec::new();
4765 for company in &self.config.companies {
4766 let spec = OpeningBalanceSpec::new(
4767 company.code.clone(),
4768 start_date,
4769 fiscal_year,
4770 company.currency.clone(),
4771 rust_decimal::Decimal::new(10_000_000, 0),
4772 industry,
4773 );
4774 let ob = gen.generate(&spec, coa, start_date, &company.code);
4775 results.push(ob);
4776 }
4777
4778 stats.opening_balance_count = results.len();
4779 info!("Opening balances generated: {} companies", results.len());
4780 self.check_resources_with_log("post-opening-balances")?;
4781
4782 Ok(results)
4783 }
4784
4785 fn phase_subledger_reconciliation(
4787 &mut self,
4788 subledger: &SubledgerSnapshot,
4789 entries: &[JournalEntry],
4790 stats: &mut EnhancedGenerationStatistics,
4791 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4792 if !self.config.balance.reconcile_subledgers {
4793 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4794 return Ok(Vec::new());
4795 }
4796 info!("Phase 9b: Reconciling GL to subledger balances");
4797
4798 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4799 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4800 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4801
4802 let tracker_config = BalanceTrackerConfig {
4804 validate_on_each_entry: false,
4805 track_history: false,
4806 fail_on_validation_error: false,
4807 ..Default::default()
4808 };
4809 let recon_currency = self
4810 .config
4811 .companies
4812 .first()
4813 .map(|c| c.currency.clone())
4814 .unwrap_or_else(|| "USD".to_string());
4815 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4816 let _ = tracker.apply_entries(entries);
4817
4818 let mut engine = datasynth_generators::ReconciliationEngine::new(
4819 datasynth_generators::ReconciliationConfig::default(),
4820 );
4821
4822 let mut results = Vec::new();
4823 let company_code = self
4824 .config
4825 .companies
4826 .first()
4827 .map(|c| c.code.as_str())
4828 .unwrap_or("1000");
4829
4830 if !subledger.ar_invoices.is_empty() {
4832 let gl_balance = tracker
4833 .get_account_balance(
4834 company_code,
4835 datasynth_core::accounts::control_accounts::AR_CONTROL,
4836 )
4837 .map(|b| b.closing_balance)
4838 .unwrap_or_default();
4839 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4840 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4841 }
4842
4843 if !subledger.ap_invoices.is_empty() {
4845 let gl_balance = tracker
4846 .get_account_balance(
4847 company_code,
4848 datasynth_core::accounts::control_accounts::AP_CONTROL,
4849 )
4850 .map(|b| b.closing_balance)
4851 .unwrap_or_default();
4852 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4853 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4854 }
4855
4856 if !subledger.fa_records.is_empty() {
4858 let gl_asset_balance = tracker
4859 .get_account_balance(
4860 company_code,
4861 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4862 )
4863 .map(|b| b.closing_balance)
4864 .unwrap_or_default();
4865 let gl_accum_depr_balance = tracker
4866 .get_account_balance(
4867 company_code,
4868 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4869 )
4870 .map(|b| b.closing_balance)
4871 .unwrap_or_default();
4872 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4873 subledger.fa_records.iter().collect();
4874 let (asset_recon, depr_recon) = engine.reconcile_fa(
4875 company_code,
4876 end_date,
4877 gl_asset_balance,
4878 gl_accum_depr_balance,
4879 &fa_refs,
4880 );
4881 results.push(asset_recon);
4882 results.push(depr_recon);
4883 }
4884
4885 if !subledger.inventory_positions.is_empty() {
4887 let gl_balance = tracker
4888 .get_account_balance(
4889 company_code,
4890 datasynth_core::accounts::control_accounts::INVENTORY,
4891 )
4892 .map(|b| b.closing_balance)
4893 .unwrap_or_default();
4894 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4895 subledger.inventory_positions.iter().collect();
4896 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4897 }
4898
4899 stats.subledger_reconciliation_count = results.len();
4900 info!(
4901 "Subledger reconciliation complete: {} reconciliations",
4902 results.len()
4903 );
4904 self.check_resources_with_log("post-subledger-reconciliation")?;
4905
4906 Ok(results)
4907 }
4908
4909 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4911 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4912
4913 let use_french_pcg = self.config.accounting_standards.enabled
4914 && matches!(
4915 self.config.accounting_standards.framework,
4916 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap)
4917 );
4918
4919 let mut gen = ChartOfAccountsGenerator::new(
4920 self.config.chart_of_accounts.complexity,
4921 self.config.global.industry,
4922 self.seed,
4923 )
4924 .with_french_pcg(use_french_pcg);
4925
4926 let coa = Arc::new(gen.generate());
4927 self.coa = Some(Arc::clone(&coa));
4928
4929 if let Some(pb) = pb {
4930 pb.finish_with_message("Chart of Accounts complete");
4931 }
4932
4933 Ok(coa)
4934 }
4935
4936 fn generate_master_data(&mut self) -> SynthResult<()> {
4938 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4939 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4940 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4941
4942 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
4944
4945 let pack = self.primary_pack().clone();
4947
4948 for (i, company) in self.config.companies.iter().enumerate() {
4949 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
4950
4951 let mut vendor_gen = VendorGenerator::new(company_seed);
4953 vendor_gen.set_country_pack(pack.clone());
4954 let vendor_pool = vendor_gen.generate_vendor_pool(
4955 self.phase_config.vendors_per_company,
4956 &company.code,
4957 start_date,
4958 );
4959 self.master_data.vendors.extend(vendor_pool.vendors);
4960 if let Some(pb) = &pb {
4961 pb.inc(1);
4962 }
4963
4964 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
4966 customer_gen.set_country_pack(pack.clone());
4967 let customer_pool = customer_gen.generate_customer_pool(
4968 self.phase_config.customers_per_company,
4969 &company.code,
4970 start_date,
4971 );
4972 self.master_data.customers.extend(customer_pool.customers);
4973 if let Some(pb) = &pb {
4974 pb.inc(1);
4975 }
4976
4977 let mut material_gen = MaterialGenerator::new(company_seed + 200);
4979 material_gen.set_country_pack(pack.clone());
4980 let material_pool = material_gen.generate_material_pool(
4981 self.phase_config.materials_per_company,
4982 &company.code,
4983 start_date,
4984 );
4985 self.master_data.materials.extend(material_pool.materials);
4986 if let Some(pb) = &pb {
4987 pb.inc(1);
4988 }
4989
4990 let mut asset_gen = AssetGenerator::new(company_seed + 300);
4992 let asset_pool = asset_gen.generate_asset_pool(
4993 self.phase_config.assets_per_company,
4994 &company.code,
4995 (start_date, end_date),
4996 );
4997 self.master_data.assets.extend(asset_pool.assets);
4998 if let Some(pb) = &pb {
4999 pb.inc(1);
5000 }
5001
5002 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5004 let employee_pool =
5005 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5006 self.master_data.employees.extend(employee_pool.employees);
5007 if let Some(pb) = &pb {
5008 pb.inc(1);
5009 }
5010 }
5011
5012 if let Some(pb) = pb {
5013 pb.finish_with_message("Master data generation complete");
5014 }
5015
5016 Ok(())
5017 }
5018
5019 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5021 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5022 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5023
5024 let p2p_count = self
5026 .phase_config
5027 .p2p_chains
5028 .min(self.master_data.vendors.len() * 2);
5029 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5030
5031 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5033 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5034 p2p_gen.set_country_pack(self.primary_pack().clone());
5035
5036 for i in 0..p2p_count {
5037 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5038 let materials: Vec<&Material> = self
5039 .master_data
5040 .materials
5041 .iter()
5042 .skip(i % self.master_data.materials.len().max(1))
5043 .take(2.min(self.master_data.materials.len()))
5044 .collect();
5045
5046 if materials.is_empty() {
5047 continue;
5048 }
5049
5050 let company = &self.config.companies[i % self.config.companies.len()];
5051 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5052 let fiscal_period = po_date.month() as u8;
5053 let created_by = if self.master_data.employees.is_empty() {
5054 "SYSTEM"
5055 } else {
5056 self.master_data.employees[i % self.master_data.employees.len()]
5057 .user_id
5058 .as_str()
5059 };
5060
5061 let chain = p2p_gen.generate_chain(
5062 &company.code,
5063 vendor,
5064 &materials,
5065 po_date,
5066 start_date.year() as u16,
5067 fiscal_period,
5068 created_by,
5069 );
5070
5071 flows.purchase_orders.push(chain.purchase_order.clone());
5073 flows.goods_receipts.extend(chain.goods_receipts.clone());
5074 if let Some(vi) = &chain.vendor_invoice {
5075 flows.vendor_invoices.push(vi.clone());
5076 }
5077 if let Some(payment) = &chain.payment {
5078 flows.payments.push(payment.clone());
5079 }
5080 flows.p2p_chains.push(chain);
5081
5082 if let Some(pb) = &pb {
5083 pb.inc(1);
5084 }
5085 }
5086
5087 if let Some(pb) = pb {
5088 pb.finish_with_message("P2P document flows complete");
5089 }
5090
5091 let o2c_count = self
5093 .phase_config
5094 .o2c_chains
5095 .min(self.master_data.customers.len() * 2);
5096 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5097
5098 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5100 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5101 o2c_gen.set_country_pack(self.primary_pack().clone());
5102
5103 for i in 0..o2c_count {
5104 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5105 let materials: Vec<&Material> = self
5106 .master_data
5107 .materials
5108 .iter()
5109 .skip(i % self.master_data.materials.len().max(1))
5110 .take(2.min(self.master_data.materials.len()))
5111 .collect();
5112
5113 if materials.is_empty() {
5114 continue;
5115 }
5116
5117 let company = &self.config.companies[i % self.config.companies.len()];
5118 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5119 let fiscal_period = so_date.month() as u8;
5120 let created_by = if self.master_data.employees.is_empty() {
5121 "SYSTEM"
5122 } else {
5123 self.master_data.employees[i % self.master_data.employees.len()]
5124 .user_id
5125 .as_str()
5126 };
5127
5128 let chain = o2c_gen.generate_chain(
5129 &company.code,
5130 customer,
5131 &materials,
5132 so_date,
5133 start_date.year() as u16,
5134 fiscal_period,
5135 created_by,
5136 );
5137
5138 flows.sales_orders.push(chain.sales_order.clone());
5140 flows.deliveries.extend(chain.deliveries.clone());
5141 if let Some(ci) = &chain.customer_invoice {
5142 flows.customer_invoices.push(ci.clone());
5143 }
5144 if let Some(receipt) = &chain.customer_receipt {
5145 flows.payments.push(receipt.clone());
5146 }
5147 flows.o2c_chains.push(chain);
5148
5149 if let Some(pb) = &pb {
5150 pb.inc(1);
5151 }
5152 }
5153
5154 if let Some(pb) = pb {
5155 pb.finish_with_message("O2C document flows complete");
5156 }
5157
5158 Ok(())
5159 }
5160
5161 fn generate_journal_entries(
5163 &mut self,
5164 coa: &Arc<ChartOfAccounts>,
5165 ) -> SynthResult<Vec<JournalEntry>> {
5166 let total = self.calculate_total_transactions();
5167 let pb = self.create_progress_bar(total, "Generating Journal Entries");
5168
5169 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5170 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5171 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5172
5173 let company_codes: Vec<String> = self
5174 .config
5175 .companies
5176 .iter()
5177 .map(|c| c.code.clone())
5178 .collect();
5179
5180 let generator = JournalEntryGenerator::new_with_params(
5181 self.config.transactions.clone(),
5182 Arc::clone(coa),
5183 company_codes,
5184 start_date,
5185 end_date,
5186 self.seed,
5187 );
5188
5189 let je_pack = self.primary_pack();
5193
5194 let mut generator = generator
5195 .with_master_data(
5196 &self.master_data.vendors,
5197 &self.master_data.customers,
5198 &self.master_data.materials,
5199 )
5200 .with_country_pack_names(je_pack)
5201 .with_country_pack_temporal(
5202 self.config.temporal_patterns.clone(),
5203 self.seed + 200,
5204 je_pack,
5205 )
5206 .with_persona_errors(true)
5207 .with_fraud_config(self.config.fraud.clone());
5208
5209 if self.config.temporal.enabled {
5211 let drift_config = self.config.temporal.to_core_config();
5212 generator = generator.with_drift_config(drift_config, self.seed + 100);
5213 }
5214
5215 let mut entries = Vec::with_capacity(total as usize);
5216
5217 self.check_memory_limit()?;
5219
5220 const MEMORY_CHECK_INTERVAL: u64 = 1000;
5222
5223 for i in 0..total {
5224 let entry = generator.generate();
5225 entries.push(entry);
5226 if let Some(pb) = &pb {
5227 pb.inc(1);
5228 }
5229
5230 if (i + 1) % MEMORY_CHECK_INTERVAL == 0 {
5232 self.check_memory_limit()?;
5233 }
5234 }
5235
5236 if let Some(pb) = pb {
5237 pb.finish_with_message("Journal entries complete");
5238 }
5239
5240 Ok(entries)
5241 }
5242
5243 fn generate_jes_from_document_flows(
5248 &mut self,
5249 flows: &DocumentFlowSnapshot,
5250 ) -> SynthResult<Vec<JournalEntry>> {
5251 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5252 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5253
5254 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
5255 DocumentFlowJeConfig::default(),
5256 self.seed,
5257 );
5258 let mut entries = Vec::new();
5259
5260 for chain in &flows.p2p_chains {
5262 let chain_entries = generator.generate_from_p2p_chain(chain);
5263 entries.extend(chain_entries);
5264 if let Some(pb) = &pb {
5265 pb.inc(1);
5266 }
5267 }
5268
5269 for chain in &flows.o2c_chains {
5271 let chain_entries = generator.generate_from_o2c_chain(chain);
5272 entries.extend(chain_entries);
5273 if let Some(pb) = &pb {
5274 pb.inc(1);
5275 }
5276 }
5277
5278 if let Some(pb) = pb {
5279 pb.finish_with_message(format!(
5280 "Generated {} JEs from document flows",
5281 entries.len()
5282 ));
5283 }
5284
5285 Ok(entries)
5286 }
5287
5288 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5294 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5295
5296 let mut jes = Vec::with_capacity(payroll_runs.len());
5297
5298 for run in payroll_runs {
5299 let mut je = JournalEntry::new_simple(
5300 format!("JE-PAYROLL-{}", run.payroll_id),
5301 run.company_code.clone(),
5302 run.run_date,
5303 format!("Payroll {}", run.payroll_id),
5304 );
5305
5306 je.add_line(JournalEntryLine {
5308 line_number: 1,
5309 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5310 debit_amount: run.total_gross,
5311 reference: Some(run.payroll_id.clone()),
5312 text: Some(format!(
5313 "Payroll {} ({} employees)",
5314 run.payroll_id, run.employee_count
5315 )),
5316 ..Default::default()
5317 });
5318
5319 je.add_line(JournalEntryLine {
5321 line_number: 2,
5322 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5323 credit_amount: run.total_gross,
5324 reference: Some(run.payroll_id.clone()),
5325 ..Default::default()
5326 });
5327
5328 jes.push(je);
5329 }
5330
5331 jes
5332 }
5333
5334 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5340 use datasynth_core::accounts::{control_accounts, expense_accounts};
5341 use datasynth_core::models::ProductionOrderStatus;
5342
5343 let mut jes = Vec::new();
5344
5345 for order in production_orders {
5346 if !matches!(
5348 order.status,
5349 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5350 ) {
5351 continue;
5352 }
5353
5354 let mut je = JournalEntry::new_simple(
5355 format!("JE-MFG-{}", order.order_id),
5356 order.company_code.clone(),
5357 order.actual_end.unwrap_or(order.planned_end),
5358 format!(
5359 "Production Order {} - {}",
5360 order.order_id, order.material_description
5361 ),
5362 );
5363
5364 je.add_line(JournalEntryLine {
5366 line_number: 1,
5367 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5368 debit_amount: order.actual_cost,
5369 reference: Some(order.order_id.clone()),
5370 text: Some(format!(
5371 "Material consumption for {}",
5372 order.material_description
5373 )),
5374 quantity: Some(order.actual_quantity),
5375 unit: Some("EA".to_string()),
5376 ..Default::default()
5377 });
5378
5379 je.add_line(JournalEntryLine {
5381 line_number: 2,
5382 gl_account: control_accounts::INVENTORY.to_string(),
5383 credit_amount: order.actual_cost,
5384 reference: Some(order.order_id.clone()),
5385 ..Default::default()
5386 });
5387
5388 jes.push(je);
5389 }
5390
5391 jes
5392 }
5393
5394 fn link_document_flows_to_subledgers(
5399 &mut self,
5400 flows: &DocumentFlowSnapshot,
5401 ) -> SynthResult<SubledgerSnapshot> {
5402 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5403 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5404
5405 let vendor_names: std::collections::HashMap<String, String> = self
5407 .master_data
5408 .vendors
5409 .iter()
5410 .map(|v| (v.vendor_id.clone(), v.name.clone()))
5411 .collect();
5412 let customer_names: std::collections::HashMap<String, String> = self
5413 .master_data
5414 .customers
5415 .iter()
5416 .map(|c| (c.customer_id.clone(), c.name.clone()))
5417 .collect();
5418
5419 let mut linker = DocumentFlowLinker::new()
5420 .with_vendor_names(vendor_names)
5421 .with_customer_names(customer_names);
5422
5423 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5425 if let Some(pb) = &pb {
5426 pb.inc(flows.vendor_invoices.len() as u64);
5427 }
5428
5429 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5431 if let Some(pb) = &pb {
5432 pb.inc(flows.customer_invoices.len() as u64);
5433 }
5434
5435 if let Some(pb) = pb {
5436 pb.finish_with_message(format!(
5437 "Linked {} AP and {} AR invoices",
5438 ap_invoices.len(),
5439 ar_invoices.len()
5440 ));
5441 }
5442
5443 Ok(SubledgerSnapshot {
5444 ap_invoices,
5445 ar_invoices,
5446 fa_records: Vec::new(),
5447 inventory_positions: Vec::new(),
5448 inventory_movements: Vec::new(),
5449 })
5450 }
5451
5452 #[allow(clippy::too_many_arguments)]
5457 fn generate_ocpm_events(
5458 &mut self,
5459 flows: &DocumentFlowSnapshot,
5460 sourcing: &SourcingSnapshot,
5461 hr: &HrSnapshot,
5462 manufacturing: &ManufacturingSnapshot,
5463 banking: &BankingSnapshot,
5464 audit: &AuditSnapshot,
5465 financial_reporting: &FinancialReportingSnapshot,
5466 ) -> SynthResult<OcpmSnapshot> {
5467 let total_chains = flows.p2p_chains.len()
5468 + flows.o2c_chains.len()
5469 + sourcing.sourcing_projects.len()
5470 + hr.payroll_runs.len()
5471 + manufacturing.production_orders.len()
5472 + banking.customers.len()
5473 + audit.engagements.len()
5474 + financial_reporting.bank_reconciliations.len();
5475 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5476
5477 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5479 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5480
5481 let ocpm_config = OcpmGeneratorConfig {
5483 generate_p2p: true,
5484 generate_o2c: true,
5485 generate_s2c: !sourcing.sourcing_projects.is_empty(),
5486 generate_h2r: !hr.payroll_runs.is_empty(),
5487 generate_mfg: !manufacturing.production_orders.is_empty(),
5488 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5489 generate_bank: !banking.customers.is_empty(),
5490 generate_audit: !audit.engagements.is_empty(),
5491 happy_path_rate: 0.75,
5492 exception_path_rate: 0.20,
5493 error_path_rate: 0.05,
5494 add_duration_variability: true,
5495 duration_std_dev_factor: 0.3,
5496 };
5497 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5498 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5499
5500 let available_users: Vec<String> = self
5502 .master_data
5503 .employees
5504 .iter()
5505 .take(20)
5506 .map(|e| e.user_id.clone())
5507 .collect();
5508
5509 let fallback_date =
5511 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5512 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5513 .unwrap_or(fallback_date);
5514 let base_midnight = base_date
5515 .and_hms_opt(0, 0, 0)
5516 .expect("midnight is always valid");
5517 let base_datetime =
5518 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5519
5520 let add_result = |event_log: &mut OcpmEventLog,
5522 result: datasynth_ocpm::CaseGenerationResult| {
5523 for event in result.events {
5524 event_log.add_event(event);
5525 }
5526 for object in result.objects {
5527 event_log.add_object(object);
5528 }
5529 for relationship in result.relationships {
5530 event_log.add_relationship(relationship);
5531 }
5532 event_log.add_case(result.case_trace);
5533 };
5534
5535 for chain in &flows.p2p_chains {
5537 let po = &chain.purchase_order;
5538 let documents = P2pDocuments::new(
5539 &po.header.document_id,
5540 &po.vendor_id,
5541 &po.header.company_code,
5542 po.total_net_amount,
5543 &po.header.currency,
5544 &ocpm_uuid_factory,
5545 )
5546 .with_goods_receipt(
5547 chain
5548 .goods_receipts
5549 .first()
5550 .map(|gr| gr.header.document_id.as_str())
5551 .unwrap_or(""),
5552 &ocpm_uuid_factory,
5553 )
5554 .with_invoice(
5555 chain
5556 .vendor_invoice
5557 .as_ref()
5558 .map(|vi| vi.header.document_id.as_str())
5559 .unwrap_or(""),
5560 &ocpm_uuid_factory,
5561 )
5562 .with_payment(
5563 chain
5564 .payment
5565 .as_ref()
5566 .map(|p| p.header.document_id.as_str())
5567 .unwrap_or(""),
5568 &ocpm_uuid_factory,
5569 );
5570
5571 let start_time =
5572 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5573 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5574 add_result(&mut event_log, result);
5575
5576 if let Some(pb) = &pb {
5577 pb.inc(1);
5578 }
5579 }
5580
5581 for chain in &flows.o2c_chains {
5583 let so = &chain.sales_order;
5584 let documents = O2cDocuments::new(
5585 &so.header.document_id,
5586 &so.customer_id,
5587 &so.header.company_code,
5588 so.total_net_amount,
5589 &so.header.currency,
5590 &ocpm_uuid_factory,
5591 )
5592 .with_delivery(
5593 chain
5594 .deliveries
5595 .first()
5596 .map(|d| d.header.document_id.as_str())
5597 .unwrap_or(""),
5598 &ocpm_uuid_factory,
5599 )
5600 .with_invoice(
5601 chain
5602 .customer_invoice
5603 .as_ref()
5604 .map(|ci| ci.header.document_id.as_str())
5605 .unwrap_or(""),
5606 &ocpm_uuid_factory,
5607 )
5608 .with_receipt(
5609 chain
5610 .customer_receipt
5611 .as_ref()
5612 .map(|r| r.header.document_id.as_str())
5613 .unwrap_or(""),
5614 &ocpm_uuid_factory,
5615 );
5616
5617 let start_time =
5618 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5619 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5620 add_result(&mut event_log, result);
5621
5622 if let Some(pb) = &pb {
5623 pb.inc(1);
5624 }
5625 }
5626
5627 for project in &sourcing.sourcing_projects {
5629 let vendor_id = sourcing
5631 .contracts
5632 .iter()
5633 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5634 .map(|c| c.vendor_id.clone())
5635 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5636 .or_else(|| {
5637 self.master_data
5638 .vendors
5639 .first()
5640 .map(|v| v.vendor_id.clone())
5641 })
5642 .unwrap_or_else(|| "V000".to_string());
5643 let mut docs = S2cDocuments::new(
5644 &project.project_id,
5645 &vendor_id,
5646 &project.company_code,
5647 project.estimated_annual_spend,
5648 &ocpm_uuid_factory,
5649 );
5650 if let Some(rfx) = sourcing
5652 .rfx_events
5653 .iter()
5654 .find(|r| r.sourcing_project_id == project.project_id)
5655 {
5656 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5657 if let Some(bid) = sourcing.bids.iter().find(|b| {
5659 b.rfx_id == rfx.rfx_id
5660 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5661 }) {
5662 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5663 }
5664 }
5665 if let Some(contract) = sourcing
5667 .contracts
5668 .iter()
5669 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5670 {
5671 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5672 }
5673 let start_time = base_datetime - chrono::Duration::days(90);
5674 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5675 add_result(&mut event_log, result);
5676
5677 if let Some(pb) = &pb {
5678 pb.inc(1);
5679 }
5680 }
5681
5682 for run in &hr.payroll_runs {
5684 let employee_id = hr
5686 .payroll_line_items
5687 .iter()
5688 .find(|li| li.payroll_id == run.payroll_id)
5689 .map(|li| li.employee_id.as_str())
5690 .unwrap_or("EMP000");
5691 let docs = H2rDocuments::new(
5692 &run.payroll_id,
5693 employee_id,
5694 &run.company_code,
5695 run.total_gross,
5696 &ocpm_uuid_factory,
5697 )
5698 .with_time_entries(
5699 hr.time_entries
5700 .iter()
5701 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5702 .take(5)
5703 .map(|t| t.entry_id.as_str())
5704 .collect(),
5705 );
5706 let start_time = base_datetime - chrono::Duration::days(30);
5707 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5708 add_result(&mut event_log, result);
5709
5710 if let Some(pb) = &pb {
5711 pb.inc(1);
5712 }
5713 }
5714
5715 for order in &manufacturing.production_orders {
5717 let mut docs = MfgDocuments::new(
5718 &order.order_id,
5719 &order.material_id,
5720 &order.company_code,
5721 order.planned_quantity,
5722 &ocpm_uuid_factory,
5723 )
5724 .with_operations(
5725 order
5726 .operations
5727 .iter()
5728 .map(|o| format!("OP-{:04}", o.operation_number))
5729 .collect::<Vec<_>>()
5730 .iter()
5731 .map(|s| s.as_str())
5732 .collect(),
5733 );
5734 if let Some(insp) = manufacturing
5736 .quality_inspections
5737 .iter()
5738 .find(|i| i.reference_id == order.order_id)
5739 {
5740 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5741 }
5742 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5744 cc.items
5745 .iter()
5746 .any(|item| item.material_id == order.material_id)
5747 }) {
5748 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5749 }
5750 let start_time = base_datetime - chrono::Duration::days(60);
5751 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5752 add_result(&mut event_log, result);
5753
5754 if let Some(pb) = &pb {
5755 pb.inc(1);
5756 }
5757 }
5758
5759 for customer in &banking.customers {
5761 let customer_id_str = customer.customer_id.to_string();
5762 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5763 if let Some(account) = banking
5765 .accounts
5766 .iter()
5767 .find(|a| a.primary_owner_id == customer.customer_id)
5768 {
5769 let account_id_str = account.account_id.to_string();
5770 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5771 let txn_strs: Vec<String> = banking
5773 .transactions
5774 .iter()
5775 .filter(|t| t.account_id == account.account_id)
5776 .take(10)
5777 .map(|t| t.transaction_id.to_string())
5778 .collect();
5779 let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5780 let txn_amounts: Vec<rust_decimal::Decimal> = banking
5781 .transactions
5782 .iter()
5783 .filter(|t| t.account_id == account.account_id)
5784 .take(10)
5785 .map(|t| t.amount)
5786 .collect();
5787 if !txn_ids.is_empty() {
5788 docs = docs.with_transactions(txn_ids, txn_amounts);
5789 }
5790 }
5791 let start_time = base_datetime - chrono::Duration::days(180);
5792 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5793 add_result(&mut event_log, result);
5794
5795 if let Some(pb) = &pb {
5796 pb.inc(1);
5797 }
5798 }
5799
5800 for engagement in &audit.engagements {
5802 let engagement_id_str = engagement.engagement_id.to_string();
5803 let docs = AuditDocuments::new(
5804 &engagement_id_str,
5805 &engagement.client_entity_id,
5806 &ocpm_uuid_factory,
5807 )
5808 .with_workpapers(
5809 audit
5810 .workpapers
5811 .iter()
5812 .filter(|w| w.engagement_id == engagement.engagement_id)
5813 .take(10)
5814 .map(|w| w.workpaper_id.to_string())
5815 .collect::<Vec<_>>()
5816 .iter()
5817 .map(|s| s.as_str())
5818 .collect(),
5819 )
5820 .with_evidence(
5821 audit
5822 .evidence
5823 .iter()
5824 .filter(|e| e.engagement_id == engagement.engagement_id)
5825 .take(10)
5826 .map(|e| e.evidence_id.to_string())
5827 .collect::<Vec<_>>()
5828 .iter()
5829 .map(|s| s.as_str())
5830 .collect(),
5831 )
5832 .with_risks(
5833 audit
5834 .risk_assessments
5835 .iter()
5836 .filter(|r| r.engagement_id == engagement.engagement_id)
5837 .take(5)
5838 .map(|r| r.risk_id.to_string())
5839 .collect::<Vec<_>>()
5840 .iter()
5841 .map(|s| s.as_str())
5842 .collect(),
5843 )
5844 .with_findings(
5845 audit
5846 .findings
5847 .iter()
5848 .filter(|f| f.engagement_id == engagement.engagement_id)
5849 .take(5)
5850 .map(|f| f.finding_id.to_string())
5851 .collect::<Vec<_>>()
5852 .iter()
5853 .map(|s| s.as_str())
5854 .collect(),
5855 )
5856 .with_judgments(
5857 audit
5858 .judgments
5859 .iter()
5860 .filter(|j| j.engagement_id == engagement.engagement_id)
5861 .take(5)
5862 .map(|j| j.judgment_id.to_string())
5863 .collect::<Vec<_>>()
5864 .iter()
5865 .map(|s| s.as_str())
5866 .collect(),
5867 );
5868 let start_time = base_datetime - chrono::Duration::days(120);
5869 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5870 add_result(&mut event_log, result);
5871
5872 if let Some(pb) = &pb {
5873 pb.inc(1);
5874 }
5875 }
5876
5877 for recon in &financial_reporting.bank_reconciliations {
5879 let docs = BankReconDocuments::new(
5880 &recon.reconciliation_id,
5881 &recon.bank_account_id,
5882 &recon.company_code,
5883 recon.bank_ending_balance,
5884 &ocpm_uuid_factory,
5885 )
5886 .with_statement_lines(
5887 recon
5888 .statement_lines
5889 .iter()
5890 .take(20)
5891 .map(|l| l.line_id.as_str())
5892 .collect(),
5893 )
5894 .with_reconciling_items(
5895 recon
5896 .reconciling_items
5897 .iter()
5898 .take(10)
5899 .map(|i| i.item_id.as_str())
5900 .collect(),
5901 );
5902 let start_time = base_datetime - chrono::Duration::days(30);
5903 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
5904 add_result(&mut event_log, result);
5905
5906 if let Some(pb) = &pb {
5907 pb.inc(1);
5908 }
5909 }
5910
5911 event_log.compute_variants();
5913
5914 let summary = event_log.summary();
5915
5916 if let Some(pb) = pb {
5917 pb.finish_with_message(format!(
5918 "Generated {} OCPM events, {} objects",
5919 summary.event_count, summary.object_count
5920 ));
5921 }
5922
5923 Ok(OcpmSnapshot {
5924 event_count: summary.event_count,
5925 object_count: summary.object_count,
5926 case_count: summary.case_count,
5927 event_log: Some(event_log),
5928 })
5929 }
5930
5931 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
5933 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
5934
5935 let total_rate = if self.config.anomaly_injection.enabled {
5938 self.config.anomaly_injection.rates.total_rate
5939 } else if self.config.fraud.enabled {
5940 self.config.fraud.fraud_rate
5941 } else {
5942 0.02
5943 };
5944
5945 let fraud_rate = if self.config.anomaly_injection.enabled {
5946 self.config.anomaly_injection.rates.fraud_rate
5947 } else {
5948 AnomalyRateConfig::default().fraud_rate
5949 };
5950
5951 let error_rate = if self.config.anomaly_injection.enabled {
5952 self.config.anomaly_injection.rates.error_rate
5953 } else {
5954 AnomalyRateConfig::default().error_rate
5955 };
5956
5957 let process_issue_rate = if self.config.anomaly_injection.enabled {
5958 self.config.anomaly_injection.rates.process_rate
5959 } else {
5960 AnomalyRateConfig::default().process_issue_rate
5961 };
5962
5963 let anomaly_config = AnomalyInjectorConfig {
5964 rates: AnomalyRateConfig {
5965 total_rate,
5966 fraud_rate,
5967 error_rate,
5968 process_issue_rate,
5969 ..Default::default()
5970 },
5971 seed: self.seed + 5000,
5972 ..Default::default()
5973 };
5974
5975 let mut injector = AnomalyInjector::new(anomaly_config);
5976 let result = injector.process_entries(entries);
5977
5978 if let Some(pb) = &pb {
5979 pb.inc(entries.len() as u64);
5980 pb.finish_with_message("Anomaly injection complete");
5981 }
5982
5983 let mut by_type = HashMap::new();
5984 for label in &result.labels {
5985 *by_type
5986 .entry(format!("{:?}", label.anomaly_type))
5987 .or_insert(0) += 1;
5988 }
5989
5990 Ok(AnomalyLabels {
5991 labels: result.labels,
5992 summary: Some(result.summary),
5993 by_type,
5994 })
5995 }
5996
5997 fn validate_journal_entries(
6006 &mut self,
6007 entries: &[JournalEntry],
6008 ) -> SynthResult<BalanceValidationResult> {
6009 let clean_entries: Vec<&JournalEntry> = entries
6011 .iter()
6012 .filter(|e| {
6013 e.header
6014 .header_text
6015 .as_ref()
6016 .map(|t| !t.contains("[HUMAN_ERROR:"))
6017 .unwrap_or(true)
6018 })
6019 .collect();
6020
6021 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6022
6023 let config = BalanceTrackerConfig {
6025 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
6029 };
6030 let validation_currency = self
6031 .config
6032 .companies
6033 .first()
6034 .map(|c| c.currency.clone())
6035 .unwrap_or_else(|| "USD".to_string());
6036
6037 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6038
6039 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6041 let errors = tracker.apply_entries(&clean_refs);
6042
6043 if let Some(pb) = &pb {
6044 pb.inc(entries.len() as u64);
6045 }
6046
6047 let has_unbalanced = tracker
6050 .get_validation_errors()
6051 .iter()
6052 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6053
6054 let mut all_errors = errors;
6057 all_errors.extend(tracker.get_validation_errors().iter().cloned());
6058 let company_codes: Vec<String> = self
6059 .config
6060 .companies
6061 .iter()
6062 .map(|c| c.code.clone())
6063 .collect();
6064
6065 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6066 .map(|d| d + chrono::Months::new(self.config.global.period_months))
6067 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6068
6069 for company_code in &company_codes {
6070 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6071 all_errors.push(e);
6072 }
6073 }
6074
6075 let stats = tracker.get_statistics();
6077
6078 let is_balanced = all_errors.is_empty();
6080
6081 if let Some(pb) = pb {
6082 let msg = if is_balanced {
6083 "Balance validation passed"
6084 } else {
6085 "Balance validation completed with errors"
6086 };
6087 pb.finish_with_message(msg);
6088 }
6089
6090 Ok(BalanceValidationResult {
6091 validated: true,
6092 is_balanced,
6093 entries_processed: stats.entries_processed,
6094 total_debits: stats.total_debits,
6095 total_credits: stats.total_credits,
6096 accounts_tracked: stats.accounts_tracked,
6097 companies_tracked: stats.companies_tracked,
6098 validation_errors: all_errors,
6099 has_unbalanced_entries: has_unbalanced,
6100 })
6101 }
6102
6103 fn inject_data_quality(
6108 &mut self,
6109 entries: &mut [JournalEntry],
6110 ) -> SynthResult<DataQualityStats> {
6111 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6112
6113 let config = if self.config.data_quality.enabled {
6116 let dq = &self.config.data_quality;
6117 DataQualityConfig {
6118 enable_missing_values: dq.missing_values.enabled,
6119 missing_values: datasynth_generators::MissingValueConfig {
6120 global_rate: dq.effective_missing_rate(),
6121 ..Default::default()
6122 },
6123 enable_format_variations: dq.format_variations.enabled,
6124 format_variations: datasynth_generators::FormatVariationConfig {
6125 date_variation_rate: dq.format_variations.dates.rate,
6126 amount_variation_rate: dq.format_variations.amounts.rate,
6127 identifier_variation_rate: dq.format_variations.identifiers.rate,
6128 ..Default::default()
6129 },
6130 enable_duplicates: dq.duplicates.enabled,
6131 duplicates: datasynth_generators::DuplicateConfig {
6132 duplicate_rate: dq.effective_duplicate_rate(),
6133 ..Default::default()
6134 },
6135 enable_typos: dq.typos.enabled,
6136 typos: datasynth_generators::TypoConfig {
6137 char_error_rate: dq.effective_typo_rate(),
6138 ..Default::default()
6139 },
6140 enable_encoding_issues: dq.encoding_issues.enabled,
6141 encoding_issue_rate: dq.encoding_issues.rate,
6142 seed: self.seed.wrapping_add(77), track_statistics: true,
6144 }
6145 } else {
6146 DataQualityConfig::minimal()
6147 };
6148 let mut injector = DataQualityInjector::new(config);
6149
6150 injector.set_country_pack(self.primary_pack().clone());
6152
6153 let context = HashMap::new();
6155
6156 for entry in entries.iter_mut() {
6157 if let Some(text) = &entry.header.header_text {
6159 let processed = injector.process_text_field(
6160 "header_text",
6161 text,
6162 &entry.header.document_id.to_string(),
6163 &context,
6164 );
6165 match processed {
6166 Some(new_text) if new_text != *text => {
6167 entry.header.header_text = Some(new_text);
6168 }
6169 None => {
6170 entry.header.header_text = None; }
6172 _ => {}
6173 }
6174 }
6175
6176 if let Some(ref_text) = &entry.header.reference {
6178 let processed = injector.process_text_field(
6179 "reference",
6180 ref_text,
6181 &entry.header.document_id.to_string(),
6182 &context,
6183 );
6184 match processed {
6185 Some(new_text) if new_text != *ref_text => {
6186 entry.header.reference = Some(new_text);
6187 }
6188 None => {
6189 entry.header.reference = None;
6190 }
6191 _ => {}
6192 }
6193 }
6194
6195 let user_persona = entry.header.user_persona.clone();
6197 if let Some(processed) = injector.process_text_field(
6198 "user_persona",
6199 &user_persona,
6200 &entry.header.document_id.to_string(),
6201 &context,
6202 ) {
6203 if processed != user_persona {
6204 entry.header.user_persona = processed;
6205 }
6206 }
6207
6208 for line in &mut entry.lines {
6210 if let Some(ref text) = line.line_text {
6212 let processed = injector.process_text_field(
6213 "line_text",
6214 text,
6215 &entry.header.document_id.to_string(),
6216 &context,
6217 );
6218 match processed {
6219 Some(new_text) if new_text != *text => {
6220 line.line_text = Some(new_text);
6221 }
6222 None => {
6223 line.line_text = None;
6224 }
6225 _ => {}
6226 }
6227 }
6228
6229 if let Some(cc) = &line.cost_center {
6231 let processed = injector.process_text_field(
6232 "cost_center",
6233 cc,
6234 &entry.header.document_id.to_string(),
6235 &context,
6236 );
6237 match processed {
6238 Some(new_cc) if new_cc != *cc => {
6239 line.cost_center = Some(new_cc);
6240 }
6241 None => {
6242 line.cost_center = None;
6243 }
6244 _ => {}
6245 }
6246 }
6247 }
6248
6249 if let Some(pb) = &pb {
6250 pb.inc(1);
6251 }
6252 }
6253
6254 if let Some(pb) = pb {
6255 pb.finish_with_message("Data quality injection complete");
6256 }
6257
6258 Ok(injector.stats().clone())
6259 }
6260
6261 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6272 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6273 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6274 let fiscal_year = start_date.year() as u16;
6275 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6276
6277 let total_revenue: rust_decimal::Decimal = entries
6279 .iter()
6280 .flat_map(|e| e.lines.iter())
6281 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6282 .map(|l| l.credit_amount)
6283 .sum();
6284
6285 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6287
6288 let mut snapshot = AuditSnapshot::default();
6289
6290 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6292 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6293 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6294 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6295 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6296 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6297
6298 let accounts: Vec<String> = self
6300 .coa
6301 .as_ref()
6302 .map(|coa| {
6303 coa.get_postable_accounts()
6304 .iter()
6305 .map(|acc| acc.account_code().to_string())
6306 .collect()
6307 })
6308 .unwrap_or_default();
6309
6310 for (i, company) in self.config.companies.iter().enumerate() {
6312 let company_revenue = total_revenue
6314 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6315
6316 let engagements_for_company =
6318 self.phase_config.audit_engagements / self.config.companies.len().max(1);
6319 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6320 1
6321 } else {
6322 0
6323 };
6324
6325 for _eng_idx in 0..(engagements_for_company + extra) {
6326 let mut engagement = engagement_gen.generate_engagement(
6328 &company.code,
6329 &company.name,
6330 fiscal_year,
6331 period_end,
6332 company_revenue,
6333 None, );
6335
6336 if !self.master_data.employees.is_empty() {
6338 let emp_count = self.master_data.employees.len();
6339 let base = (i * 10 + _eng_idx) % emp_count;
6341 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6342 .employee_id
6343 .clone();
6344 engagement.engagement_manager_id = self.master_data.employees
6345 [(base + 1) % emp_count]
6346 .employee_id
6347 .clone();
6348 let real_team: Vec<String> = engagement
6349 .team_member_ids
6350 .iter()
6351 .enumerate()
6352 .map(|(j, _)| {
6353 self.master_data.employees[(base + 2 + j) % emp_count]
6354 .employee_id
6355 .clone()
6356 })
6357 .collect();
6358 engagement.team_member_ids = real_team;
6359 }
6360
6361 if let Some(pb) = &pb {
6362 pb.inc(1);
6363 }
6364
6365 let team_members: Vec<String> = engagement.team_member_ids.clone();
6367
6368 let workpapers =
6370 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6371
6372 for wp in &workpapers {
6373 if let Some(pb) = &pb {
6374 pb.inc(1);
6375 }
6376
6377 let evidence = evidence_gen.generate_evidence_for_workpaper(
6379 wp,
6380 &team_members,
6381 wp.preparer_date,
6382 );
6383
6384 for _ in &evidence {
6385 if let Some(pb) = &pb {
6386 pb.inc(1);
6387 }
6388 }
6389
6390 snapshot.evidence.extend(evidence);
6391 }
6392
6393 let risks =
6395 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6396
6397 for _ in &risks {
6398 if let Some(pb) = &pb {
6399 pb.inc(1);
6400 }
6401 }
6402 snapshot.risk_assessments.extend(risks);
6403
6404 let findings = finding_gen.generate_findings_for_engagement(
6406 &engagement,
6407 &workpapers,
6408 &team_members,
6409 );
6410
6411 for _ in &findings {
6412 if let Some(pb) = &pb {
6413 pb.inc(1);
6414 }
6415 }
6416 snapshot.findings.extend(findings);
6417
6418 let judgments =
6420 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6421
6422 for _ in &judgments {
6423 if let Some(pb) = &pb {
6424 pb.inc(1);
6425 }
6426 }
6427 snapshot.judgments.extend(judgments);
6428
6429 snapshot.workpapers.extend(workpapers);
6431 snapshot.engagements.push(engagement);
6432 }
6433 }
6434
6435 if let Some(pb) = pb {
6436 pb.finish_with_message(format!(
6437 "Audit data: {} engagements, {} workpapers, {} evidence",
6438 snapshot.engagements.len(),
6439 snapshot.workpapers.len(),
6440 snapshot.evidence.len()
6441 ));
6442 }
6443
6444 Ok(snapshot)
6445 }
6446
6447 fn export_graphs(
6454 &mut self,
6455 entries: &[JournalEntry],
6456 _coa: &Arc<ChartOfAccounts>,
6457 stats: &mut EnhancedGenerationStatistics,
6458 ) -> SynthResult<GraphExportSnapshot> {
6459 let pb = self.create_progress_bar(100, "Exporting Graphs");
6460
6461 let mut snapshot = GraphExportSnapshot::default();
6462
6463 let output_dir = self
6465 .output_path
6466 .clone()
6467 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6468 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6469
6470 for graph_type in &self.config.graph_export.graph_types {
6472 if let Some(pb) = &pb {
6473 pb.inc(10);
6474 }
6475
6476 let graph_config = TransactionGraphConfig {
6478 include_vendors: false,
6479 include_customers: false,
6480 create_debit_credit_edges: true,
6481 include_document_nodes: graph_type.include_document_nodes,
6482 min_edge_weight: graph_type.min_edge_weight,
6483 aggregate_parallel_edges: graph_type.aggregate_edges,
6484 };
6485
6486 let mut builder = TransactionGraphBuilder::new(graph_config);
6487 builder.add_journal_entries(entries);
6488 let graph = builder.build();
6489
6490 stats.graph_node_count += graph.node_count();
6492 stats.graph_edge_count += graph.edge_count();
6493
6494 if let Some(pb) = &pb {
6495 pb.inc(40);
6496 }
6497
6498 for format in &self.config.graph_export.formats {
6500 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6501
6502 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6504 warn!("Failed to create graph output directory: {}", e);
6505 continue;
6506 }
6507
6508 match format {
6509 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6510 let pyg_config = PyGExportConfig {
6511 common: datasynth_graph::CommonExportConfig {
6512 export_node_features: true,
6513 export_edge_features: true,
6514 export_node_labels: true,
6515 export_edge_labels: true,
6516 export_masks: true,
6517 train_ratio: self.config.graph_export.train_ratio,
6518 val_ratio: self.config.graph_export.validation_ratio,
6519 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6520 },
6521 one_hot_categoricals: false,
6522 };
6523
6524 let exporter = PyGExporter::new(pyg_config);
6525 match exporter.export(&graph, &format_dir) {
6526 Ok(metadata) => {
6527 snapshot.exports.insert(
6528 format!("{}_{}", graph_type.name, "pytorch_geometric"),
6529 GraphExportInfo {
6530 name: graph_type.name.clone(),
6531 format: "pytorch_geometric".to_string(),
6532 output_path: format_dir.clone(),
6533 node_count: metadata.num_nodes,
6534 edge_count: metadata.num_edges,
6535 },
6536 );
6537 snapshot.graph_count += 1;
6538 }
6539 Err(e) => {
6540 warn!("Failed to export PyTorch Geometric graph: {}", e);
6541 }
6542 }
6543 }
6544 datasynth_config::schema::GraphExportFormat::Neo4j => {
6545 warn!("Neo4j graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6547 }
6548 datasynth_config::schema::GraphExportFormat::Dgl => {
6549 warn!("DGL graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6551 }
6552 datasynth_config::schema::GraphExportFormat::RustGraph => {
6553 use datasynth_graph::{
6554 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6555 };
6556
6557 let rustgraph_config = RustGraphExportConfig {
6558 include_features: true,
6559 include_temporal: true,
6560 include_labels: true,
6561 source_name: "datasynth".to_string(),
6562 batch_id: None,
6563 output_format: RustGraphOutputFormat::JsonLines,
6564 export_node_properties: true,
6565 export_edge_properties: true,
6566 pretty_print: false,
6567 };
6568
6569 let exporter = RustGraphExporter::new(rustgraph_config);
6570 match exporter.export(&graph, &format_dir) {
6571 Ok(metadata) => {
6572 snapshot.exports.insert(
6573 format!("{}_{}", graph_type.name, "rustgraph"),
6574 GraphExportInfo {
6575 name: graph_type.name.clone(),
6576 format: "rustgraph".to_string(),
6577 output_path: format_dir.clone(),
6578 node_count: metadata.num_nodes,
6579 edge_count: metadata.num_edges,
6580 },
6581 );
6582 snapshot.graph_count += 1;
6583 }
6584 Err(e) => {
6585 warn!("Failed to export RustGraph: {}", e);
6586 }
6587 }
6588 }
6589 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6590 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6592 }
6593 }
6594 }
6595
6596 if let Some(pb) = &pb {
6597 pb.inc(40);
6598 }
6599 }
6600
6601 stats.graph_export_count = snapshot.graph_count;
6602 snapshot.exported = snapshot.graph_count > 0;
6603
6604 if let Some(pb) = pb {
6605 pb.finish_with_message(format!(
6606 "Graphs exported: {} graphs ({} nodes, {} edges)",
6607 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6608 ));
6609 }
6610
6611 Ok(snapshot)
6612 }
6613
6614 fn build_additional_graphs(
6619 &self,
6620 banking: &BankingSnapshot,
6621 _intercompany: &IntercompanySnapshot,
6622 entries: &[JournalEntry],
6623 stats: &mut EnhancedGenerationStatistics,
6624 ) {
6625 let output_dir = self
6626 .output_path
6627 .clone()
6628 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6629 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6630
6631 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6633 info!("Phase 10c: Building banking network graph");
6634 let config = BankingGraphConfig::default();
6635 let mut builder = BankingGraphBuilder::new(config);
6636 builder.add_customers(&banking.customers);
6637 builder.add_accounts(&banking.accounts, &banking.customers);
6638 builder.add_transactions(&banking.transactions);
6639 let graph = builder.build();
6640
6641 let node_count = graph.node_count();
6642 let edge_count = graph.edge_count();
6643 stats.graph_node_count += node_count;
6644 stats.graph_edge_count += edge_count;
6645
6646 for format in &self.config.graph_export.formats {
6648 if matches!(
6649 format,
6650 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6651 ) {
6652 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6653 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6654 warn!("Failed to create banking graph output dir: {}", e);
6655 continue;
6656 }
6657 let pyg_config = PyGExportConfig::default();
6658 let exporter = PyGExporter::new(pyg_config);
6659 if let Err(e) = exporter.export(&graph, &format_dir) {
6660 warn!("Failed to export banking graph as PyG: {}", e);
6661 } else {
6662 info!(
6663 "Banking network graph exported: {} nodes, {} edges",
6664 node_count, edge_count
6665 );
6666 }
6667 }
6668 }
6669 }
6670
6671 let approval_entries: Vec<_> = entries
6673 .iter()
6674 .filter(|je| je.header.approval_workflow.is_some())
6675 .collect();
6676
6677 if !approval_entries.is_empty() {
6678 info!(
6679 "Phase 10c: Building approval network graph ({} entries with approvals)",
6680 approval_entries.len()
6681 );
6682 let config = ApprovalGraphConfig::default();
6683 let mut builder = ApprovalGraphBuilder::new(config);
6684
6685 for je in &approval_entries {
6686 if let Some(ref wf) = je.header.approval_workflow {
6687 for action in &wf.actions {
6688 let record = datasynth_core::models::ApprovalRecord {
6689 approval_id: format!(
6690 "APR-{}-{}",
6691 je.header.document_id, action.approval_level
6692 ),
6693 document_number: je.header.document_id.to_string(),
6694 document_type: "JE".to_string(),
6695 company_code: je.company_code().to_string(),
6696 requester_id: wf.preparer_id.clone(),
6697 requester_name: Some(wf.preparer_name.clone()),
6698 approver_id: action.actor_id.clone(),
6699 approver_name: action.actor_name.clone(),
6700 approval_date: je.posting_date(),
6701 action: format!("{:?}", action.action),
6702 amount: wf.amount,
6703 approval_limit: None,
6704 comments: action.comments.clone(),
6705 delegation_from: None,
6706 is_auto_approved: false,
6707 };
6708 builder.add_approval(&record);
6709 }
6710 }
6711 }
6712
6713 let graph = builder.build();
6714 let node_count = graph.node_count();
6715 let edge_count = graph.edge_count();
6716 stats.graph_node_count += node_count;
6717 stats.graph_edge_count += edge_count;
6718
6719 for format in &self.config.graph_export.formats {
6721 if matches!(
6722 format,
6723 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6724 ) {
6725 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6726 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6727 warn!("Failed to create approval graph output dir: {}", e);
6728 continue;
6729 }
6730 let pyg_config = PyGExportConfig::default();
6731 let exporter = PyGExporter::new(pyg_config);
6732 if let Err(e) = exporter.export(&graph, &format_dir) {
6733 warn!("Failed to export approval graph as PyG: {}", e);
6734 } else {
6735 info!(
6736 "Approval network graph exported: {} nodes, {} edges",
6737 node_count, edge_count
6738 );
6739 }
6740 }
6741 }
6742 }
6743
6744 debug!(
6747 "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6748 available when intercompany relationships are modeled as IntercompanyRelationship)"
6749 );
6750 }
6751
6752 #[allow(clippy::too_many_arguments)]
6759 fn export_hypergraph(
6760 &self,
6761 coa: &Arc<ChartOfAccounts>,
6762 entries: &[JournalEntry],
6763 document_flows: &DocumentFlowSnapshot,
6764 sourcing: &SourcingSnapshot,
6765 hr: &HrSnapshot,
6766 manufacturing: &ManufacturingSnapshot,
6767 banking: &BankingSnapshot,
6768 audit: &AuditSnapshot,
6769 financial_reporting: &FinancialReportingSnapshot,
6770 ocpm: &OcpmSnapshot,
6771 stats: &mut EnhancedGenerationStatistics,
6772 ) -> SynthResult<HypergraphExportInfo> {
6773 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6774 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6775 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6776 use datasynth_graph::models::hypergraph::AggregationStrategy;
6777
6778 let hg_settings = &self.config.graph_export.hypergraph;
6779
6780 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6782 "truncate" => AggregationStrategy::Truncate,
6783 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6784 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6785 "importance_sample" => AggregationStrategy::ImportanceSample,
6786 _ => AggregationStrategy::PoolByCounterparty,
6787 };
6788
6789 let builder_config = HypergraphConfig {
6790 max_nodes: hg_settings.max_nodes,
6791 aggregation_strategy,
6792 include_coso: hg_settings.governance_layer.include_coso,
6793 include_controls: hg_settings.governance_layer.include_controls,
6794 include_sox: hg_settings.governance_layer.include_sox,
6795 include_vendors: hg_settings.governance_layer.include_vendors,
6796 include_customers: hg_settings.governance_layer.include_customers,
6797 include_employees: hg_settings.governance_layer.include_employees,
6798 include_p2p: hg_settings.process_layer.include_p2p,
6799 include_o2c: hg_settings.process_layer.include_o2c,
6800 include_s2c: hg_settings.process_layer.include_s2c,
6801 include_h2r: hg_settings.process_layer.include_h2r,
6802 include_mfg: hg_settings.process_layer.include_mfg,
6803 include_bank: hg_settings.process_layer.include_bank,
6804 include_audit: hg_settings.process_layer.include_audit,
6805 include_r2r: hg_settings.process_layer.include_r2r,
6806 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6807 docs_per_counterparty_threshold: hg_settings
6808 .process_layer
6809 .docs_per_counterparty_threshold,
6810 include_accounts: hg_settings.accounting_layer.include_accounts,
6811 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6812 include_cross_layer_edges: hg_settings.cross_layer.enabled,
6813 };
6814
6815 let mut builder = HypergraphBuilder::new(builder_config);
6816
6817 builder.add_coso_framework();
6819
6820 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
6823 let controls = InternalControl::standard_controls();
6824 builder.add_controls(&controls);
6825 }
6826
6827 builder.add_vendors(&self.master_data.vendors);
6829 builder.add_customers(&self.master_data.customers);
6830 builder.add_employees(&self.master_data.employees);
6831
6832 builder.add_p2p_documents(
6834 &document_flows.purchase_orders,
6835 &document_flows.goods_receipts,
6836 &document_flows.vendor_invoices,
6837 &document_flows.payments,
6838 );
6839 builder.add_o2c_documents(
6840 &document_flows.sales_orders,
6841 &document_flows.deliveries,
6842 &document_flows.customer_invoices,
6843 );
6844 builder.add_s2c_documents(
6845 &sourcing.sourcing_projects,
6846 &sourcing.qualifications,
6847 &sourcing.rfx_events,
6848 &sourcing.bids,
6849 &sourcing.bid_evaluations,
6850 &sourcing.contracts,
6851 );
6852 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
6853 builder.add_mfg_documents(
6854 &manufacturing.production_orders,
6855 &manufacturing.quality_inspections,
6856 &manufacturing.cycle_counts,
6857 );
6858 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
6859 builder.add_audit_documents(
6860 &audit.engagements,
6861 &audit.workpapers,
6862 &audit.findings,
6863 &audit.evidence,
6864 &audit.risk_assessments,
6865 &audit.judgments,
6866 );
6867 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
6868
6869 if let Some(ref event_log) = ocpm.event_log {
6871 builder.add_ocpm_events(event_log);
6872 }
6873
6874 builder.add_accounts(coa);
6876 builder.add_journal_entries_as_hyperedges(entries);
6877
6878 let hypergraph = builder.build();
6880
6881 let output_dir = self
6883 .output_path
6884 .clone()
6885 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6886 let hg_dir = output_dir
6887 .join(&self.config.graph_export.output_subdirectory)
6888 .join(&hg_settings.output_subdirectory);
6889
6890 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
6892 "unified" => {
6893 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6894 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6895 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
6896 })?;
6897 (
6898 metadata.num_nodes,
6899 metadata.num_edges,
6900 metadata.num_hyperedges,
6901 )
6902 }
6903 _ => {
6904 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
6906 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6907 SynthError::generation(format!("Hypergraph export failed: {}", e))
6908 })?;
6909 (
6910 metadata.num_nodes,
6911 metadata.num_edges,
6912 metadata.num_hyperedges,
6913 )
6914 }
6915 };
6916
6917 #[cfg(feature = "streaming")]
6919 if let Some(ref target_url) = hg_settings.stream_target {
6920 use crate::stream_client::{StreamClient, StreamConfig};
6921 use std::io::Write as _;
6922
6923 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
6924 let stream_config = StreamConfig {
6925 target_url: target_url.clone(),
6926 batch_size: hg_settings.stream_batch_size,
6927 api_key,
6928 ..StreamConfig::default()
6929 };
6930
6931 match StreamClient::new(stream_config) {
6932 Ok(mut client) => {
6933 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6934 match exporter.export_to_writer(&hypergraph, &mut client) {
6935 Ok(_) => {
6936 if let Err(e) = client.flush() {
6937 warn!("Failed to flush stream client: {}", e);
6938 } else {
6939 info!("Streamed {} records to {}", client.total_sent(), target_url);
6940 }
6941 }
6942 Err(e) => {
6943 warn!("Streaming export failed: {}", e);
6944 }
6945 }
6946 }
6947 Err(e) => {
6948 warn!("Failed to create stream client: {}", e);
6949 }
6950 }
6951 }
6952
6953 stats.graph_node_count += num_nodes;
6955 stats.graph_edge_count += num_edges;
6956 stats.graph_export_count += 1;
6957
6958 Ok(HypergraphExportInfo {
6959 node_count: num_nodes,
6960 edge_count: num_edges,
6961 hyperedge_count: num_hyperedges,
6962 output_path: hg_dir,
6963 })
6964 }
6965
6966 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
6971 let pb = self.create_progress_bar(100, "Generating Banking Data");
6972
6973 let orchestrator = BankingOrchestratorBuilder::new()
6975 .config(self.config.banking.clone())
6976 .seed(self.seed + 9000)
6977 .country_pack(self.primary_pack().clone())
6978 .build();
6979
6980 if let Some(pb) = &pb {
6981 pb.inc(10);
6982 }
6983
6984 let result = orchestrator.generate();
6986
6987 if let Some(pb) = &pb {
6988 pb.inc(90);
6989 pb.finish_with_message(format!(
6990 "Banking: {} customers, {} transactions",
6991 result.customers.len(),
6992 result.transactions.len()
6993 ));
6994 }
6995
6996 let mut banking_customers = result.customers;
7001 let core_customers = &self.master_data.customers;
7002 if !core_customers.is_empty() {
7003 for (i, bc) in banking_customers.iter_mut().enumerate() {
7004 let core = &core_customers[i % core_customers.len()];
7005 bc.name = CustomerName::business(&core.name);
7006 bc.residence_country = core.country.clone();
7007 bc.enterprise_customer_id = Some(core.customer_id.clone());
7008 }
7009 debug!(
7010 "Cross-referenced {} banking customers with {} core customers",
7011 banking_customers.len(),
7012 core_customers.len()
7013 );
7014 }
7015
7016 Ok(BankingSnapshot {
7017 customers: banking_customers,
7018 accounts: result.accounts,
7019 transactions: result.transactions,
7020 transaction_labels: result.transaction_labels,
7021 customer_labels: result.customer_labels,
7022 account_labels: result.account_labels,
7023 relationship_labels: result.relationship_labels,
7024 narratives: result.narratives,
7025 suspicious_count: result.stats.suspicious_count,
7026 scenario_count: result.scenarios.len(),
7027 })
7028 }
7029
7030 fn calculate_total_transactions(&self) -> u64 {
7032 let months = self.config.global.period_months as f64;
7033 self.config
7034 .companies
7035 .iter()
7036 .map(|c| {
7037 let annual = c.annual_transaction_volume.count() as f64;
7038 let weighted = annual * c.volume_weight;
7039 (weighted * months / 12.0) as u64
7040 })
7041 .sum()
7042 }
7043
7044 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7046 if !self.phase_config.show_progress {
7047 return None;
7048 }
7049
7050 let pb = if let Some(mp) = &self.multi_progress {
7051 mp.add(ProgressBar::new(total))
7052 } else {
7053 ProgressBar::new(total)
7054 };
7055
7056 pb.set_style(
7057 ProgressStyle::default_bar()
7058 .template(&format!(
7059 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7060 message
7061 ))
7062 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7063 .progress_chars("#>-"),
7064 );
7065
7066 Some(pb)
7067 }
7068
7069 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7071 self.coa.clone()
7072 }
7073
7074 pub fn get_master_data(&self) -> &MasterDataSnapshot {
7076 &self.master_data
7077 }
7078
7079 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7081 use super::lineage::LineageGraphBuilder;
7082
7083 let mut builder = LineageGraphBuilder::new();
7084
7085 builder.add_config_section("config:global", "Global Config");
7087 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7088 builder.add_config_section("config:transactions", "Transaction Config");
7089
7090 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7092 builder.add_generator_phase("phase:je", "Journal Entry Generation");
7093
7094 builder.configured_by("phase:coa", "config:chart_of_accounts");
7096 builder.configured_by("phase:je", "config:transactions");
7097
7098 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7100 builder.produced_by("output:je", "phase:je");
7101
7102 if self.phase_config.generate_master_data {
7104 builder.add_config_section("config:master_data", "Master Data Config");
7105 builder.add_generator_phase("phase:master_data", "Master Data Generation");
7106 builder.configured_by("phase:master_data", "config:master_data");
7107 builder.input_to("phase:master_data", "phase:je");
7108 }
7109
7110 if self.phase_config.generate_document_flows {
7111 builder.add_config_section("config:document_flows", "Document Flow Config");
7112 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7113 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7114 builder.configured_by("phase:p2p", "config:document_flows");
7115 builder.configured_by("phase:o2c", "config:document_flows");
7116
7117 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7118 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7119 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7120 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7121 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7122
7123 builder.produced_by("output:po", "phase:p2p");
7124 builder.produced_by("output:gr", "phase:p2p");
7125 builder.produced_by("output:vi", "phase:p2p");
7126 builder.produced_by("output:so", "phase:o2c");
7127 builder.produced_by("output:ci", "phase:o2c");
7128 }
7129
7130 if self.phase_config.inject_anomalies {
7131 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7132 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7133 builder.configured_by("phase:anomaly", "config:fraud");
7134 builder.add_output_file(
7135 "output:labels",
7136 "Anomaly Labels",
7137 "labels/anomaly_labels.csv",
7138 );
7139 builder.produced_by("output:labels", "phase:anomaly");
7140 }
7141
7142 if self.phase_config.generate_audit {
7143 builder.add_config_section("config:audit", "Audit Config");
7144 builder.add_generator_phase("phase:audit", "Audit Data Generation");
7145 builder.configured_by("phase:audit", "config:audit");
7146 }
7147
7148 if self.phase_config.generate_banking {
7149 builder.add_config_section("config:banking", "Banking Config");
7150 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7151 builder.configured_by("phase:banking", "config:banking");
7152 }
7153
7154 if self.config.llm.enabled {
7155 builder.add_config_section("config:llm", "LLM Enrichment Config");
7156 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7157 builder.configured_by("phase:llm_enrichment", "config:llm");
7158 }
7159
7160 if self.config.diffusion.enabled {
7161 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7162 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7163 builder.configured_by("phase:diffusion", "config:diffusion");
7164 }
7165
7166 if self.config.causal.enabled {
7167 builder.add_config_section("config:causal", "Causal Generation Config");
7168 builder.add_generator_phase("phase:causal", "Causal Overlay");
7169 builder.configured_by("phase:causal", "config:causal");
7170 }
7171
7172 builder.build()
7173 }
7174}
7175
7176fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7178 match format {
7179 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7180 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7181 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7182 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7183 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7184 }
7185}
7186
7187#[cfg(test)]
7188#[allow(clippy::unwrap_used)]
7189mod tests {
7190 use super::*;
7191 use datasynth_config::schema::*;
7192
7193 fn create_test_config() -> GeneratorConfig {
7194 GeneratorConfig {
7195 global: GlobalConfig {
7196 industry: IndustrySector::Manufacturing,
7197 start_date: "2024-01-01".to_string(),
7198 period_months: 1,
7199 seed: Some(42),
7200 parallel: false,
7201 group_currency: "USD".to_string(),
7202 worker_threads: 0,
7203 memory_limit_mb: 0,
7204 },
7205 companies: vec![CompanyConfig {
7206 code: "1000".to_string(),
7207 name: "Test Company".to_string(),
7208 currency: "USD".to_string(),
7209 country: "US".to_string(),
7210 annual_transaction_volume: TransactionVolume::TenK,
7211 volume_weight: 1.0,
7212 fiscal_year_variant: "K4".to_string(),
7213 }],
7214 chart_of_accounts: ChartOfAccountsConfig {
7215 complexity: CoAComplexity::Small,
7216 industry_specific: true,
7217 custom_accounts: None,
7218 min_hierarchy_depth: 2,
7219 max_hierarchy_depth: 4,
7220 },
7221 transactions: TransactionConfig::default(),
7222 output: OutputConfig::default(),
7223 fraud: FraudConfig::default(),
7224 internal_controls: InternalControlsConfig::default(),
7225 business_processes: BusinessProcessConfig::default(),
7226 user_personas: UserPersonaConfig::default(),
7227 templates: TemplateConfig::default(),
7228 approval: ApprovalConfig::default(),
7229 departments: DepartmentConfig::default(),
7230 master_data: MasterDataConfig::default(),
7231 document_flows: DocumentFlowConfig::default(),
7232 intercompany: IntercompanyConfig::default(),
7233 balance: BalanceConfig::default(),
7234 ocpm: OcpmConfig::default(),
7235 audit: AuditGenerationConfig::default(),
7236 banking: datasynth_banking::BankingConfig::default(),
7237 data_quality: DataQualitySchemaConfig::default(),
7238 scenario: ScenarioConfig::default(),
7239 temporal: TemporalDriftConfig::default(),
7240 graph_export: GraphExportConfig::default(),
7241 streaming: StreamingSchemaConfig::default(),
7242 rate_limit: RateLimitSchemaConfig::default(),
7243 temporal_attributes: TemporalAttributeSchemaConfig::default(),
7244 relationships: RelationshipSchemaConfig::default(),
7245 accounting_standards: AccountingStandardsConfig::default(),
7246 audit_standards: AuditStandardsConfig::default(),
7247 distributions: Default::default(),
7248 temporal_patterns: Default::default(),
7249 vendor_network: VendorNetworkSchemaConfig::default(),
7250 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7251 relationship_strength: RelationshipStrengthSchemaConfig::default(),
7252 cross_process_links: CrossProcessLinksSchemaConfig::default(),
7253 organizational_events: OrganizationalEventsSchemaConfig::default(),
7254 behavioral_drift: BehavioralDriftSchemaConfig::default(),
7255 market_drift: MarketDriftSchemaConfig::default(),
7256 drift_labeling: DriftLabelingSchemaConfig::default(),
7257 anomaly_injection: Default::default(),
7258 industry_specific: Default::default(),
7259 fingerprint_privacy: Default::default(),
7260 quality_gates: Default::default(),
7261 compliance: Default::default(),
7262 webhooks: Default::default(),
7263 llm: Default::default(),
7264 diffusion: Default::default(),
7265 causal: Default::default(),
7266 source_to_pay: Default::default(),
7267 financial_reporting: Default::default(),
7268 hr: Default::default(),
7269 manufacturing: Default::default(),
7270 sales_quotes: Default::default(),
7271 tax: Default::default(),
7272 treasury: Default::default(),
7273 project_accounting: Default::default(),
7274 esg: Default::default(),
7275 country_packs: None,
7276 }
7277 }
7278
/// Constructing an orchestrator with default phases from the test config succeeds.
#[test]
fn test_enhanced_orchestrator_creation() {
    let created = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(created.is_ok());
}
7285
/// With only journal entry generation enabled, generation succeeds and yields entries.
#[test]
fn test_minimal_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let outcome = generator.generate();
    assert!(outcome.is_ok());

    let output = outcome.unwrap();
    assert!(!output.journal_entries.is_empty());
}
7305
/// With only master data enabled, vendors, customers and materials are produced.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();
    let master = &output.master_data;

    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
7330
/// Master data plus document flows yields P2P/O2C chains and their leading documents.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();
    let flows = &output.document_flows;

    // Both chain kinds exist.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened order collections are populated too.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
7364
/// With anomaly injection enabled, entries exist and a label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();

    assert!(!output.journal_entries.is_empty());

    assert!(output.anomaly_labels.summary.is_some());
}
7387
/// Master data, document flows, journal entries and balance validation together
/// populate every corresponding section of the result.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();

    // Every enabled phase left data behind.
    assert!(!output.master_data.vendors.is_empty());
    assert!(!output.master_data.customers.is_empty());
    assert!(!output.document_flows.p2p_chains.is_empty());
    assert!(!output.document_flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger invoices are populated.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran over at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
7429
/// Subledger AP/AR invoices match the document-flow invoices one-to-one in count,
/// and the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        generate_ocpm_events: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();
    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
7485
/// Balance validation over generated journal entries reports no unbalanced
/// entries and equal debit and credit totals.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();
    let validation = &output.balance_validation;

    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    assert!(!validation.has_unbalanced_entries);

    assert_eq!(validation.total_debits, validation.total_credits);
}
7515
/// Reported statistics equal the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();
    let stats = &output.statistics;
    let master = &output.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
7554
/// Pins the default phase switches: everything checked here is on except
/// anomaly injection, and the vendor/customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
7567
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let generator = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(generator.get_coa().is_none());
}
7576
/// After a generation run the chart of accounts is available through `get_coa`.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the side effect on the orchestrator matters here.
    let _ = generator.generate().unwrap();

    assert!(generator.get_coa().is_some());
}
7595
/// After generating master data, `get_master_data` exposes a non-empty vendor list.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    // Only the state retained on the orchestrator is inspected.
    let _ = generator.generate().unwrap();

    let snapshot = generator.get_master_data();
    assert!(!snapshot.vendors.is_empty());
}
7619
/// `with_progress(false)` switches `show_progress` off on the phase config.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = base.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
7630
/// Adding a second company yields at least 5 vendors and 5 customers per
/// company (10 in total each) and a company count of exactly two.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 per company across 2 companies.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` reports the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
7666
/// Requesting document flows without master data produces no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();
    let flows = &output.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
7686
/// `total_line_items` equals the sum of `line_count()` over all journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();

    // Recount the line items independently of the statistics.
    let mut recounted: u64 = 0;
    for entry in output.journal_entries.iter() {
        recounted += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, recounted);
}
7710
/// With audit generation enabled, two engagements are produced, every audit
/// collection is non-empty, and the audit statistics match the collection sizes.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();

    let output = generator.generate().unwrap();
    let audit = &output.audit;
    let stats = &output.statistics;

    // Collections are populated as configured.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Statistics mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
7767
/// LLM, diffusion and causal phases are off in the test config, and a run
/// leaves all of their statistics at zero / `None`.
#[test]
fn test_new_phases_disabled_by_default() {
    let config = create_test_config();

    // The switches themselves default to off.
    assert!(!config.llm.enabled);
    assert!(!config.diffusion.enabled);
    assert!(!config.causal.enabled);

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();

    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    // Nothing was recorded for the disabled phases.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
7797
/// With LLM enrichment enabled and capped at 3 vendors, between 1 and 3
/// vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut config = create_test_config();
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();

    let output = generator.generate().unwrap();
    let enriched = output.statistics.llm_vendors_enriched;

    assert!(enriched > 0);
    assert!(enriched <= 3);
}
7825
/// With diffusion enabled and `sample_size = 20`, exactly 20 samples are reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut config = create_test_config();
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 50;
    config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();

    let output = generator.generate().unwrap();

    assert_eq!(output.statistics.diffusion_samples_generated, 20);
}
7848
/// With the `fraud_detection` causal template and validation on, 100 samples
/// are reported and a validation verdict is recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut config = create_test_config();
    config.causal.enabled = true;
    config.causal.template = "fraud_detection".to_string();
    config.causal.sample_size = 100;
    config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();

    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    assert!(stats.causal_validation_passed.is_some());
}
7874
/// With the `revenue_cycle` causal template and validation off, 50 samples
/// are reported and no validation verdict is recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut config = create_test_config();
    config.causal.enabled = true;
    config.causal.template = "revenue_cycle".to_string();
    config.causal.sample_size = 50;
    config.causal.validate = false;

    let phases = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();

    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_none());
}
7900
/// LLM enrichment, diffusion and the causal overlay can all run in one
/// generation, each reporting its own statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();

    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;

    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;

    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };
    let mut generator = EnhancedOrchestrator::new(config, phases).unwrap();

    let output = generator.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
7936
7937 #[test]
7938 fn test_statistics_serialization_with_new_fields() {
7939 let stats = EnhancedGenerationStatistics {
7940 total_entries: 100,
7941 total_line_items: 500,
7942 llm_enrichment_ms: 42,
7943 llm_vendors_enriched: 10,
7944 diffusion_enhancement_ms: 100,
7945 diffusion_samples_generated: 50,
7946 causal_generation_ms: 200,
7947 causal_samples_generated: 100,
7948 causal_validation_passed: Some(true),
7949 ..Default::default()
7950 };
7951
7952 let json = serde_json::to_string(&stats).unwrap();
7953 let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
7954
7955 assert_eq!(deserialized.llm_enrichment_ms, 42);
7956 assert_eq!(deserialized.llm_vendors_enriched, 10);
7957 assert_eq!(deserialized.diffusion_enhancement_ms, 100);
7958 assert_eq!(deserialized.diffusion_samples_generated, 50);
7959 assert_eq!(deserialized.causal_generation_ms, 200);
7960 assert_eq!(deserialized.causal_samples_generated, 100);
7961 assert_eq!(deserialized.causal_validation_passed, Some(true));
7962 }
7963
7964 #[test]
7965 fn test_statistics_backward_compat_deserialization() {
7966 let old_json = r#"{
7968 "total_entries": 100,
7969 "total_line_items": 500,
7970 "accounts_count": 50,
7971 "companies_count": 1,
7972 "period_months": 12,
7973 "vendor_count": 10,
7974 "customer_count": 20,
7975 "material_count": 15,
7976 "asset_count": 5,
7977 "employee_count": 8,
7978 "p2p_chain_count": 5,
7979 "o2c_chain_count": 5,
7980 "ap_invoice_count": 5,
7981 "ar_invoice_count": 5,
7982 "ocpm_event_count": 0,
7983 "ocpm_object_count": 0,
7984 "ocpm_case_count": 0,
7985 "audit_engagement_count": 0,
7986 "audit_workpaper_count": 0,
7987 "audit_evidence_count": 0,
7988 "audit_risk_count": 0,
7989 "audit_finding_count": 0,
7990 "audit_judgment_count": 0,
7991 "anomalies_injected": 0,
7992 "data_quality_issues": 0,
7993 "banking_customer_count": 0,
7994 "banking_account_count": 0,
7995 "banking_transaction_count": 0,
7996 "banking_suspicious_count": 0,
7997 "graph_export_count": 0,
7998 "graph_node_count": 0,
7999 "graph_edge_count": 0
8000 }"#;
8001
8002 let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
8003
8004 assert_eq!(stats.llm_enrichment_ms, 0);
8006 assert_eq!(stats.llm_vendors_enriched, 0);
8007 assert_eq!(stats.diffusion_enhancement_ms, 0);
8008 assert_eq!(stats.diffusion_samples_generated, 0);
8009 assert_eq!(stats.causal_generation_ms, 0);
8010 assert_eq!(stats.causal_samples_generated, 0);
8011 assert!(stats.causal_validation_passed.is_none());
8012 }
8013}