1use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33 BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42 SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50 io::FingerprintReader,
51 models::Fingerprint,
52 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55 AnomalyInjector,
57 AnomalyInjectorConfig,
58 AssetGenerator,
59 AuditEngagementGenerator,
61 BalanceTrackerConfig,
62 BankReconciliationGenerator,
64 BidEvaluationGenerator,
66 BidGenerator,
67 CatalogGenerator,
68 ChartOfAccountsGenerator,
70 ContractGenerator,
71 CustomerGenerator,
72 DataQualityConfig,
73 DataQualityInjector,
75 DataQualityStats,
76 DocumentFlowJeConfig,
78 DocumentFlowJeGenerator,
79 DocumentFlowLinker,
81 EmployeeGenerator,
82 EsgAnomalyLabel,
84 EvidenceGenerator,
85 FinancialStatementGenerator,
87 FindingGenerator,
88 JournalEntryGenerator,
89 JudgmentGenerator,
90 LatePaymentDistribution,
91 MaterialGenerator,
92 O2CDocumentChain,
93 O2CGenerator,
94 O2CGeneratorConfig,
95 O2CPaymentBehavior,
96 P2PDocumentChain,
97 P2PGenerator,
99 P2PGeneratorConfig,
100 P2PPaymentBehavior,
101 PaymentReference,
102 QualificationGenerator,
103 RfxGenerator,
104 RiskAssessmentGenerator,
105 RunningBalanceTracker,
107 ScorecardGenerator,
108 SourcingProjectGenerator,
109 SpendAnalysisGenerator,
110 ValidationError,
111 VendorGenerator,
113 WorkpaperGenerator,
114};
115use datasynth_graph::{
116 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
117 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
118};
119use datasynth_ocpm::{
120 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
121 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
122 OcpmUuidFactory, P2pDocuments, S2cDocuments,
123};
124
125use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
126use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
127use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
128use datasynth_core::llm::MockLlmProvider;
129use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
130use datasynth_core::models::documents::PaymentMethod;
131use datasynth_core::models::IndustrySector;
132use datasynth_generators::llm_enrichment::VendorLlmEnricher;
133use rayon::prelude::*;
134
135fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
141 let payment_behavior = &schema_config.payment_behavior;
142 let late_dist = &payment_behavior.late_payment_days_distribution;
143
144 P2PGeneratorConfig {
145 three_way_match_rate: schema_config.three_way_match_rate,
146 partial_delivery_rate: schema_config.partial_delivery_rate,
147 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
149 max_price_variance_percent: schema_config.max_price_variance_percent,
150 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
151 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
152 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
153 payment_method_distribution: vec![
154 (PaymentMethod::BankTransfer, 0.60),
155 (PaymentMethod::Check, 0.25),
156 (PaymentMethod::Wire, 0.10),
157 (PaymentMethod::CreditCard, 0.05),
158 ],
159 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
161 late_payment_rate: payment_behavior.late_payment_rate,
162 late_payment_distribution: LatePaymentDistribution {
163 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
164 late_8_to_14: late_dist.late_8_to_14,
165 very_late_15_to_30: late_dist.very_late_15_to_30,
166 severely_late_31_to_60: late_dist.severely_late_31_to_60,
167 extremely_late_over_60: late_dist.extremely_late_over_60,
168 },
169 partial_payment_rate: payment_behavior.partial_payment_rate,
170 payment_correction_rate: payment_behavior.payment_correction_rate,
171 },
172 }
173}
174
175fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
177 let payment_behavior = &schema_config.payment_behavior;
178
179 O2CGeneratorConfig {
180 credit_check_failure_rate: schema_config.credit_check_failure_rate,
181 partial_shipment_rate: schema_config.partial_shipment_rate,
182 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
183 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
184 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
185 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
187 returns_rate: schema_config.return_rate,
188 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
189 payment_method_distribution: vec![
190 (PaymentMethod::BankTransfer, 0.50),
191 (PaymentMethod::Check, 0.30),
192 (PaymentMethod::Wire, 0.15),
193 (PaymentMethod::CreditCard, 0.05),
194 ],
195 payment_behavior: O2CPaymentBehavior {
196 partial_payment_rate: payment_behavior.partial_payments.rate,
197 short_payment_rate: payment_behavior.short_payments.rate,
198 max_short_percent: payment_behavior.short_payments.max_short_percent,
199 on_account_rate: payment_behavior.on_account_payments.rate,
200 payment_correction_rate: payment_behavior.payment_corrections.rate,
201 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
202 },
203 }
204}
205
/// Switches and volume settings selecting which generation phases run.
///
/// The "Default" noted on each field is the value produced by
/// [`PhaseConfig::default`].
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Master-data phase toggle. Default: `true`.
    pub generate_master_data: bool,
    /// Document-flow phase toggle. Default: `true`.
    pub generate_document_flows: bool,
    /// OCPM event phase toggle. Default: `false`.
    pub generate_ocpm_events: bool,
    /// Journal-entry phase toggle. Default: `true`.
    pub generate_journal_entries: bool,
    /// Anomaly-injection toggle. Default: `false`.
    pub inject_anomalies: bool,
    /// Data-quality-injection toggle. Default: `false`.
    pub inject_data_quality: bool,
    /// Balance-validation toggle. Default: `true`.
    pub validate_balances: bool,
    /// Progress-display toggle. Default: `true`.
    pub show_progress: bool,
    /// Vendors per company. Default: `50`.
    pub vendors_per_company: usize,
    /// Customers per company. Default: `100`.
    pub customers_per_company: usize,
    /// Materials per company. Default: `200`.
    pub materials_per_company: usize,
    /// Assets per company. Default: `50`.
    pub assets_per_company: usize,
    /// Employees per company. Default: `100`.
    pub employees_per_company: usize,
    /// Number of P2P chains. Default: `100`.
    pub p2p_chains: usize,
    /// Number of O2C chains. Default: `100`.
    pub o2c_chains: usize,
    /// Audit phase toggle. Default: `false`.
    pub generate_audit: bool,
    /// Number of audit engagements. Default: `5`.
    pub audit_engagements: usize,
    /// Workpapers per engagement. Default: `20`.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper. Default: `5`.
    pub evidence_per_workpaper: usize,
    /// Risks per engagement. Default: `15`.
    pub risks_per_engagement: usize,
    /// Findings per engagement. Default: `8`.
    pub findings_per_engagement: usize,
    /// Judgments per engagement. Default: `10`.
    pub judgments_per_engagement: usize,
    /// Banking phase toggle. Default: `false`.
    pub generate_banking: bool,
    /// Graph-export phase toggle. Default: `false`.
    pub generate_graph_export: bool,
    /// Sourcing phase toggle. Default: `false`.
    pub generate_sourcing: bool,
    /// Bank-reconciliation phase toggle. Default: `false`.
    pub generate_bank_reconciliation: bool,
    /// Financial-statements phase toggle. Default: `false`.
    pub generate_financial_statements: bool,
    /// Accounting-standards phase toggle. Default: `false`.
    pub generate_accounting_standards: bool,
    /// Manufacturing phase toggle. Default: `false`.
    pub generate_manufacturing: bool,
    /// Sales/KPI/budget phase toggle. Default: `false`.
    pub generate_sales_kpi_budgets: bool,
    /// Tax phase toggle. Default: `false`.
    pub generate_tax: bool,
    /// ESG phase toggle. Default: `false`.
    pub generate_esg: bool,
    /// Intercompany phase toggle. Default: `false`.
    pub generate_intercompany: bool,
}

impl Default for PhaseConfig {
    /// Master data, document flows, journal entries, balance validation and
    /// progress display are on; every other toggle is off.
    fn default() -> Self {
        Self {
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            show_progress: true,
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,
            generate_audit: false,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
        }
    }
}
316
317#[derive(Debug, Clone, Default)]
319pub struct MasterDataSnapshot {
320 pub vendors: Vec<Vendor>,
322 pub customers: Vec<Customer>,
324 pub materials: Vec<Material>,
326 pub assets: Vec<FixedAsset>,
328 pub employees: Vec<Employee>,
330}
331
/// Size figures and output location describing a hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
344
345#[derive(Debug, Clone, Default)]
347pub struct DocumentFlowSnapshot {
348 pub p2p_chains: Vec<P2PDocumentChain>,
350 pub o2c_chains: Vec<O2CDocumentChain>,
352 pub purchase_orders: Vec<documents::PurchaseOrder>,
354 pub goods_receipts: Vec<documents::GoodsReceipt>,
356 pub vendor_invoices: Vec<documents::VendorInvoice>,
358 pub sales_orders: Vec<documents::SalesOrder>,
360 pub deliveries: Vec<documents::Delivery>,
362 pub customer_invoices: Vec<documents::CustomerInvoice>,
364 pub payments: Vec<documents::Payment>,
366}
367
368#[derive(Debug, Clone, Default)]
370pub struct SubledgerSnapshot {
371 pub ap_invoices: Vec<APInvoice>,
373 pub ar_invoices: Vec<ARInvoice>,
375 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
377 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
379 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
381}
382
383#[derive(Debug, Clone, Default)]
385pub struct OcpmSnapshot {
386 pub event_log: Option<OcpmEventLog>,
388 pub event_count: usize,
390 pub object_count: usize,
392 pub case_count: usize,
394}
395
396#[derive(Debug, Clone, Default)]
398pub struct AuditSnapshot {
399 pub engagements: Vec<AuditEngagement>,
401 pub workpapers: Vec<Workpaper>,
403 pub evidence: Vec<AuditEvidence>,
405 pub risk_assessments: Vec<RiskAssessment>,
407 pub findings: Vec<AuditFinding>,
409 pub judgments: Vec<ProfessionalJudgment>,
411}
412
413#[derive(Debug, Clone, Default)]
415pub struct BankingSnapshot {
416 pub customers: Vec<BankingCustomer>,
418 pub accounts: Vec<BankAccount>,
420 pub transactions: Vec<BankTransaction>,
422 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
424 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
426 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
428 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
430 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
432 pub suspicious_count: usize,
434 pub scenario_count: usize,
436}
437
438#[derive(Debug, Clone, Default, Serialize)]
440pub struct GraphExportSnapshot {
441 pub exported: bool,
443 pub graph_count: usize,
445 pub exports: HashMap<String, GraphExportInfo>,
447}
448
449#[derive(Debug, Clone, Serialize)]
451pub struct GraphExportInfo {
452 pub name: String,
454 pub format: String,
456 pub output_path: PathBuf,
458 pub node_count: usize,
460 pub edge_count: usize,
462}
463
464#[derive(Debug, Clone, Default)]
466pub struct SourcingSnapshot {
467 pub spend_analyses: Vec<SpendAnalysis>,
469 pub sourcing_projects: Vec<SourcingProject>,
471 pub qualifications: Vec<SupplierQualification>,
473 pub rfx_events: Vec<RfxEvent>,
475 pub bids: Vec<SupplierBid>,
477 pub bid_evaluations: Vec<BidEvaluation>,
479 pub contracts: Vec<ProcurementContract>,
481 pub catalog_items: Vec<CatalogItem>,
483 pub scorecards: Vec<SupplierScorecard>,
485}
486
487#[derive(Debug, Clone, Serialize, Deserialize)]
489pub struct PeriodTrialBalance {
490 pub fiscal_year: u16,
492 pub fiscal_period: u8,
494 pub period_start: NaiveDate,
496 pub period_end: NaiveDate,
498 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
500}
501
502#[derive(Debug, Clone, Default)]
504pub struct FinancialReportingSnapshot {
505 pub financial_statements: Vec<FinancialStatement>,
507 pub bank_reconciliations: Vec<BankReconciliation>,
509 pub trial_balances: Vec<PeriodTrialBalance>,
511}
512
513#[derive(Debug, Clone, Default)]
515pub struct HrSnapshot {
516 pub payroll_runs: Vec<PayrollRun>,
518 pub payroll_line_items: Vec<PayrollLineItem>,
520 pub time_entries: Vec<TimeEntry>,
522 pub expense_reports: Vec<ExpenseReport>,
524 pub payroll_run_count: usize,
526 pub payroll_line_item_count: usize,
528 pub time_entry_count: usize,
530 pub expense_report_count: usize,
532}
533
534#[derive(Debug, Clone, Default)]
536pub struct AccountingStandardsSnapshot {
537 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
539 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
541 pub revenue_contract_count: usize,
543 pub impairment_test_count: usize,
545}
546
547#[derive(Debug, Clone, Default)]
549pub struct ManufacturingSnapshot {
550 pub production_orders: Vec<ProductionOrder>,
552 pub quality_inspections: Vec<QualityInspection>,
554 pub cycle_counts: Vec<CycleCount>,
556 pub production_order_count: usize,
558 pub quality_inspection_count: usize,
560 pub cycle_count_count: usize,
562}
563
564#[derive(Debug, Clone, Default)]
566pub struct SalesKpiBudgetsSnapshot {
567 pub sales_quotes: Vec<SalesQuote>,
569 pub kpis: Vec<ManagementKpi>,
571 pub budgets: Vec<Budget>,
573 pub sales_quote_count: usize,
575 pub kpi_count: usize,
577 pub budget_line_count: usize,
579}
580
581#[derive(Debug, Clone, Default)]
583pub struct AnomalyLabels {
584 pub labels: Vec<LabeledAnomaly>,
586 pub summary: Option<AnomalySummary>,
588 pub by_type: HashMap<String, usize>,
590}
591
592#[derive(Debug, Clone, Default)]
594pub struct BalanceValidationResult {
595 pub validated: bool,
597 pub is_balanced: bool,
599 pub entries_processed: u64,
601 pub total_debits: rust_decimal::Decimal,
603 pub total_credits: rust_decimal::Decimal,
605 pub accounts_tracked: usize,
607 pub companies_tracked: usize,
609 pub validation_errors: Vec<ValidationError>,
611 pub has_unbalanced_entries: bool,
613}
614
615#[derive(Debug, Clone, Default)]
617pub struct TaxSnapshot {
618 pub jurisdictions: Vec<TaxJurisdiction>,
620 pub codes: Vec<TaxCode>,
622 pub tax_lines: Vec<TaxLine>,
624 pub tax_returns: Vec<TaxReturn>,
626 pub tax_provisions: Vec<TaxProvision>,
628 pub withholding_records: Vec<WithholdingTaxRecord>,
630 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
632 pub jurisdiction_count: usize,
634 pub code_count: usize,
636}
637
638#[derive(Debug, Clone, Default, Serialize, Deserialize)]
640pub struct IntercompanySnapshot {
641 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
643 pub seller_journal_entries: Vec<JournalEntry>,
645 pub buyer_journal_entries: Vec<JournalEntry>,
647 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
649 pub matched_pair_count: usize,
651 pub elimination_entry_count: usize,
653 pub match_rate: f64,
655}
656
657#[derive(Debug, Clone, Default)]
659pub struct EsgSnapshot {
660 pub emissions: Vec<EmissionRecord>,
662 pub energy: Vec<EnergyConsumption>,
664 pub water: Vec<WaterUsage>,
666 pub waste: Vec<WasteRecord>,
668 pub diversity: Vec<WorkforceDiversityMetric>,
670 pub pay_equity: Vec<PayEquityMetric>,
672 pub safety_incidents: Vec<SafetyIncident>,
674 pub safety_metrics: Vec<SafetyMetric>,
676 pub governance: Vec<GovernanceMetric>,
678 pub supplier_assessments: Vec<SupplierEsgAssessment>,
680 pub materiality: Vec<MaterialityAssessment>,
682 pub disclosures: Vec<EsgDisclosure>,
684 pub climate_scenarios: Vec<ClimateScenario>,
686 pub anomaly_labels: Vec<EsgAnomalyLabel>,
688 pub emission_count: usize,
690 pub disclosure_count: usize,
692}
693
694#[derive(Debug, Clone, Default)]
696pub struct TreasurySnapshot {
697 pub cash_positions: Vec<CashPosition>,
699 pub cash_forecasts: Vec<CashForecast>,
701 pub cash_pools: Vec<CashPool>,
703 pub cash_pool_sweeps: Vec<CashPoolSweep>,
705 pub hedging_instruments: Vec<HedgingInstrument>,
707 pub hedge_relationships: Vec<HedgeRelationship>,
709 pub debt_instruments: Vec<DebtInstrument>,
711 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
713}
714
715#[derive(Debug, Clone, Default)]
717pub struct ProjectAccountingSnapshot {
718 pub projects: Vec<Project>,
720 pub cost_lines: Vec<ProjectCostLine>,
722 pub revenue_records: Vec<ProjectRevenue>,
724 pub earned_value_metrics: Vec<EarnedValueMetric>,
726 pub change_orders: Vec<ChangeOrder>,
728 pub milestones: Vec<ProjectMilestone>,
730}
731
732#[derive(Debug)]
734pub struct EnhancedGenerationResult {
735 pub chart_of_accounts: ChartOfAccounts,
737 pub master_data: MasterDataSnapshot,
739 pub document_flows: DocumentFlowSnapshot,
741 pub subledger: SubledgerSnapshot,
743 pub ocpm: OcpmSnapshot,
745 pub audit: AuditSnapshot,
747 pub banking: BankingSnapshot,
749 pub graph_export: GraphExportSnapshot,
751 pub sourcing: SourcingSnapshot,
753 pub financial_reporting: FinancialReportingSnapshot,
755 pub hr: HrSnapshot,
757 pub accounting_standards: AccountingStandardsSnapshot,
759 pub manufacturing: ManufacturingSnapshot,
761 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
763 pub tax: TaxSnapshot,
765 pub esg: EsgSnapshot,
767 pub treasury: TreasurySnapshot,
769 pub project_accounting: ProjectAccountingSnapshot,
771 pub intercompany: IntercompanySnapshot,
773 pub journal_entries: Vec<JournalEntry>,
775 pub anomaly_labels: AnomalyLabels,
777 pub balance_validation: BalanceValidationResult,
779 pub data_quality_stats: DataQualityStats,
781 pub statistics: EnhancedGenerationStatistics,
783 pub lineage: Option<super::lineage::LineageGraph>,
785 pub gate_result: Option<datasynth_eval::gates::GateResult>,
787 pub internal_controls: Vec<InternalControl>,
789 pub opening_balances: Vec<GeneratedOpeningBalance>,
791 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
793}
794
795#[derive(Debug, Clone, Default, Serialize, Deserialize)]
797pub struct EnhancedGenerationStatistics {
798 pub total_entries: u64,
800 pub total_line_items: u64,
802 pub accounts_count: usize,
804 pub companies_count: usize,
806 pub period_months: u32,
808 pub vendor_count: usize,
810 pub customer_count: usize,
811 pub material_count: usize,
812 pub asset_count: usize,
813 pub employee_count: usize,
814 pub p2p_chain_count: usize,
816 pub o2c_chain_count: usize,
817 pub ap_invoice_count: usize,
819 pub ar_invoice_count: usize,
820 pub ocpm_event_count: usize,
822 pub ocpm_object_count: usize,
823 pub ocpm_case_count: usize,
824 pub audit_engagement_count: usize,
826 pub audit_workpaper_count: usize,
827 pub audit_evidence_count: usize,
828 pub audit_risk_count: usize,
829 pub audit_finding_count: usize,
830 pub audit_judgment_count: usize,
831 pub anomalies_injected: usize,
833 pub data_quality_issues: usize,
835 pub banking_customer_count: usize,
837 pub banking_account_count: usize,
838 pub banking_transaction_count: usize,
839 pub banking_suspicious_count: usize,
840 pub graph_export_count: usize,
842 pub graph_node_count: usize,
843 pub graph_edge_count: usize,
844 #[serde(default)]
846 pub llm_enrichment_ms: u64,
847 #[serde(default)]
849 pub llm_vendors_enriched: usize,
850 #[serde(default)]
852 pub diffusion_enhancement_ms: u64,
853 #[serde(default)]
855 pub diffusion_samples_generated: usize,
856 #[serde(default)]
858 pub causal_generation_ms: u64,
859 #[serde(default)]
861 pub causal_samples_generated: usize,
862 #[serde(default)]
864 pub causal_validation_passed: Option<bool>,
865 #[serde(default)]
867 pub sourcing_project_count: usize,
868 #[serde(default)]
869 pub rfx_event_count: usize,
870 #[serde(default)]
871 pub bid_count: usize,
872 #[serde(default)]
873 pub contract_count: usize,
874 #[serde(default)]
875 pub catalog_item_count: usize,
876 #[serde(default)]
877 pub scorecard_count: usize,
878 #[serde(default)]
880 pub financial_statement_count: usize,
881 #[serde(default)]
882 pub bank_reconciliation_count: usize,
883 #[serde(default)]
885 pub payroll_run_count: usize,
886 #[serde(default)]
887 pub time_entry_count: usize,
888 #[serde(default)]
889 pub expense_report_count: usize,
890 #[serde(default)]
892 pub revenue_contract_count: usize,
893 #[serde(default)]
894 pub impairment_test_count: usize,
895 #[serde(default)]
897 pub production_order_count: usize,
898 #[serde(default)]
899 pub quality_inspection_count: usize,
900 #[serde(default)]
901 pub cycle_count_count: usize,
902 #[serde(default)]
904 pub sales_quote_count: usize,
905 #[serde(default)]
906 pub kpi_count: usize,
907 #[serde(default)]
908 pub budget_line_count: usize,
909 #[serde(default)]
911 pub tax_jurisdiction_count: usize,
912 #[serde(default)]
913 pub tax_code_count: usize,
914 #[serde(default)]
916 pub esg_emission_count: usize,
917 #[serde(default)]
918 pub esg_disclosure_count: usize,
919 #[serde(default)]
921 pub ic_matched_pair_count: usize,
922 #[serde(default)]
923 pub ic_elimination_count: usize,
924 #[serde(default)]
926 pub ic_transaction_count: usize,
927 #[serde(default)]
929 pub fa_subledger_count: usize,
930 #[serde(default)]
932 pub inventory_subledger_count: usize,
933 #[serde(default)]
935 pub treasury_debt_instrument_count: usize,
936 #[serde(default)]
938 pub treasury_hedging_instrument_count: usize,
939 #[serde(default)]
941 pub project_count: usize,
942 #[serde(default)]
944 pub project_change_order_count: usize,
945 #[serde(default)]
947 pub tax_provision_count: usize,
948 #[serde(default)]
950 pub opening_balance_count: usize,
951 #[serde(default)]
953 pub subledger_reconciliation_count: usize,
954 #[serde(default)]
956 pub tax_line_count: usize,
957 #[serde(default)]
959 pub project_cost_line_count: usize,
960 #[serde(default)]
962 pub cash_position_count: usize,
963}
964
965pub struct EnhancedOrchestrator {
967 config: GeneratorConfig,
968 phase_config: PhaseConfig,
969 coa: Option<Arc<ChartOfAccounts>>,
970 master_data: MasterDataSnapshot,
971 seed: u64,
972 multi_progress: Option<MultiProgress>,
973 resource_guard: ResourceGuard,
975 output_path: Option<PathBuf>,
977 copula_generators: Vec<CopulaGeneratorSpec>,
979 country_pack_registry: datasynth_core::CountryPackRegistry,
981}
982
983impl EnhancedOrchestrator {
984 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
986 datasynth_config::validate_config(&config)?;
987
988 let seed = config.global.seed.unwrap_or_else(rand::random);
989
990 let resource_guard = Self::build_resource_guard(&config, None);
992
993 let country_pack_registry = match &config.country_packs {
995 Some(cp) => {
996 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
997 .map_err(|e| SynthError::config(e.to_string()))?
998 }
999 None => datasynth_core::CountryPackRegistry::builtin_only()
1000 .map_err(|e| SynthError::config(e.to_string()))?,
1001 };
1002
1003 Ok(Self {
1004 config,
1005 phase_config,
1006 coa: None,
1007 master_data: MasterDataSnapshot::default(),
1008 seed,
1009 multi_progress: None,
1010 resource_guard,
1011 output_path: None,
1012 copula_generators: Vec::new(),
1013 country_pack_registry,
1014 })
1015 }
1016
1017 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1019 Self::new(config, PhaseConfig::default())
1020 }
1021
1022 pub fn with_progress(mut self, show: bool) -> Self {
1024 self.phase_config.show_progress = show;
1025 if show {
1026 self.multi_progress = Some(MultiProgress::new());
1027 }
1028 self
1029 }
1030
1031 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1033 let path = path.into();
1034 self.output_path = Some(path.clone());
1035 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1037 self
1038 }
1039
1040 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1042 &self.country_pack_registry
1043 }
1044
1045 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1047 self.country_pack_registry.get_by_str(country)
1048 }
1049
1050 fn primary_country_code(&self) -> &str {
1053 self.config
1054 .companies
1055 .first()
1056 .map(|c| c.country.as_str())
1057 .unwrap_or("US")
1058 }
1059
1060 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1062 self.country_pack_for(self.primary_country_code())
1063 }
1064
1065 pub fn has_copulas(&self) -> bool {
1070 !self.copula_generators.is_empty()
1071 }
1072
1073 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1079 &self.copula_generators
1080 }
1081
1082 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1086 &mut self.copula_generators
1087 }
1088
1089 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1093 self.copula_generators
1094 .iter_mut()
1095 .find(|c| c.name == copula_name)
1096 .map(|c| c.generator.sample())
1097 }
1098
1099 pub fn from_fingerprint(
1122 fingerprint_path: &std::path::Path,
1123 phase_config: PhaseConfig,
1124 scale: f64,
1125 ) -> SynthResult<Self> {
1126 info!("Loading fingerprint from: {}", fingerprint_path.display());
1127
1128 let reader = FingerprintReader::new();
1130 let fingerprint = reader
1131 .read_from_file(fingerprint_path)
1132 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1133
1134 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1135 }
1136
1137 pub fn from_fingerprint_data(
1144 fingerprint: Fingerprint,
1145 phase_config: PhaseConfig,
1146 scale: f64,
1147 ) -> SynthResult<Self> {
1148 info!(
1149 "Synthesizing config from fingerprint (version: {}, tables: {})",
1150 fingerprint.manifest.version,
1151 fingerprint.schema.tables.len()
1152 );
1153
1154 let seed: u64 = rand::random();
1156
1157 let options = SynthesisOptions {
1159 scale,
1160 seed: Some(seed),
1161 preserve_correlations: true,
1162 inject_anomalies: true,
1163 };
1164 let synthesizer = ConfigSynthesizer::with_options(options);
1165
1166 let synthesis_result = synthesizer
1168 .synthesize_full(&fingerprint, seed)
1169 .map_err(|e| {
1170 SynthError::config(format!(
1171 "Failed to synthesize config from fingerprint: {}",
1172 e
1173 ))
1174 })?;
1175
1176 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1178 Self::base_config_for_industry(industry)
1179 } else {
1180 Self::base_config_for_industry("manufacturing")
1181 };
1182
1183 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1185
1186 info!(
1188 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1189 fingerprint.schema.tables.len(),
1190 scale,
1191 synthesis_result.copula_generators.len()
1192 );
1193
1194 if !synthesis_result.copula_generators.is_empty() {
1195 for spec in &synthesis_result.copula_generators {
1196 info!(
1197 " Copula '{}' for table '{}': {} columns",
1198 spec.name,
1199 spec.table,
1200 spec.columns.len()
1201 );
1202 }
1203 }
1204
1205 let mut orchestrator = Self::new(config, phase_config)?;
1207
1208 orchestrator.copula_generators = synthesis_result.copula_generators;
1210
1211 Ok(orchestrator)
1212 }
1213
1214 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1216 use datasynth_config::presets::create_preset;
1217 use datasynth_config::TransactionVolume;
1218 use datasynth_core::models::{CoAComplexity, IndustrySector};
1219
1220 let sector = match industry.to_lowercase().as_str() {
1221 "manufacturing" => IndustrySector::Manufacturing,
1222 "retail" => IndustrySector::Retail,
1223 "financial" | "financial_services" => IndustrySector::FinancialServices,
1224 "healthcare" => IndustrySector::Healthcare,
1225 "technology" | "tech" => IndustrySector::Technology,
1226 _ => IndustrySector::Manufacturing,
1227 };
1228
1229 create_preset(
1231 sector,
1232 1, 12, CoAComplexity::Medium,
1235 TransactionVolume::TenK,
1236 )
1237 }
1238
1239 fn apply_config_patch(
1241 mut config: GeneratorConfig,
1242 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1243 ) -> GeneratorConfig {
1244 use datasynth_fingerprint::synthesis::ConfigValue;
1245
1246 for (key, value) in patch.values() {
1247 match (key.as_str(), value) {
1248 ("transactions.count", ConfigValue::Integer(n)) => {
1251 info!(
1252 "Fingerprint suggests {} transactions (apply via company volumes)",
1253 n
1254 );
1255 }
1256 ("global.period_months", ConfigValue::Integer(n)) => {
1257 config.global.period_months = *n as u32;
1258 }
1259 ("global.start_date", ConfigValue::String(s)) => {
1260 config.global.start_date = s.clone();
1261 }
1262 ("global.seed", ConfigValue::Integer(n)) => {
1263 config.global.seed = Some(*n as u64);
1264 }
1265 ("fraud.enabled", ConfigValue::Bool(b)) => {
1266 config.fraud.enabled = *b;
1267 }
1268 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1269 config.fraud.fraud_rate = *f;
1270 }
1271 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1272 config.data_quality.enabled = *b;
1273 }
1274 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1276 config.fraud.enabled = *b;
1277 }
1278 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1279 config.fraud.fraud_rate = *f;
1280 }
1281 _ => {
1282 debug!("Ignoring unknown config patch key: {}", key);
1283 }
1284 }
1285 }
1286
1287 config
1288 }
1289
1290 fn build_resource_guard(
1292 config: &GeneratorConfig,
1293 output_path: Option<PathBuf>,
1294 ) -> ResourceGuard {
1295 let mut builder = ResourceGuardBuilder::new();
1296
1297 if config.global.memory_limit_mb > 0 {
1299 builder = builder.memory_limit(config.global.memory_limit_mb);
1300 }
1301
1302 if let Some(path) = output_path {
1304 builder = builder.output_path(path).min_free_disk(100); }
1306
1307 builder = builder.conservative();
1309
1310 builder.build()
1311 }
1312
1313 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1318 self.resource_guard.check()
1319 }
1320
1321 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1323 let level = self.resource_guard.check()?;
1324
1325 if level != DegradationLevel::Normal {
1326 warn!(
1327 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1328 phase,
1329 level,
1330 self.resource_guard.current_memory_mb(),
1331 self.resource_guard.available_disk_mb()
1332 );
1333 }
1334
1335 Ok(level)
1336 }
1337
1338 fn get_degradation_actions(&self) -> DegradationActions {
1340 self.resource_guard.get_actions()
1341 }
1342
1343 fn check_memory_limit(&self) -> SynthResult<()> {
1345 self.check_resources()?;
1346 Ok(())
1347 }
1348
1349 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1351 info!("Starting enhanced generation workflow");
1352 info!(
1353 "Config: industry={:?}, period_months={}, companies={}",
1354 self.config.global.industry,
1355 self.config.global.period_months,
1356 self.config.companies.len()
1357 );
1358
1359 let initial_level = self.check_resources_with_log("initial")?;
1361 if initial_level == DegradationLevel::Emergency {
1362 return Err(SynthError::resource(
1363 "Insufficient resources to start generation",
1364 ));
1365 }
1366
1367 let mut stats = EnhancedGenerationStatistics {
1368 companies_count: self.config.companies.len(),
1369 period_months: self.config.global.period_months,
1370 ..Default::default()
1371 };
1372
1373 let coa = self.phase_chart_of_accounts(&mut stats)?;
1375
1376 self.phase_master_data(&mut stats)?;
1378
1379 let (mut document_flows, subledger, fa_journal_entries) =
1381 self.phase_document_flows(&mut stats)?;
1382
1383 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1385
1386 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1394
1395 if !fa_journal_entries.is_empty() {
1397 debug!(
1398 "Appending {} FA acquisition JEs to main entries",
1399 fa_journal_entries.len()
1400 );
1401 entries.extend(fa_journal_entries);
1402 }
1403
1404 let actions = self.get_degradation_actions();
1406
1407 let sourcing = self.phase_sourcing_data(&mut stats)?;
1409
1410 if !sourcing.contracts.is_empty() {
1412 let mut linked_count = 0usize;
1413 for chain in &mut document_flows.p2p_chains {
1414 if chain.purchase_order.contract_id.is_none() {
1415 if let Some(contract) = sourcing
1416 .contracts
1417 .iter()
1418 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1419 {
1420 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1421 linked_count += 1;
1422 }
1423 }
1424 }
1425 if linked_count > 0 {
1426 debug!(
1427 "Linked {} purchase orders to S2C contracts by vendor match",
1428 linked_count
1429 );
1430 }
1431 }
1432
1433 let intercompany = self.phase_intercompany(&mut stats)?;
1435
1436 if !intercompany.seller_journal_entries.is_empty()
1438 || !intercompany.buyer_journal_entries.is_empty()
1439 {
1440 let ic_je_count = intercompany.seller_journal_entries.len()
1441 + intercompany.buyer_journal_entries.len();
1442 entries.extend(intercompany.seller_journal_entries.iter().cloned());
1443 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1444 debug!(
1445 "Appended {} IC journal entries to main entries",
1446 ic_je_count
1447 );
1448 }
1449
1450 let hr = self.phase_hr_data(&mut stats)?;
1452
1453 if !hr.payroll_runs.is_empty() {
1455 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1456 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1457 entries.extend(payroll_jes);
1458 }
1459
1460 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1462
1463 if !manufacturing_snap.production_orders.is_empty() {
1465 let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1466 debug!("Generated {} JEs from production orders", mfg_jes.len());
1467 entries.extend(mfg_jes);
1468 }
1469
1470 if !entries.is_empty() {
1473 stats.total_entries = entries.len() as u64;
1474 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1475 debug!(
1476 "Final entry count: {}, line items: {} (after all JE-generating phases)",
1477 stats.total_entries, stats.total_line_items
1478 );
1479 }
1480
1481 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1483
1484 let balance_validation = self.phase_balance_validation(&entries)?;
1486
1487 let subledger_reconciliation =
1489 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1490
1491 let data_quality_stats =
1493 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1494
1495 let audit = self.phase_audit_data(&entries, &mut stats)?;
1497
1498 let banking = self.phase_banking_data(&mut stats)?;
1500
1501 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1503
1504 self.phase_llm_enrichment(&mut stats);
1506
1507 self.phase_diffusion_enhancement(&mut stats);
1509
1510 self.phase_causal_overlay(&mut stats);
1512
1513 let financial_reporting =
1515 self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1516
1517 let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1519
1520 let ocpm = self.phase_ocpm_events(
1522 &document_flows,
1523 &sourcing,
1524 &hr,
1525 &manufacturing_snap,
1526 &banking,
1527 &audit,
1528 &financial_reporting,
1529 &mut stats,
1530 )?;
1531
1532 let sales_kpi_budgets =
1534 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1535
1536 let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1538
1539 let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1541
1542 let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;
1544
1545 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1547
1548 self.phase_hypergraph_export(
1550 &coa,
1551 &entries,
1552 &document_flows,
1553 &sourcing,
1554 &hr,
1555 &manufacturing_snap,
1556 &banking,
1557 &audit,
1558 &financial_reporting,
1559 &ocpm,
1560 &mut stats,
1561 )?;
1562
1563 if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1566 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1567 }
1568
1569 if self.config.streaming.enabled {
1571 info!("Note: streaming config is enabled but batch mode does not use it");
1572 }
1573 if self.config.vendor_network.enabled {
1574 debug!("Vendor network config available; relationship graph generation is partial");
1575 }
1576 if self.config.customer_segmentation.enabled {
1577 debug!("Customer segmentation config available; segment-aware generation is partial");
1578 }
1579
1580 let resource_stats = self.resource_guard.stats();
1582 info!(
1583 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1584 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1585 resource_stats.disk.estimated_bytes_written,
1586 resource_stats.degradation_level
1587 );
1588
1589 let lineage = self.build_lineage_graph();
1591
1592 let gate_result = if self.config.quality_gates.enabled {
1594 let profile_name = &self.config.quality_gates.profile;
1595 match datasynth_eval::gates::get_profile(profile_name) {
1596 Some(profile) => {
1597 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1599
1600 if balance_validation.validated {
1602 eval.coherence.balance =
1603 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1604 equation_balanced: balance_validation.is_balanced,
1605 max_imbalance: (balance_validation.total_debits
1606 - balance_validation.total_credits)
1607 .abs(),
1608 periods_evaluated: 1,
1609 periods_imbalanced: if balance_validation.is_balanced {
1610 0
1611 } else {
1612 1
1613 },
1614 period_results: Vec::new(),
1615 companies_evaluated: self.config.companies.len(),
1616 });
1617 }
1618
1619 eval.coherence.passes = balance_validation.is_balanced;
1621 if !balance_validation.is_balanced {
1622 eval.coherence
1623 .failures
1624 .push("Balance sheet equation not satisfied".to_string());
1625 }
1626
1627 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1629 eval.statistical.passes = !entries.is_empty();
1630
1631 eval.quality.overall_score = 0.9; eval.quality.passes = true;
1634
1635 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1636 info!(
1637 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1638 profile_name, result.gates_passed, result.gates_total, result.summary
1639 );
1640 Some(result)
1641 }
1642 None => {
1643 warn!(
1644 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1645 profile_name
1646 );
1647 None
1648 }
1649 }
1650 } else {
1651 None
1652 };
1653
1654 let internal_controls = if self.config.internal_controls.enabled {
1656 InternalControl::standard_controls()
1657 } else {
1658 Vec::new()
1659 };
1660
1661 Ok(EnhancedGenerationResult {
1662 chart_of_accounts: (*coa).clone(),
1663 master_data: self.master_data.clone(),
1664 document_flows,
1665 subledger,
1666 ocpm,
1667 audit,
1668 banking,
1669 graph_export,
1670 sourcing,
1671 financial_reporting,
1672 hr,
1673 accounting_standards,
1674 manufacturing: manufacturing_snap,
1675 sales_kpi_budgets,
1676 tax,
1677 esg: esg_snap,
1678 treasury,
1679 project_accounting,
1680 intercompany,
1681 journal_entries: entries,
1682 anomaly_labels,
1683 balance_validation,
1684 data_quality_stats,
1685 statistics: stats,
1686 lineage: Some(lineage),
1687 gate_result,
1688 internal_controls,
1689 opening_balances,
1690 subledger_reconciliation,
1691 })
1692 }
1693
1694 fn phase_chart_of_accounts(
1700 &mut self,
1701 stats: &mut EnhancedGenerationStatistics,
1702 ) -> SynthResult<Arc<ChartOfAccounts>> {
1703 info!("Phase 1: Generating Chart of Accounts");
1704 let coa = self.generate_coa()?;
1705 stats.accounts_count = coa.account_count();
1706 info!(
1707 "Chart of Accounts generated: {} accounts",
1708 stats.accounts_count
1709 );
1710 self.check_resources_with_log("post-coa")?;
1711 Ok(coa)
1712 }
1713
1714 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1716 if self.phase_config.generate_master_data {
1717 info!("Phase 2: Generating Master Data");
1718 self.generate_master_data()?;
1719 stats.vendor_count = self.master_data.vendors.len();
1720 stats.customer_count = self.master_data.customers.len();
1721 stats.material_count = self.master_data.materials.len();
1722 stats.asset_count = self.master_data.assets.len();
1723 stats.employee_count = self.master_data.employees.len();
1724 info!(
1725 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1726 stats.vendor_count, stats.customer_count, stats.material_count,
1727 stats.asset_count, stats.employee_count
1728 );
1729 self.check_resources_with_log("post-master-data")?;
1730 } else {
1731 debug!("Phase 2: Skipped (master data generation disabled)");
1732 }
1733 Ok(())
1734 }
1735
1736 fn phase_document_flows(
1738 &mut self,
1739 stats: &mut EnhancedGenerationStatistics,
1740 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1741 let mut document_flows = DocumentFlowSnapshot::default();
1742 let mut subledger = SubledgerSnapshot::default();
1743
1744 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1745 info!("Phase 3: Generating Document Flows");
1746 self.generate_document_flows(&mut document_flows)?;
1747 stats.p2p_chain_count = document_flows.p2p_chains.len();
1748 stats.o2c_chain_count = document_flows.o2c_chains.len();
1749 info!(
1750 "Document flows generated: {} P2P chains, {} O2C chains",
1751 stats.p2p_chain_count, stats.o2c_chain_count
1752 );
1753
1754 debug!("Phase 3b: Linking document flows to subledgers");
1756 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1757 stats.ap_invoice_count = subledger.ap_invoices.len();
1758 stats.ar_invoice_count = subledger.ar_invoices.len();
1759 debug!(
1760 "Subledgers linked: {} AP invoices, {} AR invoices",
1761 stats.ap_invoice_count, stats.ar_invoice_count
1762 );
1763
1764 self.check_resources_with_log("post-document-flows")?;
1765 } else {
1766 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1767 }
1768
1769 let mut fa_journal_entries = Vec::new();
1771 if !self.master_data.assets.is_empty() {
1772 debug!("Generating FA subledger records");
1773 let company_code = self
1774 .config
1775 .companies
1776 .first()
1777 .map(|c| c.code.as_str())
1778 .unwrap_or("1000");
1779 let currency = self
1780 .config
1781 .companies
1782 .first()
1783 .map(|c| c.currency.as_str())
1784 .unwrap_or("USD");
1785
1786 let mut fa_gen = datasynth_generators::FAGenerator::new(
1787 datasynth_generators::FAGeneratorConfig::default(),
1788 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1789 );
1790
1791 for asset in &self.master_data.assets {
1792 let (record, je) = fa_gen.generate_asset_acquisition(
1793 company_code,
1794 &format!("{:?}", asset.asset_class),
1795 &asset.description,
1796 asset.acquisition_date,
1797 currency,
1798 asset.cost_center.as_deref(),
1799 );
1800 subledger.fa_records.push(record);
1801 fa_journal_entries.push(je);
1802 }
1803
1804 stats.fa_subledger_count = subledger.fa_records.len();
1805 debug!(
1806 "FA subledger records generated: {} (with {} acquisition JEs)",
1807 stats.fa_subledger_count,
1808 fa_journal_entries.len()
1809 );
1810 }
1811
1812 if !self.master_data.materials.is_empty() {
1814 debug!("Generating Inventory subledger records");
1815 let first_company = self.config.companies.first();
1816 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1817 let inv_currency = first_company
1818 .map(|c| c.currency.clone())
1819 .unwrap_or_else(|| "USD".to_string());
1820
1821 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1822 datasynth_generators::InventoryGeneratorConfig::default(),
1823 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1824 inv_currency.clone(),
1825 );
1826
1827 for (i, material) in self.master_data.materials.iter().enumerate() {
1828 let plant = format!("PLANT{:02}", (i % 3) + 1);
1829 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1830 let initial_qty = rust_decimal::Decimal::from(
1831 material
1832 .safety_stock
1833 .to_string()
1834 .parse::<i64>()
1835 .unwrap_or(100),
1836 );
1837
1838 let position = inv_gen.generate_position(
1839 company_code,
1840 &plant,
1841 &storage_loc,
1842 &material.material_id,
1843 &material.description,
1844 initial_qty,
1845 Some(material.standard_cost),
1846 &inv_currency,
1847 );
1848 subledger.inventory_positions.push(position);
1849 }
1850
1851 stats.inventory_subledger_count = subledger.inventory_positions.len();
1852 debug!(
1853 "Inventory subledger records generated: {}",
1854 stats.inventory_subledger_count
1855 );
1856 }
1857
1858 Ok((document_flows, subledger, fa_journal_entries))
1859 }
1860
1861 #[allow(clippy::too_many_arguments)]
1863 fn phase_ocpm_events(
1864 &mut self,
1865 document_flows: &DocumentFlowSnapshot,
1866 sourcing: &SourcingSnapshot,
1867 hr: &HrSnapshot,
1868 manufacturing: &ManufacturingSnapshot,
1869 banking: &BankingSnapshot,
1870 audit: &AuditSnapshot,
1871 financial_reporting: &FinancialReportingSnapshot,
1872 stats: &mut EnhancedGenerationStatistics,
1873 ) -> SynthResult<OcpmSnapshot> {
1874 if self.phase_config.generate_ocpm_events {
1875 info!("Phase 3c: Generating OCPM Events");
1876 let ocpm_snapshot = self.generate_ocpm_events(
1877 document_flows,
1878 sourcing,
1879 hr,
1880 manufacturing,
1881 banking,
1882 audit,
1883 financial_reporting,
1884 )?;
1885 stats.ocpm_event_count = ocpm_snapshot.event_count;
1886 stats.ocpm_object_count = ocpm_snapshot.object_count;
1887 stats.ocpm_case_count = ocpm_snapshot.case_count;
1888 info!(
1889 "OCPM events generated: {} events, {} objects, {} cases",
1890 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1891 );
1892 self.check_resources_with_log("post-ocpm")?;
1893 Ok(ocpm_snapshot)
1894 } else {
1895 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1896 Ok(OcpmSnapshot::default())
1897 }
1898 }
1899
1900 fn phase_journal_entries(
1902 &mut self,
1903 coa: &Arc<ChartOfAccounts>,
1904 document_flows: &DocumentFlowSnapshot,
1905 _stats: &mut EnhancedGenerationStatistics,
1906 ) -> SynthResult<Vec<JournalEntry>> {
1907 let mut entries = Vec::new();
1908
1909 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1911 debug!("Phase 4a: Generating JEs from document flows");
1912 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1913 debug!("Generated {} JEs from document flows", flow_entries.len());
1914 entries.extend(flow_entries);
1915 }
1916
1917 if self.phase_config.generate_journal_entries {
1919 info!("Phase 4: Generating Journal Entries");
1920 let je_entries = self.generate_journal_entries(coa)?;
1921 info!("Generated {} standalone journal entries", je_entries.len());
1922 entries.extend(je_entries);
1923 } else {
1924 debug!("Phase 4: Skipped (journal entry generation disabled)");
1925 }
1926
1927 if !entries.is_empty() {
1928 self.check_resources_with_log("post-journal-entries")?;
1931 }
1932
1933 Ok(entries)
1934 }
1935
1936 fn phase_anomaly_injection(
1938 &mut self,
1939 entries: &mut [JournalEntry],
1940 actions: &DegradationActions,
1941 stats: &mut EnhancedGenerationStatistics,
1942 ) -> SynthResult<AnomalyLabels> {
1943 if self.phase_config.inject_anomalies
1944 && !entries.is_empty()
1945 && !actions.skip_anomaly_injection
1946 {
1947 info!("Phase 5: Injecting Anomalies");
1948 let result = self.inject_anomalies(entries)?;
1949 stats.anomalies_injected = result.labels.len();
1950 info!("Injected {} anomalies", stats.anomalies_injected);
1951 self.check_resources_with_log("post-anomaly-injection")?;
1952 Ok(result)
1953 } else if actions.skip_anomaly_injection {
1954 warn!("Phase 5: Skipped due to resource degradation");
1955 Ok(AnomalyLabels::default())
1956 } else {
1957 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1958 Ok(AnomalyLabels::default())
1959 }
1960 }
1961
1962 fn phase_balance_validation(
1964 &mut self,
1965 entries: &[JournalEntry],
1966 ) -> SynthResult<BalanceValidationResult> {
1967 if self.phase_config.validate_balances && !entries.is_empty() {
1968 debug!("Phase 6: Validating Balances");
1969 let balance_validation = self.validate_journal_entries(entries)?;
1970 if balance_validation.is_balanced {
1971 debug!("Balance validation passed");
1972 } else {
1973 warn!(
1974 "Balance validation found {} errors",
1975 balance_validation.validation_errors.len()
1976 );
1977 }
1978 Ok(balance_validation)
1979 } else {
1980 Ok(BalanceValidationResult::default())
1981 }
1982 }
1983
1984 fn phase_data_quality_injection(
1986 &mut self,
1987 entries: &mut [JournalEntry],
1988 actions: &DegradationActions,
1989 stats: &mut EnhancedGenerationStatistics,
1990 ) -> SynthResult<DataQualityStats> {
1991 if self.phase_config.inject_data_quality
1992 && !entries.is_empty()
1993 && !actions.skip_data_quality
1994 {
1995 info!("Phase 7: Injecting Data Quality Variations");
1996 let dq_stats = self.inject_data_quality(entries)?;
1997 stats.data_quality_issues = dq_stats.records_with_issues;
1998 info!("Injected {} data quality issues", stats.data_quality_issues);
1999 self.check_resources_with_log("post-data-quality")?;
2000 Ok(dq_stats)
2001 } else if actions.skip_data_quality {
2002 warn!("Phase 7: Skipped due to resource degradation");
2003 Ok(DataQualityStats::default())
2004 } else {
2005 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2006 Ok(DataQualityStats::default())
2007 }
2008 }
2009
2010 fn phase_audit_data(
2012 &mut self,
2013 entries: &[JournalEntry],
2014 stats: &mut EnhancedGenerationStatistics,
2015 ) -> SynthResult<AuditSnapshot> {
2016 if self.phase_config.generate_audit {
2017 info!("Phase 8: Generating Audit Data");
2018 let audit_snapshot = self.generate_audit_data(entries)?;
2019 stats.audit_engagement_count = audit_snapshot.engagements.len();
2020 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2021 stats.audit_evidence_count = audit_snapshot.evidence.len();
2022 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2023 stats.audit_finding_count = audit_snapshot.findings.len();
2024 stats.audit_judgment_count = audit_snapshot.judgments.len();
2025 info!(
2026 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2027 stats.audit_engagement_count, stats.audit_workpaper_count,
2028 stats.audit_evidence_count, stats.audit_risk_count,
2029 stats.audit_finding_count, stats.audit_judgment_count
2030 );
2031 self.check_resources_with_log("post-audit")?;
2032 Ok(audit_snapshot)
2033 } else {
2034 debug!("Phase 8: Skipped (audit generation disabled)");
2035 Ok(AuditSnapshot::default())
2036 }
2037 }
2038
2039 fn phase_banking_data(
2041 &mut self,
2042 stats: &mut EnhancedGenerationStatistics,
2043 ) -> SynthResult<BankingSnapshot> {
2044 if self.phase_config.generate_banking && self.config.banking.enabled {
2045 info!("Phase 9: Generating Banking KYC/AML Data");
2046 let banking_snapshot = self.generate_banking_data()?;
2047 stats.banking_customer_count = banking_snapshot.customers.len();
2048 stats.banking_account_count = banking_snapshot.accounts.len();
2049 stats.banking_transaction_count = banking_snapshot.transactions.len();
2050 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2051 info!(
2052 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2053 stats.banking_customer_count, stats.banking_account_count,
2054 stats.banking_transaction_count, stats.banking_suspicious_count
2055 );
2056 self.check_resources_with_log("post-banking")?;
2057 Ok(banking_snapshot)
2058 } else {
2059 debug!("Phase 9: Skipped (banking generation disabled)");
2060 Ok(BankingSnapshot::default())
2061 }
2062 }
2063
2064 fn phase_graph_export(
2066 &mut self,
2067 entries: &[JournalEntry],
2068 coa: &Arc<ChartOfAccounts>,
2069 stats: &mut EnhancedGenerationStatistics,
2070 ) -> SynthResult<GraphExportSnapshot> {
2071 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2072 && !entries.is_empty()
2073 {
2074 info!("Phase 10: Exporting Accounting Network Graphs");
2075 match self.export_graphs(entries, coa, stats) {
2076 Ok(snapshot) => {
2077 info!(
2078 "Graph export complete: {} graphs ({} nodes, {} edges)",
2079 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2080 );
2081 Ok(snapshot)
2082 }
2083 Err(e) => {
2084 warn!("Phase 10: Graph export failed: {}", e);
2085 Ok(GraphExportSnapshot::default())
2086 }
2087 }
2088 } else {
2089 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2090 Ok(GraphExportSnapshot::default())
2091 }
2092 }
2093
2094 #[allow(clippy::too_many_arguments)]
2096 fn phase_hypergraph_export(
2097 &self,
2098 coa: &Arc<ChartOfAccounts>,
2099 entries: &[JournalEntry],
2100 document_flows: &DocumentFlowSnapshot,
2101 sourcing: &SourcingSnapshot,
2102 hr: &HrSnapshot,
2103 manufacturing: &ManufacturingSnapshot,
2104 banking: &BankingSnapshot,
2105 audit: &AuditSnapshot,
2106 financial_reporting: &FinancialReportingSnapshot,
2107 ocpm: &OcpmSnapshot,
2108 stats: &mut EnhancedGenerationStatistics,
2109 ) -> SynthResult<()> {
2110 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2111 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2112 match self.export_hypergraph(
2113 coa,
2114 entries,
2115 document_flows,
2116 sourcing,
2117 hr,
2118 manufacturing,
2119 banking,
2120 audit,
2121 financial_reporting,
2122 ocpm,
2123 stats,
2124 ) {
2125 Ok(info) => {
2126 info!(
2127 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2128 info.node_count, info.edge_count, info.hyperedge_count
2129 );
2130 }
2131 Err(e) => {
2132 warn!("Phase 10b: Hypergraph export failed: {}", e);
2133 }
2134 }
2135 } else {
2136 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2137 }
2138 Ok(())
2139 }
2140
2141 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2147 if !self.config.llm.enabled {
2148 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2149 return;
2150 }
2151
2152 info!("Phase 11: Starting LLM Enrichment");
2153 let start = std::time::Instant::now();
2154
2155 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2156 let provider = Arc::new(MockLlmProvider::new(self.seed));
2157 let enricher = VendorLlmEnricher::new(provider);
2158
2159 let industry = format!("{:?}", self.config.global.industry);
2160 let max_enrichments = self
2161 .config
2162 .llm
2163 .max_vendor_enrichments
2164 .min(self.master_data.vendors.len());
2165
2166 let mut enriched_count = 0usize;
2167 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2168 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2169 Ok(name) => {
2170 vendor.name = name;
2171 enriched_count += 1;
2172 }
2173 Err(e) => {
2174 warn!(
2175 "LLM vendor enrichment failed for {}: {}",
2176 vendor.vendor_id, e
2177 );
2178 }
2179 }
2180 }
2181
2182 enriched_count
2183 }));
2184
2185 match result {
2186 Ok(enriched_count) => {
2187 stats.llm_vendors_enriched = enriched_count;
2188 let elapsed = start.elapsed();
2189 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2190 info!(
2191 "Phase 11 complete: {} vendors enriched in {}ms",
2192 enriched_count, stats.llm_enrichment_ms
2193 );
2194 }
2195 Err(_) => {
2196 let elapsed = start.elapsed();
2197 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2198 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2199 }
2200 }
2201 }
2202
2203 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2209 if !self.config.diffusion.enabled {
2210 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2211 return;
2212 }
2213
2214 info!("Phase 12: Starting Diffusion Enhancement");
2215 let start = std::time::Instant::now();
2216
2217 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2218 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2221
2222 let diffusion_config = DiffusionConfig {
2223 n_steps: self.config.diffusion.n_steps,
2224 seed: self.seed,
2225 ..Default::default()
2226 };
2227
2228 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2229
2230 let n_samples = self.config.diffusion.sample_size;
2231 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2233
2234 samples.len()
2235 }));
2236
2237 match result {
2238 Ok(sample_count) => {
2239 stats.diffusion_samples_generated = sample_count;
2240 let elapsed = start.elapsed();
2241 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2242 info!(
2243 "Phase 12 complete: {} diffusion samples generated in {}ms",
2244 sample_count, stats.diffusion_enhancement_ms
2245 );
2246 }
2247 Err(_) => {
2248 let elapsed = start.elapsed();
2249 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2250 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2251 }
2252 }
2253 }
2254
2255 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2262 if !self.config.causal.enabled {
2263 debug!("Phase 13: Skipped (causal generation disabled)");
2264 return;
2265 }
2266
2267 info!("Phase 13: Starting Causal Overlay");
2268 let start = std::time::Instant::now();
2269
2270 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2271 let graph = match self.config.causal.template.as_str() {
2273 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2274 _ => CausalGraph::fraud_detection_template(),
2275 };
2276
2277 let scm = StructuralCausalModel::new(graph.clone())
2278 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2279
2280 let n_samples = self.config.causal.sample_size;
2281 let samples = scm
2282 .generate(n_samples, self.seed)
2283 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2284
2285 let validation_passed = if self.config.causal.validate {
2287 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2288 if report.valid {
2289 info!(
2290 "Causal validation passed: all {} checks OK",
2291 report.checks.len()
2292 );
2293 } else {
2294 warn!(
2295 "Causal validation: {} violations detected: {:?}",
2296 report.violations.len(),
2297 report.violations
2298 );
2299 }
2300 Some(report.valid)
2301 } else {
2302 None
2303 };
2304
2305 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2306 }));
2307
2308 match result {
2309 Ok(Ok((sample_count, validation_passed))) => {
2310 stats.causal_samples_generated = sample_count;
2311 stats.causal_validation_passed = validation_passed;
2312 let elapsed = start.elapsed();
2313 stats.causal_generation_ms = elapsed.as_millis() as u64;
2314 info!(
2315 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2316 sample_count, stats.causal_generation_ms, validation_passed,
2317 );
2318 }
2319 Ok(Err(e)) => {
2320 let elapsed = start.elapsed();
2321 stats.causal_generation_ms = elapsed.as_millis() as u64;
2322 warn!("Phase 13: Causal generation failed: {}", e);
2323 }
2324 Err(_) => {
2325 let elapsed = start.elapsed();
2326 stats.causal_generation_ms = elapsed.as_millis() as u64;
2327 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2328 }
2329 }
2330 }
2331
2332 fn phase_sourcing_data(
2334 &mut self,
2335 stats: &mut EnhancedGenerationStatistics,
2336 ) -> SynthResult<SourcingSnapshot> {
2337 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2338 debug!("Phase 14: Skipped (sourcing generation disabled)");
2339 return Ok(SourcingSnapshot::default());
2340 }
2341
2342 info!("Phase 14: Generating S2C Sourcing Data");
2343 let seed = self.seed;
2344
2345 let vendor_ids: Vec<String> = self
2347 .master_data
2348 .vendors
2349 .iter()
2350 .map(|v| v.vendor_id.clone())
2351 .collect();
2352 if vendor_ids.is_empty() {
2353 debug!("Phase 14: Skipped (no vendors available)");
2354 return Ok(SourcingSnapshot::default());
2355 }
2356
2357 let categories: Vec<(String, String)> = vec![
2358 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2359 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2360 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2361 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2362 ("CAT-LOG".to_string(), "Logistics".to_string()),
2363 ];
2364 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2365 .iter()
2366 .map(|(id, name)| {
2367 (
2368 id.clone(),
2369 name.clone(),
2370 rust_decimal::Decimal::from(100_000),
2371 )
2372 })
2373 .collect();
2374
2375 let company_code = self
2376 .config
2377 .companies
2378 .first()
2379 .map(|c| c.code.as_str())
2380 .unwrap_or("1000");
2381 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2382 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2383 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2384 let fiscal_year = start_date.year() as u16;
2385 let owner_ids: Vec<String> = self
2386 .master_data
2387 .employees
2388 .iter()
2389 .take(5)
2390 .map(|e| e.employee_id.clone())
2391 .collect();
2392 let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2393
2394 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2396 let spend_analyses =
2397 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2398
2399 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2401 let sourcing_projects = if owner_ids.is_empty() {
2402 Vec::new()
2403 } else {
2404 project_gen.generate(
2405 company_code,
2406 &categories_with_spend,
2407 &owner_ids,
2408 start_date,
2409 self.config.global.period_months,
2410 )
2411 };
2412 stats.sourcing_project_count = sourcing_projects.len();
2413
2414 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2416 let mut qual_gen = QualificationGenerator::new(seed + 2);
2417 let qualifications = qual_gen.generate(
2418 company_code,
2419 &qual_vendor_ids,
2420 sourcing_projects.first().map(|p| p.project_id.as_str()),
2421 owner_id,
2422 start_date,
2423 );
2424
2425 let mut rfx_gen = RfxGenerator::new(seed + 3);
2427 let rfx_events: Vec<RfxEvent> = sourcing_projects
2428 .iter()
2429 .map(|proj| {
2430 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2431 rfx_gen.generate(
2432 company_code,
2433 &proj.project_id,
2434 &proj.category_id,
2435 &qualified_vids,
2436 owner_id,
2437 start_date,
2438 50000.0,
2439 )
2440 })
2441 .collect();
2442 stats.rfx_event_count = rfx_events.len();
2443
2444 let mut bid_gen = BidGenerator::new(seed + 4);
2446 let mut all_bids = Vec::new();
2447 for rfx in &rfx_events {
2448 let bidder_count = vendor_ids.len().clamp(2, 5);
2449 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2450 let bids = bid_gen.generate(rfx, &responding, start_date);
2451 all_bids.extend(bids);
2452 }
2453 stats.bid_count = all_bids.len();
2454
2455 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2457 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2458 .iter()
2459 .map(|rfx| {
2460 let rfx_bids: Vec<SupplierBid> = all_bids
2461 .iter()
2462 .filter(|b| b.rfx_id == rfx.rfx_id)
2463 .cloned()
2464 .collect();
2465 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2466 })
2467 .collect();
2468
2469 let mut contract_gen = ContractGenerator::new(seed + 6);
2471 let contracts: Vec<ProcurementContract> = bid_evaluations
2472 .iter()
2473 .zip(rfx_events.iter())
2474 .filter_map(|(eval, rfx)| {
2475 eval.ranked_bids.first().and_then(|winner| {
2476 all_bids
2477 .iter()
2478 .find(|b| b.bid_id == winner.bid_id)
2479 .map(|winning_bid| {
2480 contract_gen.generate_from_bid(
2481 winning_bid,
2482 Some(&rfx.sourcing_project_id),
2483 &rfx.category_id,
2484 owner_id,
2485 start_date,
2486 )
2487 })
2488 })
2489 })
2490 .collect();
2491 stats.contract_count = contracts.len();
2492
2493 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2495 let catalog_items = catalog_gen.generate(&contracts);
2496 stats.catalog_item_count = catalog_items.len();
2497
2498 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2500 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2501 .iter()
2502 .fold(
2503 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2504 |mut acc, c| {
2505 acc.entry(c.vendor_id.clone()).or_default().push(c);
2506 acc
2507 },
2508 )
2509 .into_iter()
2510 .collect();
2511 let scorecards = scorecard_gen.generate(
2512 company_code,
2513 &vendor_contracts,
2514 start_date,
2515 end_date,
2516 owner_id,
2517 );
2518 stats.scorecard_count = scorecards.len();
2519
2520 let mut sourcing_projects = sourcing_projects;
2523 for project in &mut sourcing_projects {
2524 project.rfx_ids = rfx_events
2526 .iter()
2527 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2528 .map(|rfx| rfx.rfx_id.clone())
2529 .collect();
2530
2531 project.contract_id = contracts
2533 .iter()
2534 .find(|c| {
2535 c.sourcing_project_id
2536 .as_deref()
2537 .is_some_and(|sp| sp == project.project_id)
2538 })
2539 .map(|c| c.contract_id.clone());
2540
2541 project.spend_analysis_id = spend_analyses
2543 .iter()
2544 .find(|sa| sa.category_id == project.category_id)
2545 .map(|sa| sa.category_id.clone());
2546 }
2547
2548 info!(
2549 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2550 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2551 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2552 );
2553 self.check_resources_with_log("post-sourcing")?;
2554
2555 Ok(SourcingSnapshot {
2556 spend_analyses,
2557 sourcing_projects,
2558 qualifications,
2559 rfx_events,
2560 bids: all_bids,
2561 bid_evaluations,
2562 contracts,
2563 catalog_items,
2564 scorecards,
2565 })
2566 }
2567
2568 fn phase_intercompany(
2570 &mut self,
2571 stats: &mut EnhancedGenerationStatistics,
2572 ) -> SynthResult<IntercompanySnapshot> {
2573 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2575 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2576 return Ok(IntercompanySnapshot::default());
2577 }
2578
2579 if self.config.companies.len() < 2 {
2581 debug!(
2582 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2583 self.config.companies.len()
2584 );
2585 return Ok(IntercompanySnapshot::default());
2586 }
2587
2588 info!("Phase 14b: Generating Intercompany Transactions");
2589
2590 let seed = self.seed;
2591 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2592 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2593 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2594
2595 let parent_code = self.config.companies[0].code.clone();
2598 let mut ownership_structure =
2599 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2600
2601 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2602 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2603 format!("REL{:03}", i + 1),
2604 parent_code.clone(),
2605 company.code.clone(),
2606 rust_decimal::Decimal::from(100), start_date,
2608 );
2609 ownership_structure.add_relationship(relationship);
2610 }
2611
2612 let tp_method = match self.config.intercompany.transfer_pricing_method {
2614 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2615 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2616 }
2617 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2618 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2619 }
2620 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2621 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2622 }
2623 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2624 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2625 }
2626 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2627 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2628 }
2629 };
2630
2631 let ic_currency = self
2633 .config
2634 .companies
2635 .first()
2636 .map(|c| c.currency.clone())
2637 .unwrap_or_else(|| "USD".to_string());
2638 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2639 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2640 transfer_pricing_method: tp_method,
2641 markup_percent: rust_decimal::Decimal::from_f64_retain(
2642 self.config.intercompany.markup_percent,
2643 )
2644 .unwrap_or(rust_decimal::Decimal::from(5)),
2645 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2646 default_currency: ic_currency,
2647 ..Default::default()
2648 };
2649
2650 let mut ic_generator = datasynth_generators::ICGenerator::new(
2652 ic_gen_config,
2653 ownership_structure.clone(),
2654 seed + 50,
2655 );
2656
2657 let transactions_per_day = 3;
2660 let matched_pairs = ic_generator.generate_transactions_for_period(
2661 start_date,
2662 end_date,
2663 transactions_per_day,
2664 );
2665
2666 let mut seller_entries = Vec::new();
2668 let mut buyer_entries = Vec::new();
2669 let fiscal_year = start_date.year();
2670
2671 for pair in &matched_pairs {
2672 let fiscal_period = pair.posting_date.month();
2673 let (seller_je, buyer_je) =
2674 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2675 seller_entries.push(seller_je);
2676 buyer_entries.push(buyer_je);
2677 }
2678
2679 let matching_config = datasynth_generators::ICMatchingConfig {
2681 base_currency: self
2682 .config
2683 .companies
2684 .first()
2685 .map(|c| c.currency.clone())
2686 .unwrap_or_else(|| "USD".to_string()),
2687 ..Default::default()
2688 };
2689 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2690 matching_engine.load_matched_pairs(&matched_pairs);
2691 let matching_result = matching_engine.run_matching(end_date);
2692
2693 let mut elimination_entries = Vec::new();
2695 if self.config.intercompany.generate_eliminations {
2696 let elim_config = datasynth_generators::EliminationConfig {
2697 consolidation_entity: "GROUP".to_string(),
2698 base_currency: self
2699 .config
2700 .companies
2701 .first()
2702 .map(|c| c.currency.clone())
2703 .unwrap_or_else(|| "USD".to_string()),
2704 ..Default::default()
2705 };
2706
2707 let mut elim_generator =
2708 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2709
2710 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2711 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2712 matching_result
2713 .matched_balances
2714 .iter()
2715 .chain(matching_result.unmatched_balances.iter())
2716 .cloned()
2717 .collect();
2718
2719 let journal = elim_generator.generate_eliminations(
2720 &fiscal_period,
2721 end_date,
2722 &all_balances,
2723 &matched_pairs,
2724 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
2727
2728 elimination_entries = journal.entries.clone();
2729 }
2730
2731 let matched_pair_count = matched_pairs.len();
2732 let elimination_entry_count = elimination_entries.len();
2733 let match_rate = matching_result.match_rate;
2734
2735 stats.ic_matched_pair_count = matched_pair_count;
2736 stats.ic_elimination_count = elimination_entry_count;
2737 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2738
2739 info!(
2740 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2741 matched_pair_count,
2742 stats.ic_transaction_count,
2743 seller_entries.len(),
2744 buyer_entries.len(),
2745 elimination_entry_count,
2746 match_rate * 100.0
2747 );
2748 self.check_resources_with_log("post-intercompany")?;
2749
2750 Ok(IntercompanySnapshot {
2751 matched_pairs,
2752 seller_journal_entries: seller_entries,
2753 buyer_journal_entries: buyer_entries,
2754 elimination_entries,
2755 matched_pair_count,
2756 elimination_entry_count,
2757 match_rate,
2758 })
2759 }
2760
2761 fn phase_financial_reporting(
2763 &mut self,
2764 document_flows: &DocumentFlowSnapshot,
2765 journal_entries: &[JournalEntry],
2766 coa: &Arc<ChartOfAccounts>,
2767 stats: &mut EnhancedGenerationStatistics,
2768 ) -> SynthResult<FinancialReportingSnapshot> {
2769 let fs_enabled = self.phase_config.generate_financial_statements
2770 || self.config.financial_reporting.enabled;
2771 let br_enabled = self.phase_config.generate_bank_reconciliation;
2772
2773 if !fs_enabled && !br_enabled {
2774 debug!("Phase 15: Skipped (financial reporting disabled)");
2775 return Ok(FinancialReportingSnapshot::default());
2776 }
2777
2778 info!("Phase 15: Generating Financial Reporting Data");
2779
2780 let seed = self.seed;
2781 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2782 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2783
2784 let mut financial_statements = Vec::new();
2785 let mut bank_reconciliations = Vec::new();
2786 let mut trial_balances = Vec::new();
2787
2788 if fs_enabled {
2796 let company_code = self
2797 .config
2798 .companies
2799 .first()
2800 .map(|c| c.code.as_str())
2801 .unwrap_or("1000");
2802 let currency = self
2803 .config
2804 .companies
2805 .first()
2806 .map(|c| c.currency.as_str())
2807 .unwrap_or("USD");
2808 let has_journal_entries = !journal_entries.is_empty();
2809
2810 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2813
2814 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2816 None;
2817
2818 for period in 0..self.config.global.period_months {
2820 let period_start = start_date + chrono::Months::new(period);
2821 let period_end =
2822 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2823 let fiscal_year = period_end.year() as u16;
2824 let fiscal_period = period_end.month() as u8;
2825
2826 if has_journal_entries {
2827 let tb_entries = Self::build_cumulative_trial_balance(
2830 journal_entries,
2831 coa,
2832 company_code,
2833 start_date,
2834 period_end,
2835 fiscal_year,
2836 fiscal_period,
2837 );
2838
2839 let prior_ref = prior_cumulative_tb.as_deref();
2842 let stmts = fs_gen.generate(
2843 company_code,
2844 currency,
2845 &tb_entries,
2846 period_start,
2847 period_end,
2848 fiscal_year,
2849 fiscal_period,
2850 prior_ref,
2851 "SYS-AUTOCLOSE",
2852 );
2853
2854 for stmt in stmts {
2856 if stmt.statement_type == StatementType::CashFlowStatement {
2857 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2859 let cf_items = Self::build_cash_flow_from_trial_balances(
2860 &tb_entries,
2861 prior_ref,
2862 net_income,
2863 );
2864 financial_statements.push(FinancialStatement {
2865 cash_flow_items: cf_items,
2866 ..stmt
2867 });
2868 } else {
2869 financial_statements.push(stmt);
2870 }
2871 }
2872
2873 trial_balances.push(PeriodTrialBalance {
2875 fiscal_year,
2876 fiscal_period,
2877 period_start,
2878 period_end,
2879 entries: tb_entries.clone(),
2880 });
2881
2882 prior_cumulative_tb = Some(tb_entries);
2884 } else {
2885 let tb_entries = Self::build_trial_balance_from_entries(
2888 journal_entries,
2889 coa,
2890 company_code,
2891 fiscal_year,
2892 fiscal_period,
2893 );
2894
2895 let stmts = fs_gen.generate(
2896 company_code,
2897 currency,
2898 &tb_entries,
2899 period_start,
2900 period_end,
2901 fiscal_year,
2902 fiscal_period,
2903 None,
2904 "SYS-AUTOCLOSE",
2905 );
2906 financial_statements.extend(stmts);
2907
2908 if !tb_entries.is_empty() {
2910 trial_balances.push(PeriodTrialBalance {
2911 fiscal_year,
2912 fiscal_period,
2913 period_start,
2914 period_end,
2915 entries: tb_entries,
2916 });
2917 }
2918 }
2919 }
2920 stats.financial_statement_count = financial_statements.len();
2921 info!(
2922 "Financial statements generated: {} statements (JE-derived: {})",
2923 stats.financial_statement_count, has_journal_entries
2924 );
2925 }
2926
2927 if br_enabled && !document_flows.payments.is_empty() {
2929 let employee_ids: Vec<String> = self
2930 .master_data
2931 .employees
2932 .iter()
2933 .map(|e| e.employee_id.clone())
2934 .collect();
2935 let mut br_gen =
2936 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2937
2938 for company in &self.config.companies {
2940 let company_payments: Vec<PaymentReference> = document_flows
2941 .payments
2942 .iter()
2943 .filter(|p| p.header.company_code == company.code)
2944 .map(|p| PaymentReference {
2945 id: p.header.document_id.clone(),
2946 amount: if p.is_vendor { p.amount } else { -p.amount },
2947 date: p.header.document_date,
2948 reference: p
2949 .check_number
2950 .clone()
2951 .or_else(|| p.wire_reference.clone())
2952 .unwrap_or_else(|| p.header.document_id.clone()),
2953 })
2954 .collect();
2955
2956 if company_payments.is_empty() {
2957 continue;
2958 }
2959
2960 let bank_account_id = format!("{}-MAIN", company.code);
2961
2962 for period in 0..self.config.global.period_months {
2964 let period_start = start_date + chrono::Months::new(period);
2965 let period_end =
2966 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2967
2968 let period_payments: Vec<PaymentReference> = company_payments
2969 .iter()
2970 .filter(|p| p.date >= period_start && p.date <= period_end)
2971 .cloned()
2972 .collect();
2973
2974 let recon = br_gen.generate(
2975 &company.code,
2976 &bank_account_id,
2977 period_start,
2978 period_end,
2979 &company.currency,
2980 &period_payments,
2981 );
2982 bank_reconciliations.push(recon);
2983 }
2984 }
2985 info!(
2986 "Bank reconciliations generated: {} reconciliations",
2987 bank_reconciliations.len()
2988 );
2989 }
2990
2991 stats.bank_reconciliation_count = bank_reconciliations.len();
2992 self.check_resources_with_log("post-financial-reporting")?;
2993
2994 if !trial_balances.is_empty() {
2995 info!(
2996 "Period-close trial balances captured: {} periods",
2997 trial_balances.len()
2998 );
2999 }
3000
3001 Ok(FinancialReportingSnapshot {
3002 financial_statements,
3003 bank_reconciliations,
3004 trial_balances,
3005 })
3006 }
3007
3008 fn build_trial_balance_from_entries(
3014 journal_entries: &[JournalEntry],
3015 coa: &ChartOfAccounts,
3016 company_code: &str,
3017 fiscal_year: u16,
3018 fiscal_period: u8,
3019 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3020 use rust_decimal::Decimal;
3021
3022 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3024 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3025
3026 for je in journal_entries {
3027 if je.header.company_code != company_code
3029 || je.header.fiscal_year != fiscal_year
3030 || je.header.fiscal_period != fiscal_period
3031 {
3032 continue;
3033 }
3034
3035 for line in &je.lines {
3036 let acct = &line.gl_account;
3037 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3038 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3039 }
3040 }
3041
3042 let mut all_accounts: Vec<&String> = account_debits
3044 .keys()
3045 .chain(account_credits.keys())
3046 .collect::<std::collections::HashSet<_>>()
3047 .into_iter()
3048 .collect();
3049 all_accounts.sort();
3050
3051 let mut entries = Vec::new();
3052
3053 for acct_number in all_accounts {
3054 let debit = account_debits
3055 .get(acct_number)
3056 .copied()
3057 .unwrap_or(Decimal::ZERO);
3058 let credit = account_credits
3059 .get(acct_number)
3060 .copied()
3061 .unwrap_or(Decimal::ZERO);
3062
3063 if debit.is_zero() && credit.is_zero() {
3064 continue;
3065 }
3066
3067 let account_name = coa
3069 .get_account(acct_number)
3070 .map(|gl| gl.short_description.clone())
3071 .unwrap_or_else(|| format!("Account {}", acct_number));
3072
3073 let category = Self::category_from_account_code(acct_number);
3078
3079 entries.push(datasynth_generators::TrialBalanceEntry {
3080 account_code: acct_number.clone(),
3081 account_name,
3082 category,
3083 debit_balance: debit,
3084 credit_balance: credit,
3085 });
3086 }
3087
3088 entries
3089 }
3090
3091 fn build_cumulative_trial_balance(
3098 journal_entries: &[JournalEntry],
3099 coa: &ChartOfAccounts,
3100 company_code: &str,
3101 start_date: NaiveDate,
3102 period_end: NaiveDate,
3103 fiscal_year: u16,
3104 fiscal_period: u8,
3105 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3106 use rust_decimal::Decimal;
3107
3108 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3110 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3111
3112 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3114 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3115
3116 for je in journal_entries {
3117 if je.header.company_code != company_code {
3118 continue;
3119 }
3120
3121 for line in &je.lines {
3122 let acct = &line.gl_account;
3123 let category = Self::category_from_account_code(acct);
3124 let is_bs_account = matches!(
3125 category.as_str(),
3126 "Cash"
3127 | "Receivables"
3128 | "Inventory"
3129 | "FixedAssets"
3130 | "Payables"
3131 | "AccruedLiabilities"
3132 | "LongTermDebt"
3133 | "Equity"
3134 );
3135
3136 if is_bs_account {
3137 if je.header.document_date <= period_end
3139 && je.header.document_date >= start_date
3140 {
3141 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3142 line.debit_amount;
3143 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3144 line.credit_amount;
3145 }
3146 } else {
3147 if je.header.fiscal_year == fiscal_year
3149 && je.header.fiscal_period == fiscal_period
3150 {
3151 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3152 line.debit_amount;
3153 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3154 line.credit_amount;
3155 }
3156 }
3157 }
3158 }
3159
3160 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3162 all_accounts.extend(bs_debits.keys().cloned());
3163 all_accounts.extend(bs_credits.keys().cloned());
3164 all_accounts.extend(is_debits.keys().cloned());
3165 all_accounts.extend(is_credits.keys().cloned());
3166
3167 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3168 sorted_accounts.sort();
3169
3170 let mut entries = Vec::new();
3171
3172 for acct_number in &sorted_accounts {
3173 let category = Self::category_from_account_code(acct_number);
3174 let is_bs_account = matches!(
3175 category.as_str(),
3176 "Cash"
3177 | "Receivables"
3178 | "Inventory"
3179 | "FixedAssets"
3180 | "Payables"
3181 | "AccruedLiabilities"
3182 | "LongTermDebt"
3183 | "Equity"
3184 );
3185
3186 let (debit, credit) = if is_bs_account {
3187 (
3188 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3189 bs_credits
3190 .get(acct_number)
3191 .copied()
3192 .unwrap_or(Decimal::ZERO),
3193 )
3194 } else {
3195 (
3196 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3197 is_credits
3198 .get(acct_number)
3199 .copied()
3200 .unwrap_or(Decimal::ZERO),
3201 )
3202 };
3203
3204 if debit.is_zero() && credit.is_zero() {
3205 continue;
3206 }
3207
3208 let account_name = coa
3209 .get_account(acct_number)
3210 .map(|gl| gl.short_description.clone())
3211 .unwrap_or_else(|| format!("Account {}", acct_number));
3212
3213 entries.push(datasynth_generators::TrialBalanceEntry {
3214 account_code: acct_number.clone(),
3215 account_name,
3216 category,
3217 debit_balance: debit,
3218 credit_balance: credit,
3219 });
3220 }
3221
3222 entries
3223 }
3224
3225 fn build_cash_flow_from_trial_balances(
3230 current_tb: &[datasynth_generators::TrialBalanceEntry],
3231 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3232 net_income: rust_decimal::Decimal,
3233 ) -> Vec<CashFlowItem> {
3234 use rust_decimal::Decimal;
3235
3236 let aggregate =
3238 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3239 let mut map: HashMap<String, Decimal> = HashMap::new();
3240 for entry in tb {
3241 let net = entry.debit_balance - entry.credit_balance;
3242 *map.entry(entry.category.clone()).or_default() += net;
3243 }
3244 map
3245 };
3246
3247 let current = aggregate(current_tb);
3248 let prior = prior_tb.map(aggregate);
3249
3250 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3252 *map.get(key).unwrap_or(&Decimal::ZERO)
3253 };
3254
3255 let change = |key: &str| -> Decimal {
3257 let curr = get(¤t, key);
3258 match &prior {
3259 Some(p) => curr - get(p, key),
3260 None => curr,
3261 }
3262 };
3263
3264 let fixed_asset_change = change("FixedAssets");
3267 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3268 -fixed_asset_change
3269 } else {
3270 Decimal::ZERO
3271 };
3272
3273 let ar_change = change("Receivables");
3275 let inventory_change = change("Inventory");
3276 let ap_change = change("Payables");
3278 let accrued_change = change("AccruedLiabilities");
3279
3280 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3281 + (-ap_change)
3282 + (-accrued_change);
3283
3284 let capex = if fixed_asset_change > Decimal::ZERO {
3286 -fixed_asset_change
3287 } else {
3288 Decimal::ZERO
3289 };
3290 let investing_cf = capex;
3291
3292 let debt_change = -change("LongTermDebt");
3294 let equity_change = -change("Equity");
3295 let financing_cf = debt_change + equity_change;
3296
3297 let net_change = operating_cf + investing_cf + financing_cf;
3298
3299 vec![
3300 CashFlowItem {
3301 item_code: "CF-NI".to_string(),
3302 label: "Net Income".to_string(),
3303 category: CashFlowCategory::Operating,
3304 amount: net_income,
3305 amount_prior: None,
3306 sort_order: 1,
3307 is_total: false,
3308 },
3309 CashFlowItem {
3310 item_code: "CF-DEP".to_string(),
3311 label: "Depreciation & Amortization".to_string(),
3312 category: CashFlowCategory::Operating,
3313 amount: depreciation_addback,
3314 amount_prior: None,
3315 sort_order: 2,
3316 is_total: false,
3317 },
3318 CashFlowItem {
3319 item_code: "CF-AR".to_string(),
3320 label: "Change in Accounts Receivable".to_string(),
3321 category: CashFlowCategory::Operating,
3322 amount: -ar_change,
3323 amount_prior: None,
3324 sort_order: 3,
3325 is_total: false,
3326 },
3327 CashFlowItem {
3328 item_code: "CF-AP".to_string(),
3329 label: "Change in Accounts Payable".to_string(),
3330 category: CashFlowCategory::Operating,
3331 amount: -ap_change,
3332 amount_prior: None,
3333 sort_order: 4,
3334 is_total: false,
3335 },
3336 CashFlowItem {
3337 item_code: "CF-INV".to_string(),
3338 label: "Change in Inventory".to_string(),
3339 category: CashFlowCategory::Operating,
3340 amount: -inventory_change,
3341 amount_prior: None,
3342 sort_order: 5,
3343 is_total: false,
3344 },
3345 CashFlowItem {
3346 item_code: "CF-OP".to_string(),
3347 label: "Net Cash from Operating Activities".to_string(),
3348 category: CashFlowCategory::Operating,
3349 amount: operating_cf,
3350 amount_prior: None,
3351 sort_order: 6,
3352 is_total: true,
3353 },
3354 CashFlowItem {
3355 item_code: "CF-CAPEX".to_string(),
3356 label: "Capital Expenditures".to_string(),
3357 category: CashFlowCategory::Investing,
3358 amount: capex,
3359 amount_prior: None,
3360 sort_order: 7,
3361 is_total: false,
3362 },
3363 CashFlowItem {
3364 item_code: "CF-INV-T".to_string(),
3365 label: "Net Cash from Investing Activities".to_string(),
3366 category: CashFlowCategory::Investing,
3367 amount: investing_cf,
3368 amount_prior: None,
3369 sort_order: 8,
3370 is_total: true,
3371 },
3372 CashFlowItem {
3373 item_code: "CF-DEBT".to_string(),
3374 label: "Net Borrowings / (Repayments)".to_string(),
3375 category: CashFlowCategory::Financing,
3376 amount: debt_change,
3377 amount_prior: None,
3378 sort_order: 9,
3379 is_total: false,
3380 },
3381 CashFlowItem {
3382 item_code: "CF-EQ".to_string(),
3383 label: "Equity Changes".to_string(),
3384 category: CashFlowCategory::Financing,
3385 amount: equity_change,
3386 amount_prior: None,
3387 sort_order: 10,
3388 is_total: false,
3389 },
3390 CashFlowItem {
3391 item_code: "CF-FIN-T".to_string(),
3392 label: "Net Cash from Financing Activities".to_string(),
3393 category: CashFlowCategory::Financing,
3394 amount: financing_cf,
3395 amount_prior: None,
3396 sort_order: 11,
3397 is_total: true,
3398 },
3399 CashFlowItem {
3400 item_code: "CF-NET".to_string(),
3401 label: "Net Change in Cash".to_string(),
3402 category: CashFlowCategory::Operating,
3403 amount: net_change,
3404 amount_prior: None,
3405 sort_order: 12,
3406 is_total: true,
3407 },
3408 ]
3409 }
3410
3411 fn calculate_net_income_from_tb(
3415 tb: &[datasynth_generators::TrialBalanceEntry],
3416 ) -> rust_decimal::Decimal {
3417 use rust_decimal::Decimal;
3418
3419 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3420 for entry in tb {
3421 let net = entry.debit_balance - entry.credit_balance;
3422 *aggregated.entry(entry.category.clone()).or_default() += net;
3423 }
3424
3425 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3426 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3427 let opex = *aggregated
3428 .get("OperatingExpenses")
3429 .unwrap_or(&Decimal::ZERO);
3430 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3431 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3432
3433 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3436 let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3437 let tax = operating_income * tax_rate;
3438 operating_income - tax
3439 }
3440
/// Maps a GL account code to a reporting category based on its first two characters.
///
/// Codes that are shorter than two characters, or whose two-character prefix is not
/// listed below, fall back to `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let category = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unlisted prefixes (e.g. 45-49, 53-59, 75-79, 9x) and short or
        // non-numeric codes.
        _ => "OperatingExpenses",
    };
    category.to_string()
}
3469
3470 fn phase_hr_data(
3472 &mut self,
3473 stats: &mut EnhancedGenerationStatistics,
3474 ) -> SynthResult<HrSnapshot> {
3475 if !self.config.hr.enabled {
3476 debug!("Phase 16: Skipped (HR generation disabled)");
3477 return Ok(HrSnapshot::default());
3478 }
3479
3480 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3481
3482 let seed = self.seed;
3483 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3484 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3485 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3486 let company_code = self
3487 .config
3488 .companies
3489 .first()
3490 .map(|c| c.code.as_str())
3491 .unwrap_or("1000");
3492 let currency = self
3493 .config
3494 .companies
3495 .first()
3496 .map(|c| c.currency.as_str())
3497 .unwrap_or("USD");
3498
3499 let employee_ids: Vec<String> = self
3500 .master_data
3501 .employees
3502 .iter()
3503 .map(|e| e.employee_id.clone())
3504 .collect();
3505
3506 if employee_ids.is_empty() {
3507 debug!("Phase 16: Skipped (no employees available)");
3508 return Ok(HrSnapshot::default());
3509 }
3510
3511 let cost_center_ids: Vec<String> = self
3514 .master_data
3515 .employees
3516 .iter()
3517 .filter_map(|e| e.cost_center.clone())
3518 .collect::<std::collections::HashSet<_>>()
3519 .into_iter()
3520 .collect();
3521
3522 let mut snapshot = HrSnapshot::default();
3523
3524 if self.config.hr.payroll.enabled {
3526 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3527 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3528
3529 let payroll_pack = self.primary_pack();
3531
3532 payroll_gen.set_country_pack(payroll_pack.clone());
3535
3536 let employees_with_salary: Vec<(
3537 String,
3538 rust_decimal::Decimal,
3539 Option<String>,
3540 Option<String>,
3541 )> = self
3542 .master_data
3543 .employees
3544 .iter()
3545 .map(|e| {
3546 (
3547 e.employee_id.clone(),
3548 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3550 e.department_id.clone(),
3551 )
3552 })
3553 .collect();
3554
3555 for month in 0..self.config.global.period_months {
3556 let period_start = start_date + chrono::Months::new(month);
3557 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3558 let (run, items) = payroll_gen.generate(
3559 company_code,
3560 &employees_with_salary,
3561 period_start,
3562 period_end,
3563 currency,
3564 );
3565 snapshot.payroll_runs.push(run);
3566 snapshot.payroll_run_count += 1;
3567 snapshot.payroll_line_item_count += items.len();
3568 snapshot.payroll_line_items.extend(items);
3569 }
3570 }
3571
3572 if self.config.hr.time_attendance.enabled {
3574 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3575 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3576 let entries = time_gen.generate(
3577 &employee_ids,
3578 start_date,
3579 end_date,
3580 &self.config.hr.time_attendance,
3581 );
3582 snapshot.time_entry_count = entries.len();
3583 snapshot.time_entries = entries;
3584 }
3585
3586 if self.config.hr.expenses.enabled {
3588 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3589 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3590 let company_currency = self
3591 .config
3592 .companies
3593 .first()
3594 .map(|c| c.currency.as_str())
3595 .unwrap_or("USD");
3596 let reports = expense_gen.generate_with_currency(
3597 &employee_ids,
3598 start_date,
3599 end_date,
3600 &self.config.hr.expenses,
3601 company_currency,
3602 );
3603 snapshot.expense_report_count = reports.len();
3604 snapshot.expense_reports = reports;
3605 }
3606
3607 stats.payroll_run_count = snapshot.payroll_run_count;
3608 stats.time_entry_count = snapshot.time_entry_count;
3609 stats.expense_report_count = snapshot.expense_report_count;
3610
3611 info!(
3612 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3613 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3614 snapshot.time_entry_count, snapshot.expense_report_count
3615 );
3616 self.check_resources_with_log("post-hr")?;
3617
3618 Ok(snapshot)
3619 }
3620
3621 fn phase_accounting_standards(
3623 &mut self,
3624 stats: &mut EnhancedGenerationStatistics,
3625 ) -> SynthResult<AccountingStandardsSnapshot> {
3626 if !self.phase_config.generate_accounting_standards
3627 || !self.config.accounting_standards.enabled
3628 {
3629 debug!("Phase 17: Skipped (accounting standards generation disabled)");
3630 return Ok(AccountingStandardsSnapshot::default());
3631 }
3632 info!("Phase 17: Generating Accounting Standards Data");
3633
3634 let seed = self.seed;
3635 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3636 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3637 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3638 let company_code = self
3639 .config
3640 .companies
3641 .first()
3642 .map(|c| c.code.as_str())
3643 .unwrap_or("1000");
3644 let currency = self
3645 .config
3646 .companies
3647 .first()
3648 .map(|c| c.currency.as_str())
3649 .unwrap_or("USD");
3650
3651 let framework = match self.config.accounting_standards.framework {
3656 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3657 datasynth_standards::framework::AccountingFramework::UsGaap
3658 }
3659 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3660 datasynth_standards::framework::AccountingFramework::Ifrs
3661 }
3662 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3663 datasynth_standards::framework::AccountingFramework::DualReporting
3664 }
3665 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3666 datasynth_standards::framework::AccountingFramework::FrenchGaap
3667 }
3668 None => {
3669 let pack = self.primary_pack();
3671 let pack_fw = pack.accounting.framework.as_str();
3672 match pack_fw {
3673 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3674 "dual_reporting" => {
3675 datasynth_standards::framework::AccountingFramework::DualReporting
3676 }
3677 "french_gaap" => {
3678 datasynth_standards::framework::AccountingFramework::FrenchGaap
3679 }
3680 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3682 }
3683 }
3684 };
3685
3686 let mut snapshot = AccountingStandardsSnapshot::default();
3687
3688 if self.config.accounting_standards.revenue_recognition.enabled {
3690 let customer_ids: Vec<String> = self
3691 .master_data
3692 .customers
3693 .iter()
3694 .map(|c| c.customer_id.clone())
3695 .collect();
3696
3697 if !customer_ids.is_empty() {
3698 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3699 let contracts = rev_gen.generate(
3700 company_code,
3701 &customer_ids,
3702 start_date,
3703 end_date,
3704 currency,
3705 &self.config.accounting_standards.revenue_recognition,
3706 framework,
3707 );
3708 snapshot.revenue_contract_count = contracts.len();
3709 snapshot.contracts = contracts;
3710 }
3711 }
3712
3713 if self.config.accounting_standards.impairment.enabled {
3715 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3716 .master_data
3717 .assets
3718 .iter()
3719 .map(|a| {
3720 (
3721 a.asset_id.clone(),
3722 a.description.clone(),
3723 a.acquisition_cost,
3724 )
3725 })
3726 .collect();
3727
3728 if !asset_data.is_empty() {
3729 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3730 let tests = imp_gen.generate(
3731 company_code,
3732 &asset_data,
3733 end_date,
3734 &self.config.accounting_standards.impairment,
3735 framework,
3736 );
3737 snapshot.impairment_test_count = tests.len();
3738 snapshot.impairment_tests = tests;
3739 }
3740 }
3741
3742 stats.revenue_contract_count = snapshot.revenue_contract_count;
3743 stats.impairment_test_count = snapshot.impairment_test_count;
3744
3745 info!(
3746 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3747 snapshot.revenue_contract_count, snapshot.impairment_test_count
3748 );
3749 self.check_resources_with_log("post-accounting-standards")?;
3750
3751 Ok(snapshot)
3752 }
3753
3754 fn phase_manufacturing(
3756 &mut self,
3757 stats: &mut EnhancedGenerationStatistics,
3758 ) -> SynthResult<ManufacturingSnapshot> {
3759 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3760 debug!("Phase 18: Skipped (manufacturing generation disabled)");
3761 return Ok(ManufacturingSnapshot::default());
3762 }
3763 info!("Phase 18: Generating Manufacturing Data");
3764
3765 let seed = self.seed;
3766 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3767 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3768 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3769 let company_code = self
3770 .config
3771 .companies
3772 .first()
3773 .map(|c| c.code.as_str())
3774 .unwrap_or("1000");
3775
3776 let material_data: Vec<(String, String)> = self
3777 .master_data
3778 .materials
3779 .iter()
3780 .map(|m| (m.material_id.clone(), m.description.clone()))
3781 .collect();
3782
3783 if material_data.is_empty() {
3784 debug!("Phase 18: Skipped (no materials available)");
3785 return Ok(ManufacturingSnapshot::default());
3786 }
3787
3788 let mut snapshot = ManufacturingSnapshot::default();
3789
3790 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3792 let production_orders = prod_gen.generate(
3793 company_code,
3794 &material_data,
3795 start_date,
3796 end_date,
3797 &self.config.manufacturing.production_orders,
3798 &self.config.manufacturing.costing,
3799 &self.config.manufacturing.routing,
3800 );
3801 snapshot.production_order_count = production_orders.len();
3802
3803 let inspection_data: Vec<(String, String, String)> = production_orders
3805 .iter()
3806 .map(|po| {
3807 (
3808 po.order_id.clone(),
3809 po.material_id.clone(),
3810 po.material_description.clone(),
3811 )
3812 })
3813 .collect();
3814
3815 snapshot.production_orders = production_orders;
3816
3817 if !inspection_data.is_empty() {
3818 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3819 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3820 snapshot.quality_inspection_count = inspections.len();
3821 snapshot.quality_inspections = inspections;
3822 }
3823
3824 let storage_locations: Vec<(String, String)> = material_data
3826 .iter()
3827 .enumerate()
3828 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3829 .collect();
3830
3831 let employee_ids: Vec<String> = self
3832 .master_data
3833 .employees
3834 .iter()
3835 .map(|e| e.employee_id.clone())
3836 .collect();
3837 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3838 .with_employee_pool(employee_ids);
3839 let mut cycle_count_total = 0usize;
3840 for month in 0..self.config.global.period_months {
3841 let count_date = start_date + chrono::Months::new(month);
3842 let items_per_count = storage_locations.len().clamp(10, 50);
3843 let cc = cc_gen.generate(
3844 company_code,
3845 &storage_locations,
3846 count_date,
3847 items_per_count,
3848 );
3849 snapshot.cycle_counts.push(cc);
3850 cycle_count_total += 1;
3851 }
3852 snapshot.cycle_count_count = cycle_count_total;
3853
3854 stats.production_order_count = snapshot.production_order_count;
3855 stats.quality_inspection_count = snapshot.quality_inspection_count;
3856 stats.cycle_count_count = snapshot.cycle_count_count;
3857
3858 info!(
3859 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3860 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3861 );
3862 self.check_resources_with_log("post-manufacturing")?;
3863
3864 Ok(snapshot)
3865 }
3866
3867 fn phase_sales_kpi_budgets(
3869 &mut self,
3870 coa: &Arc<ChartOfAccounts>,
3871 financial_reporting: &FinancialReportingSnapshot,
3872 stats: &mut EnhancedGenerationStatistics,
3873 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3874 if !self.phase_config.generate_sales_kpi_budgets {
3875 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3876 return Ok(SalesKpiBudgetsSnapshot::default());
3877 }
3878 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3879
3880 let seed = self.seed;
3881 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3882 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3883 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3884 let company_code = self
3885 .config
3886 .companies
3887 .first()
3888 .map(|c| c.code.as_str())
3889 .unwrap_or("1000");
3890
3891 let mut snapshot = SalesKpiBudgetsSnapshot::default();
3892
3893 if self.config.sales_quotes.enabled {
3895 let customer_data: Vec<(String, String)> = self
3896 .master_data
3897 .customers
3898 .iter()
3899 .map(|c| (c.customer_id.clone(), c.name.clone()))
3900 .collect();
3901 let material_data: Vec<(String, String)> = self
3902 .master_data
3903 .materials
3904 .iter()
3905 .map(|m| (m.material_id.clone(), m.description.clone()))
3906 .collect();
3907
3908 if !customer_data.is_empty() && !material_data.is_empty() {
3909 let employee_ids: Vec<String> = self
3910 .master_data
3911 .employees
3912 .iter()
3913 .map(|e| e.employee_id.clone())
3914 .collect();
3915 let customer_ids: Vec<String> = self
3916 .master_data
3917 .customers
3918 .iter()
3919 .map(|c| c.customer_id.clone())
3920 .collect();
3921 let company_currency = self
3922 .config
3923 .companies
3924 .first()
3925 .map(|c| c.currency.as_str())
3926 .unwrap_or("USD");
3927
3928 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3929 .with_pools(employee_ids, customer_ids);
3930 let quotes = quote_gen.generate_with_currency(
3931 company_code,
3932 &customer_data,
3933 &material_data,
3934 start_date,
3935 end_date,
3936 &self.config.sales_quotes,
3937 company_currency,
3938 );
3939 snapshot.sales_quote_count = quotes.len();
3940 snapshot.sales_quotes = quotes;
3941 }
3942 }
3943
3944 if self.config.financial_reporting.management_kpis.enabled {
3946 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3947 let mut kpis = kpi_gen.generate(
3948 company_code,
3949 start_date,
3950 end_date,
3951 &self.config.financial_reporting.management_kpis,
3952 );
3953
3954 {
3956 use rust_decimal::Decimal;
3957
3958 if let Some(income_stmt) =
3959 financial_reporting.financial_statements.iter().find(|fs| {
3960 fs.statement_type == StatementType::IncomeStatement
3961 && fs.company_code == company_code
3962 })
3963 {
3964 let total_revenue: Decimal = income_stmt
3966 .line_items
3967 .iter()
3968 .filter(|li| li.section.contains("Revenue") && !li.is_total)
3969 .map(|li| li.amount)
3970 .sum();
3971 let total_cogs: Decimal = income_stmt
3972 .line_items
3973 .iter()
3974 .filter(|li| {
3975 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
3976 && !li.is_total
3977 })
3978 .map(|li| li.amount.abs())
3979 .sum();
3980 let total_opex: Decimal = income_stmt
3981 .line_items
3982 .iter()
3983 .filter(|li| {
3984 li.section.contains("Expense")
3985 && !li.is_total
3986 && !li.section.contains("Cost")
3987 })
3988 .map(|li| li.amount.abs())
3989 .sum();
3990
3991 if total_revenue > Decimal::ZERO {
3992 let hundred = Decimal::from(100);
3993 let gross_margin_pct =
3994 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
3995 let operating_income = total_revenue - total_cogs - total_opex;
3996 let op_margin_pct =
3997 (operating_income * hundred / total_revenue).round_dp(2);
3998
3999 for kpi in &mut kpis {
4001 if kpi.name == "Gross Margin" {
4002 kpi.value = gross_margin_pct;
4003 } else if kpi.name == "Operating Margin" {
4004 kpi.value = op_margin_pct;
4005 }
4006 }
4007 }
4008 }
4009
4010 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4012 fs.statement_type == StatementType::BalanceSheet
4013 && fs.company_code == company_code
4014 }) {
4015 let current_assets: Decimal = bs
4016 .line_items
4017 .iter()
4018 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4019 .map(|li| li.amount)
4020 .sum();
4021 let current_liabilities: Decimal = bs
4022 .line_items
4023 .iter()
4024 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4025 .map(|li| li.amount.abs())
4026 .sum();
4027
4028 if current_liabilities > Decimal::ZERO {
4029 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4030 for kpi in &mut kpis {
4031 if kpi.name == "Current Ratio" {
4032 kpi.value = current_ratio;
4033 }
4034 }
4035 }
4036 }
4037 }
4038
4039 snapshot.kpi_count = kpis.len();
4040 snapshot.kpis = kpis;
4041 }
4042
4043 if self.config.financial_reporting.budgets.enabled {
4045 let account_data: Vec<(String, String)> = coa
4046 .accounts
4047 .iter()
4048 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4049 .collect();
4050
4051 if !account_data.is_empty() {
4052 let fiscal_year = start_date.year() as u32;
4053 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4054 let budget = budget_gen.generate(
4055 company_code,
4056 fiscal_year,
4057 &account_data,
4058 &self.config.financial_reporting.budgets,
4059 );
4060 snapshot.budget_line_count = budget.line_items.len();
4061 snapshot.budgets.push(budget);
4062 }
4063 }
4064
4065 stats.sales_quote_count = snapshot.sales_quote_count;
4066 stats.kpi_count = snapshot.kpi_count;
4067 stats.budget_line_count = snapshot.budget_line_count;
4068
4069 info!(
4070 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4071 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4072 );
4073 self.check_resources_with_log("post-sales-kpi-budgets")?;
4074
4075 Ok(snapshot)
4076 }
4077
4078 fn phase_tax_generation(
4080 &mut self,
4081 document_flows: &DocumentFlowSnapshot,
4082 stats: &mut EnhancedGenerationStatistics,
4083 ) -> SynthResult<TaxSnapshot> {
4084 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4085 debug!("Phase 20: Skipped (tax generation disabled)");
4086 return Ok(TaxSnapshot::default());
4087 }
4088 info!("Phase 20: Generating Tax Data");
4089
4090 let seed = self.seed;
4091 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4092 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4093 let fiscal_year = start_date.year();
4094 let company_code = self
4095 .config
4096 .companies
4097 .first()
4098 .map(|c| c.code.as_str())
4099 .unwrap_or("1000");
4100
4101 let mut gen =
4102 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4103
4104 let pack = self.primary_pack().clone();
4105 let (jurisdictions, codes) =
4106 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4107
4108 let mut provisions = Vec::new();
4110 if self.config.tax.provisions.enabled {
4111 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4112 for company in &self.config.companies {
4113 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4114 let statutory_rate = rust_decimal::Decimal::new(
4115 (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4116 2,
4117 );
4118 let provision = provision_gen.generate(
4119 &company.code,
4120 start_date,
4121 pre_tax_income,
4122 statutory_rate,
4123 );
4124 provisions.push(provision);
4125 }
4126 }
4127
4128 let mut tax_lines = Vec::new();
4130 if !codes.is_empty() {
4131 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4132 datasynth_generators::TaxLineGeneratorConfig::default(),
4133 codes.clone(),
4134 seed + 72,
4135 );
4136
4137 let buyer_country = self
4140 .config
4141 .companies
4142 .first()
4143 .map(|c| c.country.as_str())
4144 .unwrap_or("US");
4145 for vi in &document_flows.vendor_invoices {
4146 let lines = tax_line_gen.generate_for_document(
4147 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4148 &vi.header.document_id,
4149 buyer_country, buyer_country,
4151 vi.payable_amount,
4152 vi.header.document_date,
4153 None,
4154 );
4155 tax_lines.extend(lines);
4156 }
4157
4158 for ci in &document_flows.customer_invoices {
4160 let lines = tax_line_gen.generate_for_document(
4161 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4162 &ci.header.document_id,
4163 buyer_country, buyer_country,
4165 ci.total_gross_amount,
4166 ci.header.document_date,
4167 None,
4168 );
4169 tax_lines.extend(lines);
4170 }
4171 }
4172
4173 let snapshot = TaxSnapshot {
4174 jurisdiction_count: jurisdictions.len(),
4175 code_count: codes.len(),
4176 jurisdictions,
4177 codes,
4178 tax_provisions: provisions,
4179 tax_lines,
4180 tax_returns: Vec::new(),
4181 withholding_records: Vec::new(),
4182 tax_anomaly_labels: Vec::new(),
4183 };
4184
4185 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4186 stats.tax_code_count = snapshot.code_count;
4187 stats.tax_provision_count = snapshot.tax_provisions.len();
4188 stats.tax_line_count = snapshot.tax_lines.len();
4189
4190 info!(
4191 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4192 snapshot.jurisdiction_count,
4193 snapshot.code_count,
4194 snapshot.tax_provisions.len()
4195 );
4196 self.check_resources_with_log("post-tax")?;
4197
4198 Ok(snapshot)
4199 }
4200
4201 fn phase_esg_generation(
4203 &mut self,
4204 document_flows: &DocumentFlowSnapshot,
4205 stats: &mut EnhancedGenerationStatistics,
4206 ) -> SynthResult<EsgSnapshot> {
4207 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4208 debug!("Phase 21: Skipped (ESG generation disabled)");
4209 return Ok(EsgSnapshot::default());
4210 }
4211 info!("Phase 21: Generating ESG Data");
4212
4213 let seed = self.seed;
4214 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4215 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4216 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4217 let entity_id = self
4218 .config
4219 .companies
4220 .first()
4221 .map(|c| c.code.as_str())
4222 .unwrap_or("1000");
4223
4224 let esg_cfg = &self.config.esg;
4225 let mut snapshot = EsgSnapshot::default();
4226
4227 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4229 esg_cfg.environmental.energy.clone(),
4230 seed + 80,
4231 );
4232 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4233
4234 let facility_count = esg_cfg.environmental.energy.facility_count;
4236 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4237 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4238
4239 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4241 seed + 82,
4242 esg_cfg.environmental.waste.diversion_target,
4243 facility_count,
4244 );
4245 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4246
4247 let mut emission_gen =
4249 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4250
4251 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4253 .iter()
4254 .map(|e| datasynth_generators::EnergyInput {
4255 facility_id: e.facility_id.clone(),
4256 energy_type: match e.energy_source {
4257 EnergySourceType::NaturalGas => {
4258 datasynth_generators::EnergyInputType::NaturalGas
4259 }
4260 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4261 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4262 _ => datasynth_generators::EnergyInputType::Electricity,
4263 },
4264 consumption_kwh: e.consumption_kwh,
4265 period: e.period,
4266 })
4267 .collect();
4268
4269 let mut emissions = Vec::new();
4270 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4271 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4272
4273 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4275 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4276 for payment in &document_flows.payments {
4277 if payment.is_vendor {
4278 *totals
4279 .entry(payment.business_partner_id.clone())
4280 .or_default() += payment.amount;
4281 }
4282 }
4283 totals
4284 };
4285 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4286 .master_data
4287 .vendors
4288 .iter()
4289 .map(|v| {
4290 let spend = vendor_payment_totals
4291 .get(&v.vendor_id)
4292 .copied()
4293 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4294 datasynth_generators::VendorSpendInput {
4295 vendor_id: v.vendor_id.clone(),
4296 category: format!("{:?}", v.vendor_type).to_lowercase(),
4297 spend,
4298 country: v.country.clone(),
4299 }
4300 })
4301 .collect();
4302 if !vendor_spend.is_empty() {
4303 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4304 entity_id,
4305 &vendor_spend,
4306 start_date,
4307 end_date,
4308 ));
4309 }
4310
4311 let headcount = self.master_data.employees.len() as u32;
4313 if headcount > 0 {
4314 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4315 emissions.extend(emission_gen.generate_scope3_business_travel(
4316 entity_id,
4317 travel_spend,
4318 start_date,
4319 ));
4320 emissions
4321 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4322 }
4323
4324 snapshot.emission_count = emissions.len();
4325 snapshot.emissions = emissions;
4326 snapshot.energy = energy_records;
4327
4328 let mut workforce_gen =
4330 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4331 let total_headcount = headcount.max(100);
4332 snapshot.diversity =
4333 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4334 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4335 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4336 entity_id,
4337 facility_count,
4338 start_date,
4339 end_date,
4340 );
4341
4342 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4345 entity_id,
4346 &snapshot.safety_incidents,
4347 total_hours,
4348 start_date,
4349 );
4350 snapshot.safety_metrics = vec![safety_metric];
4351
4352 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4354 seed + 85,
4355 esg_cfg.governance.board_size,
4356 esg_cfg.governance.independence_target,
4357 );
4358 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4359
4360 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4362 esg_cfg.supply_chain_esg.clone(),
4363 seed + 86,
4364 );
4365 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4366 .master_data
4367 .vendors
4368 .iter()
4369 .map(|v| datasynth_generators::VendorInput {
4370 vendor_id: v.vendor_id.clone(),
4371 country: v.country.clone(),
4372 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4373 quality_score: None,
4374 })
4375 .collect();
4376 snapshot.supplier_assessments =
4377 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4378
4379 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4381 seed + 87,
4382 esg_cfg.reporting.clone(),
4383 esg_cfg.climate_scenarios.clone(),
4384 );
4385 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4386 snapshot.disclosures = disclosure_gen.generate_disclosures(
4387 entity_id,
4388 &snapshot.materiality,
4389 start_date,
4390 end_date,
4391 );
4392 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4393 snapshot.disclosure_count = snapshot.disclosures.len();
4394
4395 if esg_cfg.anomaly_rate > 0.0 {
4397 let mut anomaly_injector =
4398 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4399 let mut labels = Vec::new();
4400 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4401 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4402 labels.extend(
4403 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4404 );
4405 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4406 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4407 snapshot.anomaly_labels = labels;
4408 }
4409
4410 stats.esg_emission_count = snapshot.emission_count;
4411 stats.esg_disclosure_count = snapshot.disclosure_count;
4412
4413 info!(
4414 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4415 snapshot.emission_count,
4416 snapshot.disclosure_count,
4417 snapshot.supplier_assessments.len()
4418 );
4419 self.check_resources_with_log("post-esg")?;
4420
4421 Ok(snapshot)
4422 }
4423
4424 fn phase_treasury_data(
4426 &mut self,
4427 document_flows: &DocumentFlowSnapshot,
4428 stats: &mut EnhancedGenerationStatistics,
4429 ) -> SynthResult<TreasurySnapshot> {
4430 if !self.config.treasury.enabled {
4431 debug!("Phase 22: Skipped (treasury generation disabled)");
4432 return Ok(TreasurySnapshot::default());
4433 }
4434 info!("Phase 22: Generating Treasury Data");
4435
4436 let seed = self.seed;
4437 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4438 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4439 let currency = self
4440 .config
4441 .companies
4442 .first()
4443 .map(|c| c.currency.as_str())
4444 .unwrap_or("USD");
4445 let entity_id = self
4446 .config
4447 .companies
4448 .first()
4449 .map(|c| c.code.as_str())
4450 .unwrap_or("1000");
4451
4452 let mut snapshot = TreasurySnapshot::default();
4453
4454 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4456 self.config.treasury.debt.clone(),
4457 seed + 90,
4458 );
4459 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4460
4461 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4463 self.config.treasury.hedging.clone(),
4464 seed + 91,
4465 );
4466 for debt in &snapshot.debt_instruments {
4467 if debt.rate_type == InterestRateType::Variable {
4468 let swap = hedge_gen.generate_ir_swap(
4469 currency,
4470 debt.principal,
4471 debt.origination_date,
4472 debt.maturity_date,
4473 );
4474 snapshot.hedging_instruments.push(swap);
4475 }
4476 }
4477
4478 {
4481 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4482 for payment in &document_flows.payments {
4483 if payment.currency != currency {
4484 let entry = fx_map
4485 .entry(payment.currency.clone())
4486 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4487 entry.0 += payment.amount;
4488 if payment.header.document_date > entry.1 {
4490 entry.1 = payment.header.document_date;
4491 }
4492 }
4493 }
4494 if !fx_map.is_empty() {
4495 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4496 .into_iter()
4497 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4498 datasynth_generators::treasury::FxExposure {
4499 currency_pair: format!("{}/{}", foreign_ccy, currency),
4500 foreign_currency: foreign_ccy,
4501 net_amount,
4502 settlement_date,
4503 description: "AP payment FX exposure".to_string(),
4504 }
4505 })
4506 .collect();
4507 let (fx_instruments, fx_relationships) =
4508 hedge_gen.generate(start_date, &fx_exposures);
4509 snapshot.hedging_instruments.extend(fx_instruments);
4510 snapshot.hedge_relationships.extend(fx_relationships);
4511 }
4512 }
4513
4514 if self.config.treasury.anomaly_rate > 0.0 {
4516 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4517 seed + 92,
4518 self.config.treasury.anomaly_rate,
4519 );
4520 let mut labels = Vec::new();
4521 labels.extend(
4522 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4523 );
4524 snapshot.treasury_anomaly_labels = labels;
4525 }
4526
4527 if self.config.treasury.cash_positioning.enabled {
4529 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4530
4531 for payment in &document_flows.payments {
4533 cash_flows.push(datasynth_generators::treasury::CashFlow {
4534 date: payment.header.document_date,
4535 account_id: format!("{}-MAIN", entity_id),
4536 amount: payment.amount,
4537 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4538 });
4539 }
4540
4541 for chain in &document_flows.o2c_chains {
4543 if let Some(ref receipt) = chain.customer_receipt {
4544 cash_flows.push(datasynth_generators::treasury::CashFlow {
4545 date: receipt.header.document_date,
4546 account_id: format!("{}-MAIN", entity_id),
4547 amount: receipt.amount,
4548 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4549 });
4550 }
4551 }
4552
4553 if !cash_flows.is_empty() {
4554 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4555 self.config.treasury.cash_positioning.clone(),
4556 seed + 93,
4557 );
4558 let account_id = format!("{}-MAIN", entity_id);
4559 snapshot.cash_positions = cash_gen.generate(
4560 entity_id,
4561 &account_id,
4562 currency,
4563 &cash_flows,
4564 start_date,
4565 start_date + chrono::Months::new(self.config.global.period_months),
4566 rust_decimal::Decimal::new(1_000_000, 0), );
4568 }
4569 }
4570
4571 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4572 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4573 stats.cash_position_count = snapshot.cash_positions.len();
4574
4575 info!(
4576 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4577 snapshot.debt_instruments.len(),
4578 snapshot.hedging_instruments.len(),
4579 snapshot.cash_positions.len()
4580 );
4581 self.check_resources_with_log("post-treasury")?;
4582
4583 Ok(snapshot)
4584 }
4585
4586 fn phase_project_accounting(
4588 &mut self,
4589 document_flows: &DocumentFlowSnapshot,
4590 hr: &HrSnapshot,
4591 stats: &mut EnhancedGenerationStatistics,
4592 ) -> SynthResult<ProjectAccountingSnapshot> {
4593 if !self.config.project_accounting.enabled {
4594 debug!("Phase 23: Skipped (project accounting disabled)");
4595 return Ok(ProjectAccountingSnapshot::default());
4596 }
4597 info!("Phase 23: Generating Project Accounting Data");
4598
4599 let seed = self.seed;
4600 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4601 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4602 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4603 let company_code = self
4604 .config
4605 .companies
4606 .first()
4607 .map(|c| c.code.as_str())
4608 .unwrap_or("1000");
4609
4610 let mut snapshot = ProjectAccountingSnapshot::default();
4611
4612 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4614 self.config.project_accounting.clone(),
4615 seed + 95,
4616 );
4617 let pool = project_gen.generate(company_code, start_date, end_date);
4618 snapshot.projects = pool.projects.clone();
4619
4620 {
4622 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4623 Vec::new();
4624
4625 for te in &hr.time_entries {
4627 let total_hours = te.hours_regular + te.hours_overtime;
4628 if total_hours > 0.0 {
4629 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4630 id: te.entry_id.clone(),
4631 entity_id: company_code.to_string(),
4632 date: te.date,
4633 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4634 .unwrap_or(rust_decimal::Decimal::ZERO),
4635 source_type: CostSourceType::TimeEntry,
4636 hours: Some(
4637 rust_decimal::Decimal::from_f64_retain(total_hours)
4638 .unwrap_or(rust_decimal::Decimal::ZERO),
4639 ),
4640 });
4641 }
4642 }
4643
4644 for er in &hr.expense_reports {
4646 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4647 id: er.report_id.clone(),
4648 entity_id: company_code.to_string(),
4649 date: er.submission_date,
4650 amount: er.total_amount,
4651 source_type: CostSourceType::ExpenseReport,
4652 hours: None,
4653 });
4654 }
4655
4656 for po in &document_flows.purchase_orders {
4658 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4659 id: po.header.document_id.clone(),
4660 entity_id: company_code.to_string(),
4661 date: po.header.document_date,
4662 amount: po.total_net_amount,
4663 source_type: CostSourceType::PurchaseOrder,
4664 hours: None,
4665 });
4666 }
4667
4668 for vi in &document_flows.vendor_invoices {
4670 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4671 id: vi.header.document_id.clone(),
4672 entity_id: company_code.to_string(),
4673 date: vi.header.document_date,
4674 amount: vi.payable_amount,
4675 source_type: CostSourceType::VendorInvoice,
4676 hours: None,
4677 });
4678 }
4679
4680 if !source_docs.is_empty() && !pool.projects.is_empty() {
4681 let mut cost_gen =
4682 datasynth_generators::project_accounting::ProjectCostGenerator::new(
4683 self.config.project_accounting.cost_allocation.clone(),
4684 seed + 99,
4685 );
4686 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4687 }
4688 }
4689
4690 if self.config.project_accounting.change_orders.enabled {
4692 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4693 self.config.project_accounting.change_orders.clone(),
4694 seed + 96,
4695 );
4696 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4697 }
4698
4699 if self.config.project_accounting.milestones.enabled {
4701 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4702 self.config.project_accounting.milestones.clone(),
4703 seed + 97,
4704 );
4705 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4706 }
4707
4708 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4710 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4711 self.config.project_accounting.earned_value.clone(),
4712 seed + 98,
4713 );
4714 snapshot.earned_value_metrics =
4715 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4716 }
4717
4718 stats.project_count = snapshot.projects.len();
4719 stats.project_change_order_count = snapshot.change_orders.len();
4720 stats.project_cost_line_count = snapshot.cost_lines.len();
4721
4722 info!(
4723 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4724 snapshot.projects.len(),
4725 snapshot.change_orders.len(),
4726 snapshot.milestones.len(),
4727 snapshot.earned_value_metrics.len()
4728 );
4729 self.check_resources_with_log("post-project-accounting")?;
4730
4731 Ok(snapshot)
4732 }
4733
4734 fn phase_opening_balances(
4736 &mut self,
4737 coa: &Arc<ChartOfAccounts>,
4738 stats: &mut EnhancedGenerationStatistics,
4739 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4740 if !self.config.balance.generate_opening_balances {
4741 debug!("Phase 3b: Skipped (opening balance generation disabled)");
4742 return Ok(Vec::new());
4743 }
4744 info!("Phase 3b: Generating Opening Balances");
4745
4746 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4747 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4748 let fiscal_year = start_date.year();
4749
4750 let industry = match self.config.global.industry {
4751 IndustrySector::Manufacturing => IndustryType::Manufacturing,
4752 IndustrySector::Retail => IndustryType::Retail,
4753 IndustrySector::FinancialServices => IndustryType::Financial,
4754 IndustrySector::Healthcare => IndustryType::Healthcare,
4755 IndustrySector::Technology => IndustryType::Technology,
4756 _ => IndustryType::Manufacturing,
4757 };
4758
4759 let config = datasynth_generators::OpeningBalanceConfig {
4760 industry,
4761 ..Default::default()
4762 };
4763 let mut gen =
4764 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4765
4766 let mut results = Vec::new();
4767 for company in &self.config.companies {
4768 let spec = OpeningBalanceSpec::new(
4769 company.code.clone(),
4770 start_date,
4771 fiscal_year,
4772 company.currency.clone(),
4773 rust_decimal::Decimal::new(10_000_000, 0),
4774 industry,
4775 );
4776 let ob = gen.generate(&spec, coa, start_date, &company.code);
4777 results.push(ob);
4778 }
4779
4780 stats.opening_balance_count = results.len();
4781 info!("Opening balances generated: {} companies", results.len());
4782 self.check_resources_with_log("post-opening-balances")?;
4783
4784 Ok(results)
4785 }
4786
4787 fn phase_subledger_reconciliation(
4789 &mut self,
4790 subledger: &SubledgerSnapshot,
4791 entries: &[JournalEntry],
4792 stats: &mut EnhancedGenerationStatistics,
4793 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4794 if !self.config.balance.reconcile_subledgers {
4795 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4796 return Ok(Vec::new());
4797 }
4798 info!("Phase 9b: Reconciling GL to subledger balances");
4799
4800 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4801 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4802 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4803
4804 let tracker_config = BalanceTrackerConfig {
4806 validate_on_each_entry: false,
4807 track_history: false,
4808 fail_on_validation_error: false,
4809 ..Default::default()
4810 };
4811 let recon_currency = self
4812 .config
4813 .companies
4814 .first()
4815 .map(|c| c.currency.clone())
4816 .unwrap_or_else(|| "USD".to_string());
4817 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4818 let _ = tracker.apply_entries(entries);
4819
4820 let mut engine = datasynth_generators::ReconciliationEngine::new(
4821 datasynth_generators::ReconciliationConfig::default(),
4822 );
4823
4824 let mut results = Vec::new();
4825 let company_code = self
4826 .config
4827 .companies
4828 .first()
4829 .map(|c| c.code.as_str())
4830 .unwrap_or("1000");
4831
4832 if !subledger.ar_invoices.is_empty() {
4834 let gl_balance = tracker
4835 .get_account_balance(
4836 company_code,
4837 datasynth_core::accounts::control_accounts::AR_CONTROL,
4838 )
4839 .map(|b| b.closing_balance)
4840 .unwrap_or_default();
4841 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4842 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4843 }
4844
4845 if !subledger.ap_invoices.is_empty() {
4847 let gl_balance = tracker
4848 .get_account_balance(
4849 company_code,
4850 datasynth_core::accounts::control_accounts::AP_CONTROL,
4851 )
4852 .map(|b| b.closing_balance)
4853 .unwrap_or_default();
4854 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4855 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4856 }
4857
4858 if !subledger.fa_records.is_empty() {
4860 let gl_asset_balance = tracker
4861 .get_account_balance(
4862 company_code,
4863 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4864 )
4865 .map(|b| b.closing_balance)
4866 .unwrap_or_default();
4867 let gl_accum_depr_balance = tracker
4868 .get_account_balance(
4869 company_code,
4870 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4871 )
4872 .map(|b| b.closing_balance)
4873 .unwrap_or_default();
4874 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4875 subledger.fa_records.iter().collect();
4876 let (asset_recon, depr_recon) = engine.reconcile_fa(
4877 company_code,
4878 end_date,
4879 gl_asset_balance,
4880 gl_accum_depr_balance,
4881 &fa_refs,
4882 );
4883 results.push(asset_recon);
4884 results.push(depr_recon);
4885 }
4886
4887 if !subledger.inventory_positions.is_empty() {
4889 let gl_balance = tracker
4890 .get_account_balance(
4891 company_code,
4892 datasynth_core::accounts::control_accounts::INVENTORY,
4893 )
4894 .map(|b| b.closing_balance)
4895 .unwrap_or_default();
4896 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4897 subledger.inventory_positions.iter().collect();
4898 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4899 }
4900
4901 stats.subledger_reconciliation_count = results.len();
4902 info!(
4903 "Subledger reconciliation complete: {} reconciliations",
4904 results.len()
4905 );
4906 self.check_resources_with_log("post-subledger-reconciliation")?;
4907
4908 Ok(results)
4909 }
4910
4911 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4913 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4914
4915 let use_french_pcg = self.config.accounting_standards.enabled
4916 && matches!(
4917 self.config.accounting_standards.framework,
4918 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap)
4919 );
4920
4921 let mut gen = ChartOfAccountsGenerator::new(
4922 self.config.chart_of_accounts.complexity,
4923 self.config.global.industry,
4924 self.seed,
4925 )
4926 .with_french_pcg(use_french_pcg);
4927
4928 let coa = Arc::new(gen.generate());
4929 self.coa = Some(Arc::clone(&coa));
4930
4931 if let Some(pb) = pb {
4932 pb.finish_with_message("Chart of Accounts complete");
4933 }
4934
4935 Ok(coa)
4936 }
4937
4938 fn generate_master_data(&mut self) -> SynthResult<()> {
4940 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4941 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4942 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4943
4944 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
4946
4947 let pack = self.primary_pack().clone();
4949
4950 let vendors_per_company = self.phase_config.vendors_per_company;
4952 let customers_per_company = self.phase_config.customers_per_company;
4953 let materials_per_company = self.phase_config.materials_per_company;
4954 let assets_per_company = self.phase_config.assets_per_company;
4955
4956 let per_company_results: Vec<_> = self
4959 .config
4960 .companies
4961 .par_iter()
4962 .enumerate()
4963 .map(|(i, company)| {
4964 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
4965 let pack = pack.clone();
4966
4967 let mut vendor_gen = VendorGenerator::new(company_seed);
4969 vendor_gen.set_country_pack(pack.clone());
4970 let vendor_pool =
4971 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
4972
4973 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
4975 customer_gen.set_country_pack(pack.clone());
4976 let customer_pool = customer_gen.generate_customer_pool(
4977 customers_per_company,
4978 &company.code,
4979 start_date,
4980 );
4981
4982 let mut material_gen = MaterialGenerator::new(company_seed + 200);
4984 material_gen.set_country_pack(pack);
4985 let material_pool = material_gen.generate_material_pool(
4986 materials_per_company,
4987 &company.code,
4988 start_date,
4989 );
4990
4991 let mut asset_gen = AssetGenerator::new(company_seed + 300);
4993 let asset_pool = asset_gen.generate_asset_pool(
4994 assets_per_company,
4995 &company.code,
4996 (start_date, end_date),
4997 );
4998
4999 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5001 let employee_pool =
5002 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5003
5004 (
5005 vendor_pool.vendors,
5006 customer_pool.customers,
5007 material_pool.materials,
5008 asset_pool.assets,
5009 employee_pool.employees,
5010 )
5011 })
5012 .collect();
5013
5014 for (vendors, customers, materials, assets, employees) in per_company_results {
5016 self.master_data.vendors.extend(vendors);
5017 self.master_data.customers.extend(customers);
5018 self.master_data.materials.extend(materials);
5019 self.master_data.assets.extend(assets);
5020 self.master_data.employees.extend(employees);
5021 }
5022
5023 if let Some(pb) = &pb {
5024 pb.inc(total);
5025 }
5026 if let Some(pb) = pb {
5027 pb.finish_with_message("Master data generation complete");
5028 }
5029
5030 Ok(())
5031 }
5032
5033 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5035 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5036 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5037
5038 let p2p_count = self
5040 .phase_config
5041 .p2p_chains
5042 .min(self.master_data.vendors.len() * 2);
5043 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5044
5045 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5047 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5048 p2p_gen.set_country_pack(self.primary_pack().clone());
5049
5050 for i in 0..p2p_count {
5051 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5052 let materials: Vec<&Material> = self
5053 .master_data
5054 .materials
5055 .iter()
5056 .skip(i % self.master_data.materials.len().max(1))
5057 .take(2.min(self.master_data.materials.len()))
5058 .collect();
5059
5060 if materials.is_empty() {
5061 continue;
5062 }
5063
5064 let company = &self.config.companies[i % self.config.companies.len()];
5065 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5066 let fiscal_period = po_date.month() as u8;
5067 let created_by = if self.master_data.employees.is_empty() {
5068 "SYSTEM"
5069 } else {
5070 self.master_data.employees[i % self.master_data.employees.len()]
5071 .user_id
5072 .as_str()
5073 };
5074
5075 let chain = p2p_gen.generate_chain(
5076 &company.code,
5077 vendor,
5078 &materials,
5079 po_date,
5080 start_date.year() as u16,
5081 fiscal_period,
5082 created_by,
5083 );
5084
5085 flows.purchase_orders.push(chain.purchase_order.clone());
5087 flows.goods_receipts.extend(chain.goods_receipts.clone());
5088 if let Some(vi) = &chain.vendor_invoice {
5089 flows.vendor_invoices.push(vi.clone());
5090 }
5091 if let Some(payment) = &chain.payment {
5092 flows.payments.push(payment.clone());
5093 }
5094 flows.p2p_chains.push(chain);
5095
5096 if let Some(pb) = &pb {
5097 pb.inc(1);
5098 }
5099 }
5100
5101 if let Some(pb) = pb {
5102 pb.finish_with_message("P2P document flows complete");
5103 }
5104
5105 let o2c_count = self
5107 .phase_config
5108 .o2c_chains
5109 .min(self.master_data.customers.len() * 2);
5110 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5111
5112 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5114 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5115 o2c_gen.set_country_pack(self.primary_pack().clone());
5116
5117 for i in 0..o2c_count {
5118 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5119 let materials: Vec<&Material> = self
5120 .master_data
5121 .materials
5122 .iter()
5123 .skip(i % self.master_data.materials.len().max(1))
5124 .take(2.min(self.master_data.materials.len()))
5125 .collect();
5126
5127 if materials.is_empty() {
5128 continue;
5129 }
5130
5131 let company = &self.config.companies[i % self.config.companies.len()];
5132 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5133 let fiscal_period = so_date.month() as u8;
5134 let created_by = if self.master_data.employees.is_empty() {
5135 "SYSTEM"
5136 } else {
5137 self.master_data.employees[i % self.master_data.employees.len()]
5138 .user_id
5139 .as_str()
5140 };
5141
5142 let chain = o2c_gen.generate_chain(
5143 &company.code,
5144 customer,
5145 &materials,
5146 so_date,
5147 start_date.year() as u16,
5148 fiscal_period,
5149 created_by,
5150 );
5151
5152 flows.sales_orders.push(chain.sales_order.clone());
5154 flows.deliveries.extend(chain.deliveries.clone());
5155 if let Some(ci) = &chain.customer_invoice {
5156 flows.customer_invoices.push(ci.clone());
5157 }
5158 if let Some(receipt) = &chain.customer_receipt {
5159 flows.payments.push(receipt.clone());
5160 }
5161 flows.o2c_chains.push(chain);
5162
5163 if let Some(pb) = &pb {
5164 pb.inc(1);
5165 }
5166 }
5167
5168 if let Some(pb) = pb {
5169 pb.finish_with_message("O2C document flows complete");
5170 }
5171
5172 Ok(())
5173 }
5174
5175 fn generate_journal_entries(
5177 &mut self,
5178 coa: &Arc<ChartOfAccounts>,
5179 ) -> SynthResult<Vec<JournalEntry>> {
5180 use datasynth_core::traits::ParallelGenerator;
5181
5182 let total = self.calculate_total_transactions();
5183 let pb = self.create_progress_bar(total, "Generating Journal Entries");
5184
5185 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5186 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5187 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5188
5189 let company_codes: Vec<String> = self
5190 .config
5191 .companies
5192 .iter()
5193 .map(|c| c.code.clone())
5194 .collect();
5195
5196 let generator = JournalEntryGenerator::new_with_params(
5197 self.config.transactions.clone(),
5198 Arc::clone(coa),
5199 company_codes,
5200 start_date,
5201 end_date,
5202 self.seed,
5203 );
5204
5205 let je_pack = self.primary_pack();
5209
5210 let mut generator = generator
5211 .with_master_data(
5212 &self.master_data.vendors,
5213 &self.master_data.customers,
5214 &self.master_data.materials,
5215 )
5216 .with_country_pack_names(je_pack)
5217 .with_country_pack_temporal(
5218 self.config.temporal_patterns.clone(),
5219 self.seed + 200,
5220 je_pack,
5221 )
5222 .with_persona_errors(true)
5223 .with_fraud_config(self.config.fraud.clone());
5224
5225 if self.config.temporal.enabled {
5227 let drift_config = self.config.temporal.to_core_config();
5228 generator = generator.with_drift_config(drift_config, self.seed + 100);
5229 }
5230
5231 self.check_memory_limit()?;
5233
5234 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5236
5237 let entries = if total >= 10_000 && num_threads > 1 {
5241 let sub_generators = generator.split(num_threads);
5244 let entries_per_thread = total as usize / num_threads;
5245 let remainder = total as usize % num_threads;
5246
5247 let batches: Vec<Vec<JournalEntry>> = sub_generators
5248 .into_par_iter()
5249 .enumerate()
5250 .map(|(i, mut gen)| {
5251 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5252 gen.generate_batch(count)
5253 })
5254 .collect();
5255
5256 let entries = JournalEntryGenerator::merge_results(batches);
5258
5259 if let Some(pb) = &pb {
5260 pb.inc(total);
5261 }
5262 entries
5263 } else {
5264 let mut entries = Vec::with_capacity(total as usize);
5266 for _ in 0..total {
5267 let entry = generator.generate();
5268 entries.push(entry);
5269 if let Some(pb) = &pb {
5270 pb.inc(1);
5271 }
5272 }
5273 entries
5274 };
5275
5276 if let Some(pb) = pb {
5277 pb.finish_with_message("Journal entries complete");
5278 }
5279
5280 Ok(entries)
5281 }
5282
5283 fn generate_jes_from_document_flows(
5288 &mut self,
5289 flows: &DocumentFlowSnapshot,
5290 ) -> SynthResult<Vec<JournalEntry>> {
5291 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5292 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5293
5294 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(
5295 DocumentFlowJeConfig::default(),
5296 self.seed,
5297 );
5298 let mut entries = Vec::new();
5299
5300 for chain in &flows.p2p_chains {
5302 let chain_entries = generator.generate_from_p2p_chain(chain);
5303 entries.extend(chain_entries);
5304 if let Some(pb) = &pb {
5305 pb.inc(1);
5306 }
5307 }
5308
5309 for chain in &flows.o2c_chains {
5311 let chain_entries = generator.generate_from_o2c_chain(chain);
5312 entries.extend(chain_entries);
5313 if let Some(pb) = &pb {
5314 pb.inc(1);
5315 }
5316 }
5317
5318 if let Some(pb) = pb {
5319 pb.finish_with_message(format!(
5320 "Generated {} JEs from document flows",
5321 entries.len()
5322 ));
5323 }
5324
5325 Ok(entries)
5326 }
5327
5328 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5334 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5335
5336 let mut jes = Vec::with_capacity(payroll_runs.len());
5337
5338 for run in payroll_runs {
5339 let mut je = JournalEntry::new_simple(
5340 format!("JE-PAYROLL-{}", run.payroll_id),
5341 run.company_code.clone(),
5342 run.run_date,
5343 format!("Payroll {}", run.payroll_id),
5344 );
5345
5346 je.add_line(JournalEntryLine {
5348 line_number: 1,
5349 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5350 debit_amount: run.total_gross,
5351 reference: Some(run.payroll_id.clone()),
5352 text: Some(format!(
5353 "Payroll {} ({} employees)",
5354 run.payroll_id, run.employee_count
5355 )),
5356 ..Default::default()
5357 });
5358
5359 je.add_line(JournalEntryLine {
5361 line_number: 2,
5362 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5363 credit_amount: run.total_gross,
5364 reference: Some(run.payroll_id.clone()),
5365 ..Default::default()
5366 });
5367
5368 jes.push(je);
5369 }
5370
5371 jes
5372 }
5373
5374 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5380 use datasynth_core::accounts::{control_accounts, expense_accounts};
5381 use datasynth_core::models::ProductionOrderStatus;
5382
5383 let mut jes = Vec::new();
5384
5385 for order in production_orders {
5386 if !matches!(
5388 order.status,
5389 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5390 ) {
5391 continue;
5392 }
5393
5394 let mut je = JournalEntry::new_simple(
5395 format!("JE-MFG-{}", order.order_id),
5396 order.company_code.clone(),
5397 order.actual_end.unwrap_or(order.planned_end),
5398 format!(
5399 "Production Order {} - {}",
5400 order.order_id, order.material_description
5401 ),
5402 );
5403
5404 je.add_line(JournalEntryLine {
5406 line_number: 1,
5407 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5408 debit_amount: order.actual_cost,
5409 reference: Some(order.order_id.clone()),
5410 text: Some(format!(
5411 "Material consumption for {}",
5412 order.material_description
5413 )),
5414 quantity: Some(order.actual_quantity),
5415 unit: Some("EA".to_string()),
5416 ..Default::default()
5417 });
5418
5419 je.add_line(JournalEntryLine {
5421 line_number: 2,
5422 gl_account: control_accounts::INVENTORY.to_string(),
5423 credit_amount: order.actual_cost,
5424 reference: Some(order.order_id.clone()),
5425 ..Default::default()
5426 });
5427
5428 jes.push(je);
5429 }
5430
5431 jes
5432 }
5433
5434 fn link_document_flows_to_subledgers(
5439 &mut self,
5440 flows: &DocumentFlowSnapshot,
5441 ) -> SynthResult<SubledgerSnapshot> {
5442 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5443 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5444
5445 let vendor_names: std::collections::HashMap<String, String> = self
5447 .master_data
5448 .vendors
5449 .iter()
5450 .map(|v| (v.vendor_id.clone(), v.name.clone()))
5451 .collect();
5452 let customer_names: std::collections::HashMap<String, String> = self
5453 .master_data
5454 .customers
5455 .iter()
5456 .map(|c| (c.customer_id.clone(), c.name.clone()))
5457 .collect();
5458
5459 let mut linker = DocumentFlowLinker::new()
5460 .with_vendor_names(vendor_names)
5461 .with_customer_names(customer_names);
5462
5463 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5465 if let Some(pb) = &pb {
5466 pb.inc(flows.vendor_invoices.len() as u64);
5467 }
5468
5469 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5471 if let Some(pb) = &pb {
5472 pb.inc(flows.customer_invoices.len() as u64);
5473 }
5474
5475 if let Some(pb) = pb {
5476 pb.finish_with_message(format!(
5477 "Linked {} AP and {} AR invoices",
5478 ap_invoices.len(),
5479 ar_invoices.len()
5480 ));
5481 }
5482
5483 Ok(SubledgerSnapshot {
5484 ap_invoices,
5485 ar_invoices,
5486 fa_records: Vec::new(),
5487 inventory_positions: Vec::new(),
5488 inventory_movements: Vec::new(),
5489 })
5490 }
5491
5492 #[allow(clippy::too_many_arguments)]
5497 fn generate_ocpm_events(
5498 &mut self,
5499 flows: &DocumentFlowSnapshot,
5500 sourcing: &SourcingSnapshot,
5501 hr: &HrSnapshot,
5502 manufacturing: &ManufacturingSnapshot,
5503 banking: &BankingSnapshot,
5504 audit: &AuditSnapshot,
5505 financial_reporting: &FinancialReportingSnapshot,
5506 ) -> SynthResult<OcpmSnapshot> {
5507 let total_chains = flows.p2p_chains.len()
5508 + flows.o2c_chains.len()
5509 + sourcing.sourcing_projects.len()
5510 + hr.payroll_runs.len()
5511 + manufacturing.production_orders.len()
5512 + banking.customers.len()
5513 + audit.engagements.len()
5514 + financial_reporting.bank_reconciliations.len();
5515 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5516
5517 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5519 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5520
5521 let ocpm_config = OcpmGeneratorConfig {
5523 generate_p2p: true,
5524 generate_o2c: true,
5525 generate_s2c: !sourcing.sourcing_projects.is_empty(),
5526 generate_h2r: !hr.payroll_runs.is_empty(),
5527 generate_mfg: !manufacturing.production_orders.is_empty(),
5528 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5529 generate_bank: !banking.customers.is_empty(),
5530 generate_audit: !audit.engagements.is_empty(),
5531 happy_path_rate: 0.75,
5532 exception_path_rate: 0.20,
5533 error_path_rate: 0.05,
5534 add_duration_variability: true,
5535 duration_std_dev_factor: 0.3,
5536 };
5537 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5538 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5539
5540 let available_users: Vec<String> = self
5542 .master_data
5543 .employees
5544 .iter()
5545 .take(20)
5546 .map(|e| e.user_id.clone())
5547 .collect();
5548
5549 let fallback_date =
5551 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5552 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5553 .unwrap_or(fallback_date);
5554 let base_midnight = base_date
5555 .and_hms_opt(0, 0, 0)
5556 .expect("midnight is always valid");
5557 let base_datetime =
5558 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5559
5560 let add_result = |event_log: &mut OcpmEventLog,
5562 result: datasynth_ocpm::CaseGenerationResult| {
5563 for event in result.events {
5564 event_log.add_event(event);
5565 }
5566 for object in result.objects {
5567 event_log.add_object(object);
5568 }
5569 for relationship in result.relationships {
5570 event_log.add_relationship(relationship);
5571 }
5572 event_log.add_case(result.case_trace);
5573 };
5574
5575 for chain in &flows.p2p_chains {
5577 let po = &chain.purchase_order;
5578 let documents = P2pDocuments::new(
5579 &po.header.document_id,
5580 &po.vendor_id,
5581 &po.header.company_code,
5582 po.total_net_amount,
5583 &po.header.currency,
5584 &ocpm_uuid_factory,
5585 )
5586 .with_goods_receipt(
5587 chain
5588 .goods_receipts
5589 .first()
5590 .map(|gr| gr.header.document_id.as_str())
5591 .unwrap_or(""),
5592 &ocpm_uuid_factory,
5593 )
5594 .with_invoice(
5595 chain
5596 .vendor_invoice
5597 .as_ref()
5598 .map(|vi| vi.header.document_id.as_str())
5599 .unwrap_or(""),
5600 &ocpm_uuid_factory,
5601 )
5602 .with_payment(
5603 chain
5604 .payment
5605 .as_ref()
5606 .map(|p| p.header.document_id.as_str())
5607 .unwrap_or(""),
5608 &ocpm_uuid_factory,
5609 );
5610
5611 let start_time =
5612 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5613 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5614 add_result(&mut event_log, result);
5615
5616 if let Some(pb) = &pb {
5617 pb.inc(1);
5618 }
5619 }
5620
5621 for chain in &flows.o2c_chains {
5623 let so = &chain.sales_order;
5624 let documents = O2cDocuments::new(
5625 &so.header.document_id,
5626 &so.customer_id,
5627 &so.header.company_code,
5628 so.total_net_amount,
5629 &so.header.currency,
5630 &ocpm_uuid_factory,
5631 )
5632 .with_delivery(
5633 chain
5634 .deliveries
5635 .first()
5636 .map(|d| d.header.document_id.as_str())
5637 .unwrap_or(""),
5638 &ocpm_uuid_factory,
5639 )
5640 .with_invoice(
5641 chain
5642 .customer_invoice
5643 .as_ref()
5644 .map(|ci| ci.header.document_id.as_str())
5645 .unwrap_or(""),
5646 &ocpm_uuid_factory,
5647 )
5648 .with_receipt(
5649 chain
5650 .customer_receipt
5651 .as_ref()
5652 .map(|r| r.header.document_id.as_str())
5653 .unwrap_or(""),
5654 &ocpm_uuid_factory,
5655 );
5656
5657 let start_time =
5658 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5659 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5660 add_result(&mut event_log, result);
5661
5662 if let Some(pb) = &pb {
5663 pb.inc(1);
5664 }
5665 }
5666
5667 for project in &sourcing.sourcing_projects {
5669 let vendor_id = sourcing
5671 .contracts
5672 .iter()
5673 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5674 .map(|c| c.vendor_id.clone())
5675 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5676 .or_else(|| {
5677 self.master_data
5678 .vendors
5679 .first()
5680 .map(|v| v.vendor_id.clone())
5681 })
5682 .unwrap_or_else(|| "V000".to_string());
5683 let mut docs = S2cDocuments::new(
5684 &project.project_id,
5685 &vendor_id,
5686 &project.company_code,
5687 project.estimated_annual_spend,
5688 &ocpm_uuid_factory,
5689 );
5690 if let Some(rfx) = sourcing
5692 .rfx_events
5693 .iter()
5694 .find(|r| r.sourcing_project_id == project.project_id)
5695 {
5696 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5697 if let Some(bid) = sourcing.bids.iter().find(|b| {
5699 b.rfx_id == rfx.rfx_id
5700 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5701 }) {
5702 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5703 }
5704 }
5705 if let Some(contract) = sourcing
5707 .contracts
5708 .iter()
5709 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5710 {
5711 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5712 }
5713 let start_time = base_datetime - chrono::Duration::days(90);
5714 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5715 add_result(&mut event_log, result);
5716
5717 if let Some(pb) = &pb {
5718 pb.inc(1);
5719 }
5720 }
5721
5722 for run in &hr.payroll_runs {
5724 let employee_id = hr
5726 .payroll_line_items
5727 .iter()
5728 .find(|li| li.payroll_id == run.payroll_id)
5729 .map(|li| li.employee_id.as_str())
5730 .unwrap_or("EMP000");
5731 let docs = H2rDocuments::new(
5732 &run.payroll_id,
5733 employee_id,
5734 &run.company_code,
5735 run.total_gross,
5736 &ocpm_uuid_factory,
5737 )
5738 .with_time_entries(
5739 hr.time_entries
5740 .iter()
5741 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5742 .take(5)
5743 .map(|t| t.entry_id.as_str())
5744 .collect(),
5745 );
5746 let start_time = base_datetime - chrono::Duration::days(30);
5747 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5748 add_result(&mut event_log, result);
5749
5750 if let Some(pb) = &pb {
5751 pb.inc(1);
5752 }
5753 }
5754
5755 for order in &manufacturing.production_orders {
5757 let mut docs = MfgDocuments::new(
5758 &order.order_id,
5759 &order.material_id,
5760 &order.company_code,
5761 order.planned_quantity,
5762 &ocpm_uuid_factory,
5763 )
5764 .with_operations(
5765 order
5766 .operations
5767 .iter()
5768 .map(|o| format!("OP-{:04}", o.operation_number))
5769 .collect::<Vec<_>>()
5770 .iter()
5771 .map(|s| s.as_str())
5772 .collect(),
5773 );
5774 if let Some(insp) = manufacturing
5776 .quality_inspections
5777 .iter()
5778 .find(|i| i.reference_id == order.order_id)
5779 {
5780 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5781 }
5782 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5784 cc.items
5785 .iter()
5786 .any(|item| item.material_id == order.material_id)
5787 }) {
5788 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5789 }
5790 let start_time = base_datetime - chrono::Duration::days(60);
5791 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5792 add_result(&mut event_log, result);
5793
5794 if let Some(pb) = &pb {
5795 pb.inc(1);
5796 }
5797 }
5798
5799 for customer in &banking.customers {
5801 let customer_id_str = customer.customer_id.to_string();
5802 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5803 if let Some(account) = banking
5805 .accounts
5806 .iter()
5807 .find(|a| a.primary_owner_id == customer.customer_id)
5808 {
5809 let account_id_str = account.account_id.to_string();
5810 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5811 let txn_strs: Vec<String> = banking
5813 .transactions
5814 .iter()
5815 .filter(|t| t.account_id == account.account_id)
5816 .take(10)
5817 .map(|t| t.transaction_id.to_string())
5818 .collect();
5819 let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5820 let txn_amounts: Vec<rust_decimal::Decimal> = banking
5821 .transactions
5822 .iter()
5823 .filter(|t| t.account_id == account.account_id)
5824 .take(10)
5825 .map(|t| t.amount)
5826 .collect();
5827 if !txn_ids.is_empty() {
5828 docs = docs.with_transactions(txn_ids, txn_amounts);
5829 }
5830 }
5831 let start_time = base_datetime - chrono::Duration::days(180);
5832 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5833 add_result(&mut event_log, result);
5834
5835 if let Some(pb) = &pb {
5836 pb.inc(1);
5837 }
5838 }
5839
5840 for engagement in &audit.engagements {
5842 let engagement_id_str = engagement.engagement_id.to_string();
5843 let docs = AuditDocuments::new(
5844 &engagement_id_str,
5845 &engagement.client_entity_id,
5846 &ocpm_uuid_factory,
5847 )
5848 .with_workpapers(
5849 audit
5850 .workpapers
5851 .iter()
5852 .filter(|w| w.engagement_id == engagement.engagement_id)
5853 .take(10)
5854 .map(|w| w.workpaper_id.to_string())
5855 .collect::<Vec<_>>()
5856 .iter()
5857 .map(|s| s.as_str())
5858 .collect(),
5859 )
5860 .with_evidence(
5861 audit
5862 .evidence
5863 .iter()
5864 .filter(|e| e.engagement_id == engagement.engagement_id)
5865 .take(10)
5866 .map(|e| e.evidence_id.to_string())
5867 .collect::<Vec<_>>()
5868 .iter()
5869 .map(|s| s.as_str())
5870 .collect(),
5871 )
5872 .with_risks(
5873 audit
5874 .risk_assessments
5875 .iter()
5876 .filter(|r| r.engagement_id == engagement.engagement_id)
5877 .take(5)
5878 .map(|r| r.risk_id.to_string())
5879 .collect::<Vec<_>>()
5880 .iter()
5881 .map(|s| s.as_str())
5882 .collect(),
5883 )
5884 .with_findings(
5885 audit
5886 .findings
5887 .iter()
5888 .filter(|f| f.engagement_id == engagement.engagement_id)
5889 .take(5)
5890 .map(|f| f.finding_id.to_string())
5891 .collect::<Vec<_>>()
5892 .iter()
5893 .map(|s| s.as_str())
5894 .collect(),
5895 )
5896 .with_judgments(
5897 audit
5898 .judgments
5899 .iter()
5900 .filter(|j| j.engagement_id == engagement.engagement_id)
5901 .take(5)
5902 .map(|j| j.judgment_id.to_string())
5903 .collect::<Vec<_>>()
5904 .iter()
5905 .map(|s| s.as_str())
5906 .collect(),
5907 );
5908 let start_time = base_datetime - chrono::Duration::days(120);
5909 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5910 add_result(&mut event_log, result);
5911
5912 if let Some(pb) = &pb {
5913 pb.inc(1);
5914 }
5915 }
5916
5917 for recon in &financial_reporting.bank_reconciliations {
5919 let docs = BankReconDocuments::new(
5920 &recon.reconciliation_id,
5921 &recon.bank_account_id,
5922 &recon.company_code,
5923 recon.bank_ending_balance,
5924 &ocpm_uuid_factory,
5925 )
5926 .with_statement_lines(
5927 recon
5928 .statement_lines
5929 .iter()
5930 .take(20)
5931 .map(|l| l.line_id.as_str())
5932 .collect(),
5933 )
5934 .with_reconciling_items(
5935 recon
5936 .reconciling_items
5937 .iter()
5938 .take(10)
5939 .map(|i| i.item_id.as_str())
5940 .collect(),
5941 );
5942 let start_time = base_datetime - chrono::Duration::days(30);
5943 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
5944 add_result(&mut event_log, result);
5945
5946 if let Some(pb) = &pb {
5947 pb.inc(1);
5948 }
5949 }
5950
5951 event_log.compute_variants();
5953
5954 let summary = event_log.summary();
5955
5956 if let Some(pb) = pb {
5957 pb.finish_with_message(format!(
5958 "Generated {} OCPM events, {} objects",
5959 summary.event_count, summary.object_count
5960 ));
5961 }
5962
5963 Ok(OcpmSnapshot {
5964 event_count: summary.event_count,
5965 object_count: summary.object_count,
5966 case_count: summary.case_count,
5967 event_log: Some(event_log),
5968 })
5969 }
5970
5971 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
5973 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
5974
5975 let total_rate = if self.config.anomaly_injection.enabled {
5978 self.config.anomaly_injection.rates.total_rate
5979 } else if self.config.fraud.enabled {
5980 self.config.fraud.fraud_rate
5981 } else {
5982 0.02
5983 };
5984
5985 let fraud_rate = if self.config.anomaly_injection.enabled {
5986 self.config.anomaly_injection.rates.fraud_rate
5987 } else {
5988 AnomalyRateConfig::default().fraud_rate
5989 };
5990
5991 let error_rate = if self.config.anomaly_injection.enabled {
5992 self.config.anomaly_injection.rates.error_rate
5993 } else {
5994 AnomalyRateConfig::default().error_rate
5995 };
5996
5997 let process_issue_rate = if self.config.anomaly_injection.enabled {
5998 self.config.anomaly_injection.rates.process_rate
5999 } else {
6000 AnomalyRateConfig::default().process_issue_rate
6001 };
6002
6003 let anomaly_config = AnomalyInjectorConfig {
6004 rates: AnomalyRateConfig {
6005 total_rate,
6006 fraud_rate,
6007 error_rate,
6008 process_issue_rate,
6009 ..Default::default()
6010 },
6011 seed: self.seed + 5000,
6012 ..Default::default()
6013 };
6014
6015 let mut injector = AnomalyInjector::new(anomaly_config);
6016 let result = injector.process_entries(entries);
6017
6018 if let Some(pb) = &pb {
6019 pb.inc(entries.len() as u64);
6020 pb.finish_with_message("Anomaly injection complete");
6021 }
6022
6023 let mut by_type = HashMap::new();
6024 for label in &result.labels {
6025 *by_type
6026 .entry(format!("{:?}", label.anomaly_type))
6027 .or_insert(0) += 1;
6028 }
6029
6030 Ok(AnomalyLabels {
6031 labels: result.labels,
6032 summary: Some(result.summary),
6033 by_type,
6034 })
6035 }
6036
6037 fn validate_journal_entries(
6046 &mut self,
6047 entries: &[JournalEntry],
6048 ) -> SynthResult<BalanceValidationResult> {
6049 let clean_entries: Vec<&JournalEntry> = entries
6051 .iter()
6052 .filter(|e| {
6053 e.header
6054 .header_text
6055 .as_ref()
6056 .map(|t| !t.contains("[HUMAN_ERROR:"))
6057 .unwrap_or(true)
6058 })
6059 .collect();
6060
6061 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6062
6063 let config = BalanceTrackerConfig {
6065 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
6069 };
6070 let validation_currency = self
6071 .config
6072 .companies
6073 .first()
6074 .map(|c| c.currency.clone())
6075 .unwrap_or_else(|| "USD".to_string());
6076
6077 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6078
6079 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6081 let errors = tracker.apply_entries(&clean_refs);
6082
6083 if let Some(pb) = &pb {
6084 pb.inc(entries.len() as u64);
6085 }
6086
6087 let has_unbalanced = tracker
6090 .get_validation_errors()
6091 .iter()
6092 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6093
6094 let mut all_errors = errors;
6097 all_errors.extend(tracker.get_validation_errors().iter().cloned());
6098 let company_codes: Vec<String> = self
6099 .config
6100 .companies
6101 .iter()
6102 .map(|c| c.code.clone())
6103 .collect();
6104
6105 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6106 .map(|d| d + chrono::Months::new(self.config.global.period_months))
6107 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6108
6109 for company_code in &company_codes {
6110 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6111 all_errors.push(e);
6112 }
6113 }
6114
6115 let stats = tracker.get_statistics();
6117
6118 let is_balanced = all_errors.is_empty();
6120
6121 if let Some(pb) = pb {
6122 let msg = if is_balanced {
6123 "Balance validation passed"
6124 } else {
6125 "Balance validation completed with errors"
6126 };
6127 pb.finish_with_message(msg);
6128 }
6129
6130 Ok(BalanceValidationResult {
6131 validated: true,
6132 is_balanced,
6133 entries_processed: stats.entries_processed,
6134 total_debits: stats.total_debits,
6135 total_credits: stats.total_credits,
6136 accounts_tracked: stats.accounts_tracked,
6137 companies_tracked: stats.companies_tracked,
6138 validation_errors: all_errors,
6139 has_unbalanced_entries: has_unbalanced,
6140 })
6141 }
6142
6143 fn inject_data_quality(
6148 &mut self,
6149 entries: &mut [JournalEntry],
6150 ) -> SynthResult<DataQualityStats> {
6151 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6152
6153 let config = if self.config.data_quality.enabled {
6156 let dq = &self.config.data_quality;
6157 DataQualityConfig {
6158 enable_missing_values: dq.missing_values.enabled,
6159 missing_values: datasynth_generators::MissingValueConfig {
6160 global_rate: dq.effective_missing_rate(),
6161 ..Default::default()
6162 },
6163 enable_format_variations: dq.format_variations.enabled,
6164 format_variations: datasynth_generators::FormatVariationConfig {
6165 date_variation_rate: dq.format_variations.dates.rate,
6166 amount_variation_rate: dq.format_variations.amounts.rate,
6167 identifier_variation_rate: dq.format_variations.identifiers.rate,
6168 ..Default::default()
6169 },
6170 enable_duplicates: dq.duplicates.enabled,
6171 duplicates: datasynth_generators::DuplicateConfig {
6172 duplicate_rate: dq.effective_duplicate_rate(),
6173 ..Default::default()
6174 },
6175 enable_typos: dq.typos.enabled,
6176 typos: datasynth_generators::TypoConfig {
6177 char_error_rate: dq.effective_typo_rate(),
6178 ..Default::default()
6179 },
6180 enable_encoding_issues: dq.encoding_issues.enabled,
6181 encoding_issue_rate: dq.encoding_issues.rate,
6182 seed: self.seed.wrapping_add(77), track_statistics: true,
6184 }
6185 } else {
6186 DataQualityConfig::minimal()
6187 };
6188 let mut injector = DataQualityInjector::new(config);
6189
6190 injector.set_country_pack(self.primary_pack().clone());
6192
6193 let context = HashMap::new();
6195
6196 for entry in entries.iter_mut() {
6197 if let Some(text) = &entry.header.header_text {
6199 let processed = injector.process_text_field(
6200 "header_text",
6201 text,
6202 &entry.header.document_id.to_string(),
6203 &context,
6204 );
6205 match processed {
6206 Some(new_text) if new_text != *text => {
6207 entry.header.header_text = Some(new_text);
6208 }
6209 None => {
6210 entry.header.header_text = None; }
6212 _ => {}
6213 }
6214 }
6215
6216 if let Some(ref_text) = &entry.header.reference {
6218 let processed = injector.process_text_field(
6219 "reference",
6220 ref_text,
6221 &entry.header.document_id.to_string(),
6222 &context,
6223 );
6224 match processed {
6225 Some(new_text) if new_text != *ref_text => {
6226 entry.header.reference = Some(new_text);
6227 }
6228 None => {
6229 entry.header.reference = None;
6230 }
6231 _ => {}
6232 }
6233 }
6234
6235 let user_persona = entry.header.user_persona.clone();
6237 if let Some(processed) = injector.process_text_field(
6238 "user_persona",
6239 &user_persona,
6240 &entry.header.document_id.to_string(),
6241 &context,
6242 ) {
6243 if processed != user_persona {
6244 entry.header.user_persona = processed;
6245 }
6246 }
6247
6248 for line in &mut entry.lines {
6250 if let Some(ref text) = line.line_text {
6252 let processed = injector.process_text_field(
6253 "line_text",
6254 text,
6255 &entry.header.document_id.to_string(),
6256 &context,
6257 );
6258 match processed {
6259 Some(new_text) if new_text != *text => {
6260 line.line_text = Some(new_text);
6261 }
6262 None => {
6263 line.line_text = None;
6264 }
6265 _ => {}
6266 }
6267 }
6268
6269 if let Some(cc) = &line.cost_center {
6271 let processed = injector.process_text_field(
6272 "cost_center",
6273 cc,
6274 &entry.header.document_id.to_string(),
6275 &context,
6276 );
6277 match processed {
6278 Some(new_cc) if new_cc != *cc => {
6279 line.cost_center = Some(new_cc);
6280 }
6281 None => {
6282 line.cost_center = None;
6283 }
6284 _ => {}
6285 }
6286 }
6287 }
6288
6289 if let Some(pb) = &pb {
6290 pb.inc(1);
6291 }
6292 }
6293
6294 if let Some(pb) = pb {
6295 pb.finish_with_message("Data quality injection complete");
6296 }
6297
6298 Ok(injector.stats().clone())
6299 }
6300
6301 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6312 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6313 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6314 let fiscal_year = start_date.year() as u16;
6315 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6316
6317 let total_revenue: rust_decimal::Decimal = entries
6319 .iter()
6320 .flat_map(|e| e.lines.iter())
6321 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6322 .map(|l| l.credit_amount)
6323 .sum();
6324
6325 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6327
6328 let mut snapshot = AuditSnapshot::default();
6329
6330 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6332 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6333 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6334 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6335 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6336 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6337
6338 let accounts: Vec<String> = self
6340 .coa
6341 .as_ref()
6342 .map(|coa| {
6343 coa.get_postable_accounts()
6344 .iter()
6345 .map(|acc| acc.account_code().to_string())
6346 .collect()
6347 })
6348 .unwrap_or_default();
6349
6350 for (i, company) in self.config.companies.iter().enumerate() {
6352 let company_revenue = total_revenue
6354 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6355
6356 let engagements_for_company =
6358 self.phase_config.audit_engagements / self.config.companies.len().max(1);
6359 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6360 1
6361 } else {
6362 0
6363 };
6364
6365 for _eng_idx in 0..(engagements_for_company + extra) {
6366 let mut engagement = engagement_gen.generate_engagement(
6368 &company.code,
6369 &company.name,
6370 fiscal_year,
6371 period_end,
6372 company_revenue,
6373 None, );
6375
6376 if !self.master_data.employees.is_empty() {
6378 let emp_count = self.master_data.employees.len();
6379 let base = (i * 10 + _eng_idx) % emp_count;
6381 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6382 .employee_id
6383 .clone();
6384 engagement.engagement_manager_id = self.master_data.employees
6385 [(base + 1) % emp_count]
6386 .employee_id
6387 .clone();
6388 let real_team: Vec<String> = engagement
6389 .team_member_ids
6390 .iter()
6391 .enumerate()
6392 .map(|(j, _)| {
6393 self.master_data.employees[(base + 2 + j) % emp_count]
6394 .employee_id
6395 .clone()
6396 })
6397 .collect();
6398 engagement.team_member_ids = real_team;
6399 }
6400
6401 if let Some(pb) = &pb {
6402 pb.inc(1);
6403 }
6404
6405 let team_members: Vec<String> = engagement.team_member_ids.clone();
6407
6408 let workpapers =
6410 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6411
6412 for wp in &workpapers {
6413 if let Some(pb) = &pb {
6414 pb.inc(1);
6415 }
6416
6417 let evidence = evidence_gen.generate_evidence_for_workpaper(
6419 wp,
6420 &team_members,
6421 wp.preparer_date,
6422 );
6423
6424 for _ in &evidence {
6425 if let Some(pb) = &pb {
6426 pb.inc(1);
6427 }
6428 }
6429
6430 snapshot.evidence.extend(evidence);
6431 }
6432
6433 let risks =
6435 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6436
6437 for _ in &risks {
6438 if let Some(pb) = &pb {
6439 pb.inc(1);
6440 }
6441 }
6442 snapshot.risk_assessments.extend(risks);
6443
6444 let findings = finding_gen.generate_findings_for_engagement(
6446 &engagement,
6447 &workpapers,
6448 &team_members,
6449 );
6450
6451 for _ in &findings {
6452 if let Some(pb) = &pb {
6453 pb.inc(1);
6454 }
6455 }
6456 snapshot.findings.extend(findings);
6457
6458 let judgments =
6460 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6461
6462 for _ in &judgments {
6463 if let Some(pb) = &pb {
6464 pb.inc(1);
6465 }
6466 }
6467 snapshot.judgments.extend(judgments);
6468
6469 snapshot.workpapers.extend(workpapers);
6471 snapshot.engagements.push(engagement);
6472 }
6473 }
6474
6475 if let Some(pb) = pb {
6476 pb.finish_with_message(format!(
6477 "Audit data: {} engagements, {} workpapers, {} evidence",
6478 snapshot.engagements.len(),
6479 snapshot.workpapers.len(),
6480 snapshot.evidence.len()
6481 ));
6482 }
6483
6484 Ok(snapshot)
6485 }
6486
6487 fn export_graphs(
6494 &mut self,
6495 entries: &[JournalEntry],
6496 _coa: &Arc<ChartOfAccounts>,
6497 stats: &mut EnhancedGenerationStatistics,
6498 ) -> SynthResult<GraphExportSnapshot> {
6499 let pb = self.create_progress_bar(100, "Exporting Graphs");
6500
6501 let mut snapshot = GraphExportSnapshot::default();
6502
6503 let output_dir = self
6505 .output_path
6506 .clone()
6507 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6508 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6509
6510 for graph_type in &self.config.graph_export.graph_types {
6512 if let Some(pb) = &pb {
6513 pb.inc(10);
6514 }
6515
6516 let graph_config = TransactionGraphConfig {
6518 include_vendors: false,
6519 include_customers: false,
6520 create_debit_credit_edges: true,
6521 include_document_nodes: graph_type.include_document_nodes,
6522 min_edge_weight: graph_type.min_edge_weight,
6523 aggregate_parallel_edges: graph_type.aggregate_edges,
6524 };
6525
6526 let mut builder = TransactionGraphBuilder::new(graph_config);
6527 builder.add_journal_entries(entries);
6528 let graph = builder.build();
6529
6530 stats.graph_node_count += graph.node_count();
6532 stats.graph_edge_count += graph.edge_count();
6533
6534 if let Some(pb) = &pb {
6535 pb.inc(40);
6536 }
6537
6538 for format in &self.config.graph_export.formats {
6540 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6541
6542 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6544 warn!("Failed to create graph output directory: {}", e);
6545 continue;
6546 }
6547
6548 match format {
6549 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6550 let pyg_config = PyGExportConfig {
6551 common: datasynth_graph::CommonExportConfig {
6552 export_node_features: true,
6553 export_edge_features: true,
6554 export_node_labels: true,
6555 export_edge_labels: true,
6556 export_masks: true,
6557 train_ratio: self.config.graph_export.train_ratio,
6558 val_ratio: self.config.graph_export.validation_ratio,
6559 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6560 },
6561 one_hot_categoricals: false,
6562 };
6563
6564 let exporter = PyGExporter::new(pyg_config);
6565 match exporter.export(&graph, &format_dir) {
6566 Ok(metadata) => {
6567 snapshot.exports.insert(
6568 format!("{}_{}", graph_type.name, "pytorch_geometric"),
6569 GraphExportInfo {
6570 name: graph_type.name.clone(),
6571 format: "pytorch_geometric".to_string(),
6572 output_path: format_dir.clone(),
6573 node_count: metadata.num_nodes,
6574 edge_count: metadata.num_edges,
6575 },
6576 );
6577 snapshot.graph_count += 1;
6578 }
6579 Err(e) => {
6580 warn!("Failed to export PyTorch Geometric graph: {}", e);
6581 }
6582 }
6583 }
6584 datasynth_config::schema::GraphExportFormat::Neo4j => {
6585 warn!("Neo4j graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6587 }
6588 datasynth_config::schema::GraphExportFormat::Dgl => {
6589 warn!("DGL graph export is not yet implemented; skipping. Use 'pytorch_geometric' or 'rust_graph' format instead.");
6591 }
6592 datasynth_config::schema::GraphExportFormat::RustGraph => {
6593 use datasynth_graph::{
6594 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6595 };
6596
6597 let rustgraph_config = RustGraphExportConfig {
6598 include_features: true,
6599 include_temporal: true,
6600 include_labels: true,
6601 source_name: "datasynth".to_string(),
6602 batch_id: None,
6603 output_format: RustGraphOutputFormat::JsonLines,
6604 export_node_properties: true,
6605 export_edge_properties: true,
6606 pretty_print: false,
6607 };
6608
6609 let exporter = RustGraphExporter::new(rustgraph_config);
6610 match exporter.export(&graph, &format_dir) {
6611 Ok(metadata) => {
6612 snapshot.exports.insert(
6613 format!("{}_{}", graph_type.name, "rustgraph"),
6614 GraphExportInfo {
6615 name: graph_type.name.clone(),
6616 format: "rustgraph".to_string(),
6617 output_path: format_dir.clone(),
6618 node_count: metadata.num_nodes,
6619 edge_count: metadata.num_edges,
6620 },
6621 );
6622 snapshot.graph_count += 1;
6623 }
6624 Err(e) => {
6625 warn!("Failed to export RustGraph: {}", e);
6626 }
6627 }
6628 }
6629 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6630 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6632 }
6633 }
6634 }
6635
6636 if let Some(pb) = &pb {
6637 pb.inc(40);
6638 }
6639 }
6640
6641 stats.graph_export_count = snapshot.graph_count;
6642 snapshot.exported = snapshot.graph_count > 0;
6643
6644 if let Some(pb) = pb {
6645 pb.finish_with_message(format!(
6646 "Graphs exported: {} graphs ({} nodes, {} edges)",
6647 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6648 ));
6649 }
6650
6651 Ok(snapshot)
6652 }
6653
6654 fn build_additional_graphs(
6659 &self,
6660 banking: &BankingSnapshot,
6661 _intercompany: &IntercompanySnapshot,
6662 entries: &[JournalEntry],
6663 stats: &mut EnhancedGenerationStatistics,
6664 ) {
6665 let output_dir = self
6666 .output_path
6667 .clone()
6668 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6669 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6670
6671 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6673 info!("Phase 10c: Building banking network graph");
6674 let config = BankingGraphConfig::default();
6675 let mut builder = BankingGraphBuilder::new(config);
6676 builder.add_customers(&banking.customers);
6677 builder.add_accounts(&banking.accounts, &banking.customers);
6678 builder.add_transactions(&banking.transactions);
6679 let graph = builder.build();
6680
6681 let node_count = graph.node_count();
6682 let edge_count = graph.edge_count();
6683 stats.graph_node_count += node_count;
6684 stats.graph_edge_count += edge_count;
6685
6686 for format in &self.config.graph_export.formats {
6688 if matches!(
6689 format,
6690 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6691 ) {
6692 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6693 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6694 warn!("Failed to create banking graph output dir: {}", e);
6695 continue;
6696 }
6697 let pyg_config = PyGExportConfig::default();
6698 let exporter = PyGExporter::new(pyg_config);
6699 if let Err(e) = exporter.export(&graph, &format_dir) {
6700 warn!("Failed to export banking graph as PyG: {}", e);
6701 } else {
6702 info!(
6703 "Banking network graph exported: {} nodes, {} edges",
6704 node_count, edge_count
6705 );
6706 }
6707 }
6708 }
6709 }
6710
6711 let approval_entries: Vec<_> = entries
6713 .iter()
6714 .filter(|je| je.header.approval_workflow.is_some())
6715 .collect();
6716
6717 if !approval_entries.is_empty() {
6718 info!(
6719 "Phase 10c: Building approval network graph ({} entries with approvals)",
6720 approval_entries.len()
6721 );
6722 let config = ApprovalGraphConfig::default();
6723 let mut builder = ApprovalGraphBuilder::new(config);
6724
6725 for je in &approval_entries {
6726 if let Some(ref wf) = je.header.approval_workflow {
6727 for action in &wf.actions {
6728 let record = datasynth_core::models::ApprovalRecord {
6729 approval_id: format!(
6730 "APR-{}-{}",
6731 je.header.document_id, action.approval_level
6732 ),
6733 document_number: je.header.document_id.to_string(),
6734 document_type: "JE".to_string(),
6735 company_code: je.company_code().to_string(),
6736 requester_id: wf.preparer_id.clone(),
6737 requester_name: Some(wf.preparer_name.clone()),
6738 approver_id: action.actor_id.clone(),
6739 approver_name: action.actor_name.clone(),
6740 approval_date: je.posting_date(),
6741 action: format!("{:?}", action.action),
6742 amount: wf.amount,
6743 approval_limit: None,
6744 comments: action.comments.clone(),
6745 delegation_from: None,
6746 is_auto_approved: false,
6747 };
6748 builder.add_approval(&record);
6749 }
6750 }
6751 }
6752
6753 let graph = builder.build();
6754 let node_count = graph.node_count();
6755 let edge_count = graph.edge_count();
6756 stats.graph_node_count += node_count;
6757 stats.graph_edge_count += edge_count;
6758
6759 for format in &self.config.graph_export.formats {
6761 if matches!(
6762 format,
6763 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6764 ) {
6765 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6766 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6767 warn!("Failed to create approval graph output dir: {}", e);
6768 continue;
6769 }
6770 let pyg_config = PyGExportConfig::default();
6771 let exporter = PyGExporter::new(pyg_config);
6772 if let Err(e) = exporter.export(&graph, &format_dir) {
6773 warn!("Failed to export approval graph as PyG: {}", e);
6774 } else {
6775 info!(
6776 "Approval network graph exported: {} nodes, {} edges",
6777 node_count, edge_count
6778 );
6779 }
6780 }
6781 }
6782 }
6783
6784 debug!(
6787 "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6788 available when intercompany relationships are modeled as IntercompanyRelationship)"
6789 );
6790 }
6791
6792 #[allow(clippy::too_many_arguments)]
6799 fn export_hypergraph(
6800 &self,
6801 coa: &Arc<ChartOfAccounts>,
6802 entries: &[JournalEntry],
6803 document_flows: &DocumentFlowSnapshot,
6804 sourcing: &SourcingSnapshot,
6805 hr: &HrSnapshot,
6806 manufacturing: &ManufacturingSnapshot,
6807 banking: &BankingSnapshot,
6808 audit: &AuditSnapshot,
6809 financial_reporting: &FinancialReportingSnapshot,
6810 ocpm: &OcpmSnapshot,
6811 stats: &mut EnhancedGenerationStatistics,
6812 ) -> SynthResult<HypergraphExportInfo> {
6813 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6814 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6815 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6816 use datasynth_graph::models::hypergraph::AggregationStrategy;
6817
6818 let hg_settings = &self.config.graph_export.hypergraph;
6819
6820 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6822 "truncate" => AggregationStrategy::Truncate,
6823 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6824 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6825 "importance_sample" => AggregationStrategy::ImportanceSample,
6826 _ => AggregationStrategy::PoolByCounterparty,
6827 };
6828
6829 let builder_config = HypergraphConfig {
6830 max_nodes: hg_settings.max_nodes,
6831 aggregation_strategy,
6832 include_coso: hg_settings.governance_layer.include_coso,
6833 include_controls: hg_settings.governance_layer.include_controls,
6834 include_sox: hg_settings.governance_layer.include_sox,
6835 include_vendors: hg_settings.governance_layer.include_vendors,
6836 include_customers: hg_settings.governance_layer.include_customers,
6837 include_employees: hg_settings.governance_layer.include_employees,
6838 include_p2p: hg_settings.process_layer.include_p2p,
6839 include_o2c: hg_settings.process_layer.include_o2c,
6840 include_s2c: hg_settings.process_layer.include_s2c,
6841 include_h2r: hg_settings.process_layer.include_h2r,
6842 include_mfg: hg_settings.process_layer.include_mfg,
6843 include_bank: hg_settings.process_layer.include_bank,
6844 include_audit: hg_settings.process_layer.include_audit,
6845 include_r2r: hg_settings.process_layer.include_r2r,
6846 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6847 docs_per_counterparty_threshold: hg_settings
6848 .process_layer
6849 .docs_per_counterparty_threshold,
6850 include_accounts: hg_settings.accounting_layer.include_accounts,
6851 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6852 include_cross_layer_edges: hg_settings.cross_layer.enabled,
6853 };
6854
6855 let mut builder = HypergraphBuilder::new(builder_config);
6856
6857 builder.add_coso_framework();
6859
6860 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
6863 let controls = InternalControl::standard_controls();
6864 builder.add_controls(&controls);
6865 }
6866
6867 builder.add_vendors(&self.master_data.vendors);
6869 builder.add_customers(&self.master_data.customers);
6870 builder.add_employees(&self.master_data.employees);
6871
6872 builder.add_p2p_documents(
6874 &document_flows.purchase_orders,
6875 &document_flows.goods_receipts,
6876 &document_flows.vendor_invoices,
6877 &document_flows.payments,
6878 );
6879 builder.add_o2c_documents(
6880 &document_flows.sales_orders,
6881 &document_flows.deliveries,
6882 &document_flows.customer_invoices,
6883 );
6884 builder.add_s2c_documents(
6885 &sourcing.sourcing_projects,
6886 &sourcing.qualifications,
6887 &sourcing.rfx_events,
6888 &sourcing.bids,
6889 &sourcing.bid_evaluations,
6890 &sourcing.contracts,
6891 );
6892 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
6893 builder.add_mfg_documents(
6894 &manufacturing.production_orders,
6895 &manufacturing.quality_inspections,
6896 &manufacturing.cycle_counts,
6897 );
6898 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
6899 builder.add_audit_documents(
6900 &audit.engagements,
6901 &audit.workpapers,
6902 &audit.findings,
6903 &audit.evidence,
6904 &audit.risk_assessments,
6905 &audit.judgments,
6906 );
6907 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
6908
6909 if let Some(ref event_log) = ocpm.event_log {
6911 builder.add_ocpm_events(event_log);
6912 }
6913
6914 builder.add_accounts(coa);
6916 builder.add_journal_entries_as_hyperedges(entries);
6917
6918 let hypergraph = builder.build();
6920
6921 let output_dir = self
6923 .output_path
6924 .clone()
6925 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6926 let hg_dir = output_dir
6927 .join(&self.config.graph_export.output_subdirectory)
6928 .join(&hg_settings.output_subdirectory);
6929
6930 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
6932 "unified" => {
6933 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6934 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6935 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
6936 })?;
6937 (
6938 metadata.num_nodes,
6939 metadata.num_edges,
6940 metadata.num_hyperedges,
6941 )
6942 }
6943 _ => {
6944 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
6946 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
6947 SynthError::generation(format!("Hypergraph export failed: {}", e))
6948 })?;
6949 (
6950 metadata.num_nodes,
6951 metadata.num_edges,
6952 metadata.num_hyperedges,
6953 )
6954 }
6955 };
6956
6957 #[cfg(feature = "streaming")]
6959 if let Some(ref target_url) = hg_settings.stream_target {
6960 use crate::stream_client::{StreamClient, StreamConfig};
6961 use std::io::Write as _;
6962
6963 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
6964 let stream_config = StreamConfig {
6965 target_url: target_url.clone(),
6966 batch_size: hg_settings.stream_batch_size,
6967 api_key,
6968 ..StreamConfig::default()
6969 };
6970
6971 match StreamClient::new(stream_config) {
6972 Ok(mut client) => {
6973 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
6974 match exporter.export_to_writer(&hypergraph, &mut client) {
6975 Ok(_) => {
6976 if let Err(e) = client.flush() {
6977 warn!("Failed to flush stream client: {}", e);
6978 } else {
6979 info!("Streamed {} records to {}", client.total_sent(), target_url);
6980 }
6981 }
6982 Err(e) => {
6983 warn!("Streaming export failed: {}", e);
6984 }
6985 }
6986 }
6987 Err(e) => {
6988 warn!("Failed to create stream client: {}", e);
6989 }
6990 }
6991 }
6992
6993 stats.graph_node_count += num_nodes;
6995 stats.graph_edge_count += num_edges;
6996 stats.graph_export_count += 1;
6997
6998 Ok(HypergraphExportInfo {
6999 node_count: num_nodes,
7000 edge_count: num_edges,
7001 hyperedge_count: num_hyperedges,
7002 output_path: hg_dir,
7003 })
7004 }
7005
7006 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7011 let pb = self.create_progress_bar(100, "Generating Banking Data");
7012
7013 let orchestrator = BankingOrchestratorBuilder::new()
7015 .config(self.config.banking.clone())
7016 .seed(self.seed + 9000)
7017 .country_pack(self.primary_pack().clone())
7018 .build();
7019
7020 if let Some(pb) = &pb {
7021 pb.inc(10);
7022 }
7023
7024 let result = orchestrator.generate();
7026
7027 if let Some(pb) = &pb {
7028 pb.inc(90);
7029 pb.finish_with_message(format!(
7030 "Banking: {} customers, {} transactions",
7031 result.customers.len(),
7032 result.transactions.len()
7033 ));
7034 }
7035
7036 let mut banking_customers = result.customers;
7041 let core_customers = &self.master_data.customers;
7042 if !core_customers.is_empty() {
7043 for (i, bc) in banking_customers.iter_mut().enumerate() {
7044 let core = &core_customers[i % core_customers.len()];
7045 bc.name = CustomerName::business(&core.name);
7046 bc.residence_country = core.country.clone();
7047 bc.enterprise_customer_id = Some(core.customer_id.clone());
7048 }
7049 debug!(
7050 "Cross-referenced {} banking customers with {} core customers",
7051 banking_customers.len(),
7052 core_customers.len()
7053 );
7054 }
7055
7056 Ok(BankingSnapshot {
7057 customers: banking_customers,
7058 accounts: result.accounts,
7059 transactions: result.transactions,
7060 transaction_labels: result.transaction_labels,
7061 customer_labels: result.customer_labels,
7062 account_labels: result.account_labels,
7063 relationship_labels: result.relationship_labels,
7064 narratives: result.narratives,
7065 suspicious_count: result.stats.suspicious_count,
7066 scenario_count: result.scenarios.len(),
7067 })
7068 }
7069
7070 fn calculate_total_transactions(&self) -> u64 {
7072 let months = self.config.global.period_months as f64;
7073 self.config
7074 .companies
7075 .iter()
7076 .map(|c| {
7077 let annual = c.annual_transaction_volume.count() as f64;
7078 let weighted = annual * c.volume_weight;
7079 (weighted * months / 12.0) as u64
7080 })
7081 .sum()
7082 }
7083
7084 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7086 if !self.phase_config.show_progress {
7087 return None;
7088 }
7089
7090 let pb = if let Some(mp) = &self.multi_progress {
7091 mp.add(ProgressBar::new(total))
7092 } else {
7093 ProgressBar::new(total)
7094 };
7095
7096 pb.set_style(
7097 ProgressStyle::default_bar()
7098 .template(&format!(
7099 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7100 message
7101 ))
7102 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7103 .progress_chars("#>-"),
7104 );
7105
7106 Some(pb)
7107 }
7108
7109 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7111 self.coa.clone()
7112 }
7113
7114 pub fn get_master_data(&self) -> &MasterDataSnapshot {
7116 &self.master_data
7117 }
7118
7119 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7121 use super::lineage::LineageGraphBuilder;
7122
7123 let mut builder = LineageGraphBuilder::new();
7124
7125 builder.add_config_section("config:global", "Global Config");
7127 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7128 builder.add_config_section("config:transactions", "Transaction Config");
7129
7130 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7132 builder.add_generator_phase("phase:je", "Journal Entry Generation");
7133
7134 builder.configured_by("phase:coa", "config:chart_of_accounts");
7136 builder.configured_by("phase:je", "config:transactions");
7137
7138 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7140 builder.produced_by("output:je", "phase:je");
7141
7142 if self.phase_config.generate_master_data {
7144 builder.add_config_section("config:master_data", "Master Data Config");
7145 builder.add_generator_phase("phase:master_data", "Master Data Generation");
7146 builder.configured_by("phase:master_data", "config:master_data");
7147 builder.input_to("phase:master_data", "phase:je");
7148 }
7149
7150 if self.phase_config.generate_document_flows {
7151 builder.add_config_section("config:document_flows", "Document Flow Config");
7152 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7153 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7154 builder.configured_by("phase:p2p", "config:document_flows");
7155 builder.configured_by("phase:o2c", "config:document_flows");
7156
7157 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7158 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7159 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7160 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7161 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7162
7163 builder.produced_by("output:po", "phase:p2p");
7164 builder.produced_by("output:gr", "phase:p2p");
7165 builder.produced_by("output:vi", "phase:p2p");
7166 builder.produced_by("output:so", "phase:o2c");
7167 builder.produced_by("output:ci", "phase:o2c");
7168 }
7169
7170 if self.phase_config.inject_anomalies {
7171 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7172 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7173 builder.configured_by("phase:anomaly", "config:fraud");
7174 builder.add_output_file(
7175 "output:labels",
7176 "Anomaly Labels",
7177 "labels/anomaly_labels.csv",
7178 );
7179 builder.produced_by("output:labels", "phase:anomaly");
7180 }
7181
7182 if self.phase_config.generate_audit {
7183 builder.add_config_section("config:audit", "Audit Config");
7184 builder.add_generator_phase("phase:audit", "Audit Data Generation");
7185 builder.configured_by("phase:audit", "config:audit");
7186 }
7187
7188 if self.phase_config.generate_banking {
7189 builder.add_config_section("config:banking", "Banking Config");
7190 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7191 builder.configured_by("phase:banking", "config:banking");
7192 }
7193
7194 if self.config.llm.enabled {
7195 builder.add_config_section("config:llm", "LLM Enrichment Config");
7196 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7197 builder.configured_by("phase:llm_enrichment", "config:llm");
7198 }
7199
7200 if self.config.diffusion.enabled {
7201 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7202 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7203 builder.configured_by("phase:diffusion", "config:diffusion");
7204 }
7205
7206 if self.config.causal.enabled {
7207 builder.add_config_section("config:causal", "Causal Generation Config");
7208 builder.add_generator_phase("phase:causal", "Causal Overlay");
7209 builder.configured_by("phase:causal", "config:causal");
7210 }
7211
7212 builder.build()
7213 }
7214}
7215
7216fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7218 match format {
7219 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7220 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7221 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7222 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7223 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7224 }
7225}
7226
7227#[cfg(test)]
7228#[allow(clippy::unwrap_used)]
7229mod tests {
7230 use super::*;
7231 use datasynth_config::schema::*;
7232
7233 fn create_test_config() -> GeneratorConfig {
7234 GeneratorConfig {
7235 global: GlobalConfig {
7236 industry: IndustrySector::Manufacturing,
7237 start_date: "2024-01-01".to_string(),
7238 period_months: 1,
7239 seed: Some(42),
7240 parallel: false,
7241 group_currency: "USD".to_string(),
7242 worker_threads: 0,
7243 memory_limit_mb: 0,
7244 },
7245 companies: vec![CompanyConfig {
7246 code: "1000".to_string(),
7247 name: "Test Company".to_string(),
7248 currency: "USD".to_string(),
7249 country: "US".to_string(),
7250 annual_transaction_volume: TransactionVolume::TenK,
7251 volume_weight: 1.0,
7252 fiscal_year_variant: "K4".to_string(),
7253 }],
7254 chart_of_accounts: ChartOfAccountsConfig {
7255 complexity: CoAComplexity::Small,
7256 industry_specific: true,
7257 custom_accounts: None,
7258 min_hierarchy_depth: 2,
7259 max_hierarchy_depth: 4,
7260 },
7261 transactions: TransactionConfig::default(),
7262 output: OutputConfig::default(),
7263 fraud: FraudConfig::default(),
7264 internal_controls: InternalControlsConfig::default(),
7265 business_processes: BusinessProcessConfig::default(),
7266 user_personas: UserPersonaConfig::default(),
7267 templates: TemplateConfig::default(),
7268 approval: ApprovalConfig::default(),
7269 departments: DepartmentConfig::default(),
7270 master_data: MasterDataConfig::default(),
7271 document_flows: DocumentFlowConfig::default(),
7272 intercompany: IntercompanyConfig::default(),
7273 balance: BalanceConfig::default(),
7274 ocpm: OcpmConfig::default(),
7275 audit: AuditGenerationConfig::default(),
7276 banking: datasynth_banking::BankingConfig::default(),
7277 data_quality: DataQualitySchemaConfig::default(),
7278 scenario: ScenarioConfig::default(),
7279 temporal: TemporalDriftConfig::default(),
7280 graph_export: GraphExportConfig::default(),
7281 streaming: StreamingSchemaConfig::default(),
7282 rate_limit: RateLimitSchemaConfig::default(),
7283 temporal_attributes: TemporalAttributeSchemaConfig::default(),
7284 relationships: RelationshipSchemaConfig::default(),
7285 accounting_standards: AccountingStandardsConfig::default(),
7286 audit_standards: AuditStandardsConfig::default(),
7287 distributions: Default::default(),
7288 temporal_patterns: Default::default(),
7289 vendor_network: VendorNetworkSchemaConfig::default(),
7290 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7291 relationship_strength: RelationshipStrengthSchemaConfig::default(),
7292 cross_process_links: CrossProcessLinksSchemaConfig::default(),
7293 organizational_events: OrganizationalEventsSchemaConfig::default(),
7294 behavioral_drift: BehavioralDriftSchemaConfig::default(),
7295 market_drift: MarketDriftSchemaConfig::default(),
7296 drift_labeling: DriftLabelingSchemaConfig::default(),
7297 anomaly_injection: Default::default(),
7298 industry_specific: Default::default(),
7299 fingerprint_privacy: Default::default(),
7300 quality_gates: Default::default(),
7301 compliance: Default::default(),
7302 webhooks: Default::default(),
7303 llm: Default::default(),
7304 diffusion: Default::default(),
7305 causal: Default::default(),
7306 source_to_pay: Default::default(),
7307 financial_reporting: Default::default(),
7308 hr: Default::default(),
7309 manufacturing: Default::default(),
7310 sales_quotes: Default::default(),
7311 tax: Default::default(),
7312 treasury: Default::default(),
7313 project_accounting: Default::default(),
7314 esg: Default::default(),
7315 country_packs: None,
7316 }
7317 }
7318
7319 #[test]
7320 fn test_enhanced_orchestrator_creation() {
7321 let config = create_test_config();
7322 let orchestrator = EnhancedOrchestrator::with_defaults(config);
7323 assert!(orchestrator.is_ok());
7324 }
7325
7326 #[test]
7327 fn test_minimal_generation() {
7328 let config = create_test_config();
7329 let phase_config = PhaseConfig {
7330 generate_master_data: false,
7331 generate_document_flows: false,
7332 generate_journal_entries: true,
7333 inject_anomalies: false,
7334 show_progress: false,
7335 ..Default::default()
7336 };
7337
7338 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7339 let result = orchestrator.generate();
7340
7341 assert!(result.is_ok());
7342 let result = result.unwrap();
7343 assert!(!result.journal_entries.is_empty());
7344 }
7345
7346 #[test]
7347 fn test_master_data_generation() {
7348 let config = create_test_config();
7349 let phase_config = PhaseConfig {
7350 generate_master_data: true,
7351 generate_document_flows: false,
7352 generate_journal_entries: false,
7353 inject_anomalies: false,
7354 show_progress: false,
7355 vendors_per_company: 5,
7356 customers_per_company: 5,
7357 materials_per_company: 10,
7358 assets_per_company: 5,
7359 employees_per_company: 10,
7360 ..Default::default()
7361 };
7362
7363 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7364 let result = orchestrator.generate().unwrap();
7365
7366 assert!(!result.master_data.vendors.is_empty());
7367 assert!(!result.master_data.customers.is_empty());
7368 assert!(!result.master_data.materials.is_empty());
7369 }
7370
7371 #[test]
7372 fn test_document_flow_generation() {
7373 let config = create_test_config();
7374 let phase_config = PhaseConfig {
7375 generate_master_data: true,
7376 generate_document_flows: true,
7377 generate_journal_entries: false,
7378 inject_anomalies: false,
7379 inject_data_quality: false,
7380 validate_balances: false,
7381 generate_ocpm_events: false,
7382 show_progress: false,
7383 vendors_per_company: 5,
7384 customers_per_company: 5,
7385 materials_per_company: 10,
7386 assets_per_company: 5,
7387 employees_per_company: 10,
7388 p2p_chains: 5,
7389 o2c_chains: 5,
7390 ..Default::default()
7391 };
7392
7393 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7394 let result = orchestrator.generate().unwrap();
7395
7396 assert!(!result.document_flows.p2p_chains.is_empty());
7398 assert!(!result.document_flows.o2c_chains.is_empty());
7399
7400 assert!(!result.document_flows.purchase_orders.is_empty());
7402 assert!(!result.document_flows.sales_orders.is_empty());
7403 }
7404
7405 #[test]
7406 fn test_anomaly_injection() {
7407 let config = create_test_config();
7408 let phase_config = PhaseConfig {
7409 generate_master_data: false,
7410 generate_document_flows: false,
7411 generate_journal_entries: true,
7412 inject_anomalies: true,
7413 show_progress: false,
7414 ..Default::default()
7415 };
7416
7417 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7418 let result = orchestrator.generate().unwrap();
7419
7420 assert!(!result.journal_entries.is_empty());
7422
7423 assert!(result.anomaly_labels.summary.is_some());
7426 }
7427
7428 #[test]
7429 fn test_full_generation_pipeline() {
7430 let config = create_test_config();
7431 let phase_config = PhaseConfig {
7432 generate_master_data: true,
7433 generate_document_flows: true,
7434 generate_journal_entries: true,
7435 inject_anomalies: false,
7436 inject_data_quality: false,
7437 validate_balances: true,
7438 generate_ocpm_events: false,
7439 show_progress: false,
7440 vendors_per_company: 3,
7441 customers_per_company: 3,
7442 materials_per_company: 5,
7443 assets_per_company: 3,
7444 employees_per_company: 5,
7445 p2p_chains: 3,
7446 o2c_chains: 3,
7447 ..Default::default()
7448 };
7449
7450 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7451 let result = orchestrator.generate().unwrap();
7452
7453 assert!(!result.master_data.vendors.is_empty());
7455 assert!(!result.master_data.customers.is_empty());
7456 assert!(!result.document_flows.p2p_chains.is_empty());
7457 assert!(!result.document_flows.o2c_chains.is_empty());
7458 assert!(!result.journal_entries.is_empty());
7459 assert!(result.statistics.accounts_count > 0);
7460
7461 assert!(!result.subledger.ap_invoices.is_empty());
7463 assert!(!result.subledger.ar_invoices.is_empty());
7464
7465 assert!(result.balance_validation.validated);
7467 assert!(result.balance_validation.entries_processed > 0);
7468 }
7469
7470 #[test]
7471 fn test_subledger_linking() {
7472 let config = create_test_config();
7473 let phase_config = PhaseConfig {
7474 generate_master_data: true,
7475 generate_document_flows: true,
7476 generate_journal_entries: false,
7477 inject_anomalies: false,
7478 inject_data_quality: false,
7479 validate_balances: false,
7480 generate_ocpm_events: false,
7481 show_progress: false,
7482 vendors_per_company: 5,
7483 customers_per_company: 5,
7484 materials_per_company: 10,
7485 assets_per_company: 3,
7486 employees_per_company: 5,
7487 p2p_chains: 5,
7488 o2c_chains: 5,
7489 ..Default::default()
7490 };
7491
7492 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7493 let result = orchestrator.generate().unwrap();
7494
7495 assert!(!result.document_flows.vendor_invoices.is_empty());
7497 assert!(!result.document_flows.customer_invoices.is_empty());
7498
7499 assert!(!result.subledger.ap_invoices.is_empty());
7501 assert!(!result.subledger.ar_invoices.is_empty());
7502
7503 assert_eq!(
7505 result.subledger.ap_invoices.len(),
7506 result.document_flows.vendor_invoices.len()
7507 );
7508
7509 assert_eq!(
7511 result.subledger.ar_invoices.len(),
7512 result.document_flows.customer_invoices.len()
7513 );
7514
7515 assert_eq!(
7517 result.statistics.ap_invoice_count,
7518 result.subledger.ap_invoices.len()
7519 );
7520 assert_eq!(
7521 result.statistics.ar_invoice_count,
7522 result.subledger.ar_invoices.len()
7523 );
7524 }
7525
7526 #[test]
7527 fn test_balance_validation() {
7528 let config = create_test_config();
7529 let phase_config = PhaseConfig {
7530 generate_master_data: false,
7531 generate_document_flows: false,
7532 generate_journal_entries: true,
7533 inject_anomalies: false,
7534 validate_balances: true,
7535 show_progress: false,
7536 ..Default::default()
7537 };
7538
7539 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7540 let result = orchestrator.generate().unwrap();
7541
7542 assert!(result.balance_validation.validated);
7544 assert!(result.balance_validation.entries_processed > 0);
7545
7546 assert!(!result.balance_validation.has_unbalanced_entries);
7548
7549 assert_eq!(
7551 result.balance_validation.total_debits,
7552 result.balance_validation.total_credits
7553 );
7554 }
7555
7556 #[test]
7557 fn test_statistics_accuracy() {
7558 let config = create_test_config();
7559 let phase_config = PhaseConfig {
7560 generate_master_data: true,
7561 generate_document_flows: false,
7562 generate_journal_entries: true,
7563 inject_anomalies: false,
7564 show_progress: false,
7565 vendors_per_company: 10,
7566 customers_per_company: 20,
7567 materials_per_company: 15,
7568 assets_per_company: 5,
7569 employees_per_company: 8,
7570 ..Default::default()
7571 };
7572
7573 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7574 let result = orchestrator.generate().unwrap();
7575
7576 assert_eq!(
7578 result.statistics.vendor_count,
7579 result.master_data.vendors.len()
7580 );
7581 assert_eq!(
7582 result.statistics.customer_count,
7583 result.master_data.customers.len()
7584 );
7585 assert_eq!(
7586 result.statistics.material_count,
7587 result.master_data.materials.len()
7588 );
7589 assert_eq!(
7590 result.statistics.total_entries as usize,
7591 result.journal_entries.len()
7592 );
7593 }
7594
7595 #[test]
7596 fn test_phase_config_defaults() {
7597 let config = PhaseConfig::default();
7598 assert!(config.generate_master_data);
7599 assert!(config.generate_document_flows);
7600 assert!(config.generate_journal_entries);
7601 assert!(!config.inject_anomalies);
7602 assert!(config.validate_balances);
7603 assert!(config.show_progress);
7604 assert!(config.vendors_per_company > 0);
7605 assert!(config.customers_per_company > 0);
7606 }
7607
7608 #[test]
7609 fn test_get_coa_before_generation() {
7610 let config = create_test_config();
7611 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
7612
7613 assert!(orchestrator.get_coa().is_none());
7615 }
7616
7617 #[test]
7618 fn test_get_coa_after_generation() {
7619 let config = create_test_config();
7620 let phase_config = PhaseConfig {
7621 generate_master_data: false,
7622 generate_document_flows: false,
7623 generate_journal_entries: true,
7624 inject_anomalies: false,
7625 show_progress: false,
7626 ..Default::default()
7627 };
7628
7629 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7630 let _ = orchestrator.generate().unwrap();
7631
7632 assert!(orchestrator.get_coa().is_some());
7634 }
7635
7636 #[test]
7637 fn test_get_master_data() {
7638 let config = create_test_config();
7639 let phase_config = PhaseConfig {
7640 generate_master_data: true,
7641 generate_document_flows: false,
7642 generate_journal_entries: false,
7643 inject_anomalies: false,
7644 show_progress: false,
7645 vendors_per_company: 5,
7646 customers_per_company: 5,
7647 materials_per_company: 5,
7648 assets_per_company: 5,
7649 employees_per_company: 5,
7650 ..Default::default()
7651 };
7652
7653 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7654 let _ = orchestrator.generate().unwrap();
7655
7656 let master_data = orchestrator.get_master_data();
7657 assert!(!master_data.vendors.is_empty());
7658 }
7659
7660 #[test]
7661 fn test_with_progress_builder() {
7662 let config = create_test_config();
7663 let orchestrator = EnhancedOrchestrator::with_defaults(config)
7664 .unwrap()
7665 .with_progress(false);
7666
7667 assert!(!orchestrator.phase_config.show_progress);
7669 }
7670
7671 #[test]
7672 fn test_multi_company_generation() {
7673 let mut config = create_test_config();
7674 config.companies.push(CompanyConfig {
7675 code: "2000".to_string(),
7676 name: "Subsidiary".to_string(),
7677 currency: "EUR".to_string(),
7678 country: "DE".to_string(),
7679 annual_transaction_volume: TransactionVolume::TenK,
7680 volume_weight: 0.5,
7681 fiscal_year_variant: "K4".to_string(),
7682 });
7683
7684 let phase_config = PhaseConfig {
7685 generate_master_data: true,
7686 generate_document_flows: false,
7687 generate_journal_entries: true,
7688 inject_anomalies: false,
7689 show_progress: false,
7690 vendors_per_company: 5,
7691 customers_per_company: 5,
7692 materials_per_company: 5,
7693 assets_per_company: 5,
7694 employees_per_company: 5,
7695 ..Default::default()
7696 };
7697
7698 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7699 let result = orchestrator.generate().unwrap();
7700
7701 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
7704 assert!(result.statistics.companies_count == 2);
7705 }
7706
7707 #[test]
7708 fn test_empty_master_data_skips_document_flows() {
7709 let config = create_test_config();
7710 let phase_config = PhaseConfig {
7711 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
7714 inject_anomalies: false,
7715 show_progress: false,
7716 ..Default::default()
7717 };
7718
7719 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7720 let result = orchestrator.generate().unwrap();
7721
7722 assert!(result.document_flows.p2p_chains.is_empty());
7724 assert!(result.document_flows.o2c_chains.is_empty());
7725 }
7726
7727 #[test]
7728 fn test_journal_entry_line_item_count() {
7729 let config = create_test_config();
7730 let phase_config = PhaseConfig {
7731 generate_master_data: false,
7732 generate_document_flows: false,
7733 generate_journal_entries: true,
7734 inject_anomalies: false,
7735 show_progress: false,
7736 ..Default::default()
7737 };
7738
7739 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7740 let result = orchestrator.generate().unwrap();
7741
7742 let calculated_line_items: u64 = result
7744 .journal_entries
7745 .iter()
7746 .map(|e| e.line_count() as u64)
7747 .sum();
7748 assert_eq!(result.statistics.total_line_items, calculated_line_items);
7749 }
7750
7751 #[test]
7752 fn test_audit_generation() {
7753 let config = create_test_config();
7754 let phase_config = PhaseConfig {
7755 generate_master_data: false,
7756 generate_document_flows: false,
7757 generate_journal_entries: true,
7758 inject_anomalies: false,
7759 show_progress: false,
7760 generate_audit: true,
7761 audit_engagements: 2,
7762 workpapers_per_engagement: 5,
7763 evidence_per_workpaper: 2,
7764 risks_per_engagement: 3,
7765 findings_per_engagement: 2,
7766 judgments_per_engagement: 2,
7767 ..Default::default()
7768 };
7769
7770 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7771 let result = orchestrator.generate().unwrap();
7772
7773 assert_eq!(result.audit.engagements.len(), 2);
7775 assert!(!result.audit.workpapers.is_empty());
7776 assert!(!result.audit.evidence.is_empty());
7777 assert!(!result.audit.risk_assessments.is_empty());
7778 assert!(!result.audit.findings.is_empty());
7779 assert!(!result.audit.judgments.is_empty());
7780
7781 assert_eq!(
7783 result.statistics.audit_engagement_count,
7784 result.audit.engagements.len()
7785 );
7786 assert_eq!(
7787 result.statistics.audit_workpaper_count,
7788 result.audit.workpapers.len()
7789 );
7790 assert_eq!(
7791 result.statistics.audit_evidence_count,
7792 result.audit.evidence.len()
7793 );
7794 assert_eq!(
7795 result.statistics.audit_risk_count,
7796 result.audit.risk_assessments.len()
7797 );
7798 assert_eq!(
7799 result.statistics.audit_finding_count,
7800 result.audit.findings.len()
7801 );
7802 assert_eq!(
7803 result.statistics.audit_judgment_count,
7804 result.audit.judgments.len()
7805 );
7806 }
7807
7808 #[test]
7809 fn test_new_phases_disabled_by_default() {
7810 let config = create_test_config();
7811 assert!(!config.llm.enabled);
7813 assert!(!config.diffusion.enabled);
7814 assert!(!config.causal.enabled);
7815
7816 let phase_config = PhaseConfig {
7817 generate_master_data: false,
7818 generate_document_flows: false,
7819 generate_journal_entries: true,
7820 inject_anomalies: false,
7821 show_progress: false,
7822 ..Default::default()
7823 };
7824
7825 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7826 let result = orchestrator.generate().unwrap();
7827
7828 assert_eq!(result.statistics.llm_enrichment_ms, 0);
7830 assert_eq!(result.statistics.llm_vendors_enriched, 0);
7831 assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
7832 assert_eq!(result.statistics.diffusion_samples_generated, 0);
7833 assert_eq!(result.statistics.causal_generation_ms, 0);
7834 assert_eq!(result.statistics.causal_samples_generated, 0);
7835 assert!(result.statistics.causal_validation_passed.is_none());
7836 }
7837
7838 #[test]
7839 fn test_llm_enrichment_enabled() {
7840 let mut config = create_test_config();
7841 config.llm.enabled = true;
7842 config.llm.max_vendor_enrichments = 3;
7843
7844 let phase_config = PhaseConfig {
7845 generate_master_data: true,
7846 generate_document_flows: false,
7847 generate_journal_entries: false,
7848 inject_anomalies: false,
7849 show_progress: false,
7850 vendors_per_company: 5,
7851 customers_per_company: 3,
7852 materials_per_company: 3,
7853 assets_per_company: 3,
7854 employees_per_company: 3,
7855 ..Default::default()
7856 };
7857
7858 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7859 let result = orchestrator.generate().unwrap();
7860
7861 assert!(result.statistics.llm_vendors_enriched > 0);
7863 assert!(result.statistics.llm_vendors_enriched <= 3);
7864 }
7865
7866 #[test]
7867 fn test_diffusion_enhancement_enabled() {
7868 let mut config = create_test_config();
7869 config.diffusion.enabled = true;
7870 config.diffusion.n_steps = 50;
7871 config.diffusion.sample_size = 20;
7872
7873 let phase_config = PhaseConfig {
7874 generate_master_data: false,
7875 generate_document_flows: false,
7876 generate_journal_entries: true,
7877 inject_anomalies: false,
7878 show_progress: false,
7879 ..Default::default()
7880 };
7881
7882 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7883 let result = orchestrator.generate().unwrap();
7884
7885 assert_eq!(result.statistics.diffusion_samples_generated, 20);
7887 }
7888
7889 #[test]
7890 fn test_causal_overlay_enabled() {
7891 let mut config = create_test_config();
7892 config.causal.enabled = true;
7893 config.causal.template = "fraud_detection".to_string();
7894 config.causal.sample_size = 100;
7895 config.causal.validate = true;
7896
7897 let phase_config = PhaseConfig {
7898 generate_master_data: false,
7899 generate_document_flows: false,
7900 generate_journal_entries: true,
7901 inject_anomalies: false,
7902 show_progress: false,
7903 ..Default::default()
7904 };
7905
7906 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7907 let result = orchestrator.generate().unwrap();
7908
7909 assert_eq!(result.statistics.causal_samples_generated, 100);
7911 assert!(result.statistics.causal_validation_passed.is_some());
7913 }
7914
/// Runs journal-entry-only generation with the causal overlay using the
/// `revenue_cycle` template, 50 samples and validation turned off, then checks
/// that 50 causal samples are reported and no validation result is recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.validate = false;
    cfg.causal.sample_size = 50;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.enabled = true;

    // Only the journal-entry phase is turned on explicitly; everything else
    // listed here is disabled and the remaining fields use their defaults.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was not requested, so the outcome must stay unset.
    assert!(stats.causal_validation_passed.is_none());
}
7940
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in a
/// single run (with master data and journal entries generated) and checks that
/// each of the three reports its expected counters in the statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    // Causal overlay: 50 samples, validation requested.
    cfg.causal.validate = true;
    cfg.causal.sample_size = 50;
    cfg.causal.enabled = true;

    // Diffusion enhancement: 20 steps, 10 samples.
    cfg.diffusion.sample_size = 10;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.enabled = true;

    // LLM enrichment: at most 2 vendor enrichments.
    cfg.llm.max_vendor_enrichments = 2;
    cfg.llm.enabled = true;

    // Master data is generated here with small per-company counts.
    let phases = PhaseConfig {
        employees_per_company: 3,
        assets_per_company: 3,
        materials_per_company: 3,
        customers_per_company: 3,
        vendors_per_company: 5,
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: true,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
7976
/// Round-trips an `EnhancedGenerationStatistics` value through JSON and checks
/// that the LLM, diffusion and causal fields come back with the values that
/// were written.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        causal_validation_passed: Some(true),
        causal_samples_generated: 100,
        causal_generation_ms: 200,
        diffusion_samples_generated: 50,
        diffusion_enhancement_ms: 100,
        llm_vendors_enriched: 10,
        llm_enrichment_ms: 42,
        total_line_items: 500,
        total_entries: 100,
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
8003
/// Deserializes a JSON payload that contains none of the LLM, diffusion or
/// causal statistics keys and checks that those fields come back as zero
/// (or `None` for the validation outcome).
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload deliberately omits the llm_*, diffusion_* and causal_* keys.
    let legacy_payload = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
8053}