1use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33 BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42 SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50 io::FingerprintReader,
51 models::Fingerprint,
52 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55 AnomalyInjector,
57 AnomalyInjectorConfig,
58 AssetGenerator,
59 AuditEngagementGenerator,
61 BalanceTrackerConfig,
62 BankReconciliationGenerator,
64 BidEvaluationGenerator,
66 BidGenerator,
67 CatalogGenerator,
68 ChartOfAccountsGenerator,
70 ContractGenerator,
71 CustomerGenerator,
72 DataQualityConfig,
73 DataQualityInjector,
75 DataQualityStats,
76 DocumentFlowJeConfig,
78 DocumentFlowJeGenerator,
79 DocumentFlowLinker,
81 EmployeeGenerator,
82 EsgAnomalyLabel,
84 EvidenceGenerator,
85 FinancialStatementGenerator,
87 FindingGenerator,
88 JournalEntryGenerator,
89 JudgmentGenerator,
90 LatePaymentDistribution,
91 MaterialGenerator,
92 O2CDocumentChain,
93 O2CGenerator,
94 O2CGeneratorConfig,
95 O2CPaymentBehavior,
96 P2PDocumentChain,
97 P2PGenerator,
99 P2PGeneratorConfig,
100 P2PPaymentBehavior,
101 PaymentReference,
102 QualificationGenerator,
103 RfxGenerator,
104 RiskAssessmentGenerator,
105 RunningBalanceTracker,
107 ScorecardGenerator,
108 SourcingProjectGenerator,
109 SpendAnalysisGenerator,
110 ValidationError,
111 VendorGenerator,
113 WorkpaperGenerator,
114};
115use datasynth_graph::{
116 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
117 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
118};
119use datasynth_ocpm::{
120 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
121 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
122 OcpmUuidFactory, P2pDocuments, S2cDocuments,
123};
124
125use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
126use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
127use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
128use datasynth_core::llm::MockLlmProvider;
129use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
130use datasynth_core::models::documents::PaymentMethod;
131use datasynth_core::models::IndustrySector;
132use datasynth_generators::coa_generator::CoAFramework;
133use datasynth_generators::llm_enrichment::VendorLlmEnricher;
134use rayon::prelude::*;
135
136fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
142 tracing::debug!(
143 "P2P config: using hardcoded defaults for over_delivery_rate=0.02, early_payment_discount_rate=0.30"
144 );
145 let payment_behavior = &schema_config.payment_behavior;
146 let late_dist = &payment_behavior.late_payment_days_distribution;
147
148 P2PGeneratorConfig {
149 three_way_match_rate: schema_config.three_way_match_rate,
150 partial_delivery_rate: schema_config.partial_delivery_rate,
151 over_delivery_rate: 0.02, price_variance_rate: schema_config.price_variance_rate,
154 max_price_variance_percent: schema_config.max_price_variance_percent,
155 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
156 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
157 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
158 payment_method_distribution: vec![
159 (PaymentMethod::BankTransfer, 0.60),
160 (PaymentMethod::Check, 0.25),
161 (PaymentMethod::Wire, 0.10),
162 (PaymentMethod::CreditCard, 0.05),
163 ],
164 early_payment_discount_rate: 0.30, payment_behavior: P2PPaymentBehavior {
167 late_payment_rate: payment_behavior.late_payment_rate,
168 late_payment_distribution: LatePaymentDistribution {
169 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
170 late_8_to_14: late_dist.late_8_to_14,
171 very_late_15_to_30: late_dist.very_late_15_to_30,
172 severely_late_31_to_60: late_dist.severely_late_31_to_60,
173 extremely_late_over_60: late_dist.extremely_late_over_60,
174 },
175 partial_payment_rate: payment_behavior.partial_payment_rate,
176 payment_correction_rate: payment_behavior.payment_correction_rate,
177 },
178 }
179}
180
181fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
183 tracing::debug!("O2C config: using hardcoded default for late_payment_rate=0.15");
184 let payment_behavior = &schema_config.payment_behavior;
185
186 O2CGeneratorConfig {
187 credit_check_failure_rate: schema_config.credit_check_failure_rate,
188 partial_shipment_rate: schema_config.partial_shipment_rate,
189 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
190 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
191 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
192 late_payment_rate: 0.15, bad_debt_rate: schema_config.bad_debt_rate,
195 returns_rate: schema_config.return_rate,
196 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
197 payment_method_distribution: vec![
198 (PaymentMethod::BankTransfer, 0.50),
199 (PaymentMethod::Check, 0.30),
200 (PaymentMethod::Wire, 0.15),
201 (PaymentMethod::CreditCard, 0.05),
202 ],
203 payment_behavior: O2CPaymentBehavior {
204 partial_payment_rate: payment_behavior.partial_payments.rate,
205 short_payment_rate: payment_behavior.short_payments.rate,
206 max_short_percent: payment_behavior.short_payments.max_short_percent,
207 on_account_rate: payment_behavior.on_account_payments.rate,
208 payment_correction_rate: payment_behavior.payment_corrections.rate,
209 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
210 },
211 }
212}
213
/// Switches and volume knobs for the individual generation phases.
///
/// Boolean fields turn a phase or post-processing step on or off; `usize` fields
/// give per-company or per-engagement volumes. The values used when nothing is
/// overridden come from the `Default` implementation.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    // --- Core phase toggles ---
    /// Toggle for the master-data phase.
    pub generate_master_data: bool,
    /// Toggle for the document-flow phase.
    pub generate_document_flows: bool,
    /// Toggle for OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Toggle for journal-entry generation.
    pub generate_journal_entries: bool,
    /// Toggle for anomaly injection.
    pub inject_anomalies: bool,
    /// Toggle for data-quality issue injection.
    pub inject_data_quality: bool,
    /// Toggle for balance validation.
    pub validate_balances: bool,
    /// Toggle for progress display.
    pub show_progress: bool,

    // --- Master-data volumes ---
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,

    // --- Document-flow volumes ---
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,

    // --- Audit ---
    /// Toggle for audit data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,

    // --- Optional domain toggles ---
    /// Toggle for banking data generation.
    pub generate_banking: bool,
    /// Toggle for graph export.
    pub generate_graph_export: bool,
    /// Toggle for sourcing data generation.
    pub generate_sourcing: bool,
    /// Toggle for bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Toggle for financial statement generation.
    pub generate_financial_statements: bool,
    /// Toggle for accounting-standards data generation.
    pub generate_accounting_standards: bool,
    /// Toggle for manufacturing data generation.
    pub generate_manufacturing: bool,
    /// Toggle for sales quote / KPI / budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Toggle for tax data generation.
    pub generate_tax: bool,
    /// Toggle for ESG data generation.
    pub generate_esg: bool,
    /// Toggle for intercompany data generation.
    pub generate_intercompany: bool,
}
284
285impl Default for PhaseConfig {
286 fn default() -> Self {
287 Self {
288 generate_master_data: true,
289 generate_document_flows: true,
290 generate_ocpm_events: false, generate_journal_entries: true,
292 inject_anomalies: false,
293 inject_data_quality: false, validate_balances: true,
295 show_progress: true,
296 vendors_per_company: 50,
297 customers_per_company: 100,
298 materials_per_company: 200,
299 assets_per_company: 50,
300 employees_per_company: 100,
301 p2p_chains: 100,
302 o2c_chains: 100,
303 generate_audit: false, audit_engagements: 5,
305 workpapers_per_engagement: 20,
306 evidence_per_workpaper: 5,
307 risks_per_engagement: 15,
308 findings_per_engagement: 8,
309 judgments_per_engagement: 10,
310 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, }
322 }
323}
324
325#[derive(Debug, Clone, Default)]
327pub struct MasterDataSnapshot {
328 pub vendors: Vec<Vendor>,
330 pub customers: Vec<Customer>,
332 pub materials: Vec<Material>,
334 pub assets: Vec<FixedAsset>,
336 pub employees: Vec<Employee>,
338}
339
/// Summary record for a hypergraph export: three counts and the location
/// of the output.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
352
353#[derive(Debug, Clone, Default)]
355pub struct DocumentFlowSnapshot {
356 pub p2p_chains: Vec<P2PDocumentChain>,
358 pub o2c_chains: Vec<O2CDocumentChain>,
360 pub purchase_orders: Vec<documents::PurchaseOrder>,
362 pub goods_receipts: Vec<documents::GoodsReceipt>,
364 pub vendor_invoices: Vec<documents::VendorInvoice>,
366 pub sales_orders: Vec<documents::SalesOrder>,
368 pub deliveries: Vec<documents::Delivery>,
370 pub customer_invoices: Vec<documents::CustomerInvoice>,
372 pub payments: Vec<documents::Payment>,
374}
375
376#[derive(Debug, Clone, Default)]
378pub struct SubledgerSnapshot {
379 pub ap_invoices: Vec<APInvoice>,
381 pub ar_invoices: Vec<ARInvoice>,
383 pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
385 pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
387 pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
389}
390
391#[derive(Debug, Clone, Default)]
393pub struct OcpmSnapshot {
394 pub event_log: Option<OcpmEventLog>,
396 pub event_count: usize,
398 pub object_count: usize,
400 pub case_count: usize,
402}
403
404#[derive(Debug, Clone, Default)]
406pub struct AuditSnapshot {
407 pub engagements: Vec<AuditEngagement>,
409 pub workpapers: Vec<Workpaper>,
411 pub evidence: Vec<AuditEvidence>,
413 pub risk_assessments: Vec<RiskAssessment>,
415 pub findings: Vec<AuditFinding>,
417 pub judgments: Vec<ProfessionalJudgment>,
419}
420
421#[derive(Debug, Clone, Default)]
423pub struct BankingSnapshot {
424 pub customers: Vec<BankingCustomer>,
426 pub accounts: Vec<BankAccount>,
428 pub transactions: Vec<BankTransaction>,
430 pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
432 pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
434 pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
436 pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
438 pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
440 pub suspicious_count: usize,
442 pub scenario_count: usize,
444}
445
446#[derive(Debug, Clone, Default, Serialize)]
448pub struct GraphExportSnapshot {
449 pub exported: bool,
451 pub graph_count: usize,
453 pub exports: HashMap<String, GraphExportInfo>,
455}
456
457#[derive(Debug, Clone, Serialize)]
459pub struct GraphExportInfo {
460 pub name: String,
462 pub format: String,
464 pub output_path: PathBuf,
466 pub node_count: usize,
468 pub edge_count: usize,
470}
471
472#[derive(Debug, Clone, Default)]
474pub struct SourcingSnapshot {
475 pub spend_analyses: Vec<SpendAnalysis>,
477 pub sourcing_projects: Vec<SourcingProject>,
479 pub qualifications: Vec<SupplierQualification>,
481 pub rfx_events: Vec<RfxEvent>,
483 pub bids: Vec<SupplierBid>,
485 pub bid_evaluations: Vec<BidEvaluation>,
487 pub contracts: Vec<ProcurementContract>,
489 pub catalog_items: Vec<CatalogItem>,
491 pub scorecards: Vec<SupplierScorecard>,
493}
494
495#[derive(Debug, Clone, Serialize, Deserialize)]
497pub struct PeriodTrialBalance {
498 pub fiscal_year: u16,
500 pub fiscal_period: u8,
502 pub period_start: NaiveDate,
504 pub period_end: NaiveDate,
506 pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
508}
509
510#[derive(Debug, Clone, Default)]
512pub struct FinancialReportingSnapshot {
513 pub financial_statements: Vec<FinancialStatement>,
515 pub bank_reconciliations: Vec<BankReconciliation>,
517 pub trial_balances: Vec<PeriodTrialBalance>,
519}
520
521#[derive(Debug, Clone, Default)]
523pub struct HrSnapshot {
524 pub payroll_runs: Vec<PayrollRun>,
526 pub payroll_line_items: Vec<PayrollLineItem>,
528 pub time_entries: Vec<TimeEntry>,
530 pub expense_reports: Vec<ExpenseReport>,
532 pub payroll_run_count: usize,
534 pub payroll_line_item_count: usize,
536 pub time_entry_count: usize,
538 pub expense_report_count: usize,
540}
541
542#[derive(Debug, Clone, Default)]
544pub struct AccountingStandardsSnapshot {
545 pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
547 pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
549 pub revenue_contract_count: usize,
551 pub impairment_test_count: usize,
553}
554
555#[derive(Debug, Clone, Default)]
557pub struct ManufacturingSnapshot {
558 pub production_orders: Vec<ProductionOrder>,
560 pub quality_inspections: Vec<QualityInspection>,
562 pub cycle_counts: Vec<CycleCount>,
564 pub production_order_count: usize,
566 pub quality_inspection_count: usize,
568 pub cycle_count_count: usize,
570}
571
572#[derive(Debug, Clone, Default)]
574pub struct SalesKpiBudgetsSnapshot {
575 pub sales_quotes: Vec<SalesQuote>,
577 pub kpis: Vec<ManagementKpi>,
579 pub budgets: Vec<Budget>,
581 pub sales_quote_count: usize,
583 pub kpi_count: usize,
585 pub budget_line_count: usize,
587}
588
589#[derive(Debug, Clone, Default)]
591pub struct AnomalyLabels {
592 pub labels: Vec<LabeledAnomaly>,
594 pub summary: Option<AnomalySummary>,
596 pub by_type: HashMap<String, usize>,
598}
599
600#[derive(Debug, Clone, Default)]
602pub struct BalanceValidationResult {
603 pub validated: bool,
605 pub is_balanced: bool,
607 pub entries_processed: u64,
609 pub total_debits: rust_decimal::Decimal,
611 pub total_credits: rust_decimal::Decimal,
613 pub accounts_tracked: usize,
615 pub companies_tracked: usize,
617 pub validation_errors: Vec<ValidationError>,
619 pub has_unbalanced_entries: bool,
621}
622
623#[derive(Debug, Clone, Default)]
625pub struct TaxSnapshot {
626 pub jurisdictions: Vec<TaxJurisdiction>,
628 pub codes: Vec<TaxCode>,
630 pub tax_lines: Vec<TaxLine>,
632 pub tax_returns: Vec<TaxReturn>,
634 pub tax_provisions: Vec<TaxProvision>,
636 pub withholding_records: Vec<WithholdingTaxRecord>,
638 pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
640 pub jurisdiction_count: usize,
642 pub code_count: usize,
644}
645
646#[derive(Debug, Clone, Default, Serialize, Deserialize)]
648pub struct IntercompanySnapshot {
649 pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
651 pub seller_journal_entries: Vec<JournalEntry>,
653 pub buyer_journal_entries: Vec<JournalEntry>,
655 pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
657 pub matched_pair_count: usize,
659 pub elimination_entry_count: usize,
661 pub match_rate: f64,
663}
664
665#[derive(Debug, Clone, Default)]
667pub struct EsgSnapshot {
668 pub emissions: Vec<EmissionRecord>,
670 pub energy: Vec<EnergyConsumption>,
672 pub water: Vec<WaterUsage>,
674 pub waste: Vec<WasteRecord>,
676 pub diversity: Vec<WorkforceDiversityMetric>,
678 pub pay_equity: Vec<PayEquityMetric>,
680 pub safety_incidents: Vec<SafetyIncident>,
682 pub safety_metrics: Vec<SafetyMetric>,
684 pub governance: Vec<GovernanceMetric>,
686 pub supplier_assessments: Vec<SupplierEsgAssessment>,
688 pub materiality: Vec<MaterialityAssessment>,
690 pub disclosures: Vec<EsgDisclosure>,
692 pub climate_scenarios: Vec<ClimateScenario>,
694 pub anomaly_labels: Vec<EsgAnomalyLabel>,
696 pub emission_count: usize,
698 pub disclosure_count: usize,
700}
701
702#[derive(Debug, Clone, Default)]
704pub struct TreasurySnapshot {
705 pub cash_positions: Vec<CashPosition>,
707 pub cash_forecasts: Vec<CashForecast>,
709 pub cash_pools: Vec<CashPool>,
711 pub cash_pool_sweeps: Vec<CashPoolSweep>,
713 pub hedging_instruments: Vec<HedgingInstrument>,
715 pub hedge_relationships: Vec<HedgeRelationship>,
717 pub debt_instruments: Vec<DebtInstrument>,
719 pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
721}
722
723#[derive(Debug, Clone, Default)]
725pub struct ProjectAccountingSnapshot {
726 pub projects: Vec<Project>,
728 pub cost_lines: Vec<ProjectCostLine>,
730 pub revenue_records: Vec<ProjectRevenue>,
732 pub earned_value_metrics: Vec<EarnedValueMetric>,
734 pub change_orders: Vec<ChangeOrder>,
736 pub milestones: Vec<ProjectMilestone>,
738}
739
740#[derive(Debug)]
742pub struct EnhancedGenerationResult {
743 pub chart_of_accounts: ChartOfAccounts,
745 pub master_data: MasterDataSnapshot,
747 pub document_flows: DocumentFlowSnapshot,
749 pub subledger: SubledgerSnapshot,
751 pub ocpm: OcpmSnapshot,
753 pub audit: AuditSnapshot,
755 pub banking: BankingSnapshot,
757 pub graph_export: GraphExportSnapshot,
759 pub sourcing: SourcingSnapshot,
761 pub financial_reporting: FinancialReportingSnapshot,
763 pub hr: HrSnapshot,
765 pub accounting_standards: AccountingStandardsSnapshot,
767 pub manufacturing: ManufacturingSnapshot,
769 pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
771 pub tax: TaxSnapshot,
773 pub esg: EsgSnapshot,
775 pub treasury: TreasurySnapshot,
777 pub project_accounting: ProjectAccountingSnapshot,
779 pub intercompany: IntercompanySnapshot,
781 pub journal_entries: Vec<JournalEntry>,
783 pub anomaly_labels: AnomalyLabels,
785 pub balance_validation: BalanceValidationResult,
787 pub data_quality_stats: DataQualityStats,
789 pub statistics: EnhancedGenerationStatistics,
791 pub lineage: Option<super::lineage::LineageGraph>,
793 pub gate_result: Option<datasynth_eval::gates::GateResult>,
795 pub internal_controls: Vec<InternalControl>,
797 pub opening_balances: Vec<GeneratedOpeningBalance>,
799 pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
801}
802
803#[derive(Debug, Clone, Default, Serialize, Deserialize)]
805pub struct EnhancedGenerationStatistics {
806 pub total_entries: u64,
808 pub total_line_items: u64,
810 pub accounts_count: usize,
812 pub companies_count: usize,
814 pub period_months: u32,
816 pub vendor_count: usize,
818 pub customer_count: usize,
819 pub material_count: usize,
820 pub asset_count: usize,
821 pub employee_count: usize,
822 pub p2p_chain_count: usize,
824 pub o2c_chain_count: usize,
825 pub ap_invoice_count: usize,
827 pub ar_invoice_count: usize,
828 pub ocpm_event_count: usize,
830 pub ocpm_object_count: usize,
831 pub ocpm_case_count: usize,
832 pub audit_engagement_count: usize,
834 pub audit_workpaper_count: usize,
835 pub audit_evidence_count: usize,
836 pub audit_risk_count: usize,
837 pub audit_finding_count: usize,
838 pub audit_judgment_count: usize,
839 pub anomalies_injected: usize,
841 pub data_quality_issues: usize,
843 pub banking_customer_count: usize,
845 pub banking_account_count: usize,
846 pub banking_transaction_count: usize,
847 pub banking_suspicious_count: usize,
848 pub graph_export_count: usize,
850 pub graph_node_count: usize,
851 pub graph_edge_count: usize,
852 #[serde(default)]
854 pub llm_enrichment_ms: u64,
855 #[serde(default)]
857 pub llm_vendors_enriched: usize,
858 #[serde(default)]
860 pub diffusion_enhancement_ms: u64,
861 #[serde(default)]
863 pub diffusion_samples_generated: usize,
864 #[serde(default)]
866 pub causal_generation_ms: u64,
867 #[serde(default)]
869 pub causal_samples_generated: usize,
870 #[serde(default)]
872 pub causal_validation_passed: Option<bool>,
873 #[serde(default)]
875 pub sourcing_project_count: usize,
876 #[serde(default)]
877 pub rfx_event_count: usize,
878 #[serde(default)]
879 pub bid_count: usize,
880 #[serde(default)]
881 pub contract_count: usize,
882 #[serde(default)]
883 pub catalog_item_count: usize,
884 #[serde(default)]
885 pub scorecard_count: usize,
886 #[serde(default)]
888 pub financial_statement_count: usize,
889 #[serde(default)]
890 pub bank_reconciliation_count: usize,
891 #[serde(default)]
893 pub payroll_run_count: usize,
894 #[serde(default)]
895 pub time_entry_count: usize,
896 #[serde(default)]
897 pub expense_report_count: usize,
898 #[serde(default)]
900 pub revenue_contract_count: usize,
901 #[serde(default)]
902 pub impairment_test_count: usize,
903 #[serde(default)]
905 pub production_order_count: usize,
906 #[serde(default)]
907 pub quality_inspection_count: usize,
908 #[serde(default)]
909 pub cycle_count_count: usize,
910 #[serde(default)]
912 pub sales_quote_count: usize,
913 #[serde(default)]
914 pub kpi_count: usize,
915 #[serde(default)]
916 pub budget_line_count: usize,
917 #[serde(default)]
919 pub tax_jurisdiction_count: usize,
920 #[serde(default)]
921 pub tax_code_count: usize,
922 #[serde(default)]
924 pub esg_emission_count: usize,
925 #[serde(default)]
926 pub esg_disclosure_count: usize,
927 #[serde(default)]
929 pub ic_matched_pair_count: usize,
930 #[serde(default)]
931 pub ic_elimination_count: usize,
932 #[serde(default)]
934 pub ic_transaction_count: usize,
935 #[serde(default)]
937 pub fa_subledger_count: usize,
938 #[serde(default)]
940 pub inventory_subledger_count: usize,
941 #[serde(default)]
943 pub treasury_debt_instrument_count: usize,
944 #[serde(default)]
946 pub treasury_hedging_instrument_count: usize,
947 #[serde(default)]
949 pub project_count: usize,
950 #[serde(default)]
952 pub project_change_order_count: usize,
953 #[serde(default)]
955 pub tax_provision_count: usize,
956 #[serde(default)]
958 pub opening_balance_count: usize,
959 #[serde(default)]
961 pub subledger_reconciliation_count: usize,
962 #[serde(default)]
964 pub tax_line_count: usize,
965 #[serde(default)]
967 pub project_cost_line_count: usize,
968 #[serde(default)]
970 pub cash_position_count: usize,
971}
972
973pub struct EnhancedOrchestrator {
975 config: GeneratorConfig,
976 phase_config: PhaseConfig,
977 coa: Option<Arc<ChartOfAccounts>>,
978 master_data: MasterDataSnapshot,
979 seed: u64,
980 multi_progress: Option<MultiProgress>,
981 resource_guard: ResourceGuard,
983 output_path: Option<PathBuf>,
985 copula_generators: Vec<CopulaGeneratorSpec>,
987 country_pack_registry: datasynth_core::CountryPackRegistry,
989}
990
991impl EnhancedOrchestrator {
992 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
994 datasynth_config::validate_config(&config)?;
995
996 let seed = config.global.seed.unwrap_or_else(rand::random);
997
998 let resource_guard = Self::build_resource_guard(&config, None);
1000
1001 let country_pack_registry = match &config.country_packs {
1003 Some(cp) => {
1004 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1005 .map_err(|e| SynthError::config(e.to_string()))?
1006 }
1007 None => datasynth_core::CountryPackRegistry::builtin_only()
1008 .map_err(|e| SynthError::config(e.to_string()))?,
1009 };
1010
1011 Ok(Self {
1012 config,
1013 phase_config,
1014 coa: None,
1015 master_data: MasterDataSnapshot::default(),
1016 seed,
1017 multi_progress: None,
1018 resource_guard,
1019 output_path: None,
1020 copula_generators: Vec::new(),
1021 country_pack_registry,
1022 })
1023 }
1024
1025 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1027 Self::new(config, PhaseConfig::default())
1028 }
1029
1030 pub fn with_progress(mut self, show: bool) -> Self {
1032 self.phase_config.show_progress = show;
1033 if show {
1034 self.multi_progress = Some(MultiProgress::new());
1035 }
1036 self
1037 }
1038
1039 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1041 let path = path.into();
1042 self.output_path = Some(path.clone());
1043 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1045 self
1046 }
1047
1048 pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1050 &self.country_pack_registry
1051 }
1052
1053 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1055 self.country_pack_registry.get_by_str(country)
1056 }
1057
1058 fn primary_country_code(&self) -> &str {
1061 self.config
1062 .companies
1063 .first()
1064 .map(|c| c.country.as_str())
1065 .unwrap_or("US")
1066 }
1067
1068 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1070 self.country_pack_for(self.primary_country_code())
1071 }
1072
1073 fn resolve_coa_framework(&self) -> CoAFramework {
1075 if self.config.accounting_standards.enabled {
1076 match self.config.accounting_standards.framework {
1077 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1078 return CoAFramework::FrenchPcg;
1079 }
1080 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1081 return CoAFramework::GermanSkr04;
1082 }
1083 _ => {}
1084 }
1085 }
1086 let pack = self.primary_pack();
1088 match pack.accounting.framework.as_str() {
1089 "french_gaap" => CoAFramework::FrenchPcg,
1090 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1091 _ => CoAFramework::UsGaap,
1092 }
1093 }
1094
1095 pub fn has_copulas(&self) -> bool {
1100 !self.copula_generators.is_empty()
1101 }
1102
1103 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1109 &self.copula_generators
1110 }
1111
1112 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1116 &mut self.copula_generators
1117 }
1118
1119 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1123 self.copula_generators
1124 .iter_mut()
1125 .find(|c| c.name == copula_name)
1126 .map(|c| c.generator.sample())
1127 }
1128
1129 pub fn from_fingerprint(
1152 fingerprint_path: &std::path::Path,
1153 phase_config: PhaseConfig,
1154 scale: f64,
1155 ) -> SynthResult<Self> {
1156 info!("Loading fingerprint from: {}", fingerprint_path.display());
1157
1158 let reader = FingerprintReader::new();
1160 let fingerprint = reader
1161 .read_from_file(fingerprint_path)
1162 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1163
1164 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1165 }
1166
1167 pub fn from_fingerprint_data(
1174 fingerprint: Fingerprint,
1175 phase_config: PhaseConfig,
1176 scale: f64,
1177 ) -> SynthResult<Self> {
1178 info!(
1179 "Synthesizing config from fingerprint (version: {}, tables: {})",
1180 fingerprint.manifest.version,
1181 fingerprint.schema.tables.len()
1182 );
1183
1184 let seed: u64 = rand::random();
1186
1187 let options = SynthesisOptions {
1189 scale,
1190 seed: Some(seed),
1191 preserve_correlations: true,
1192 inject_anomalies: true,
1193 };
1194 let synthesizer = ConfigSynthesizer::with_options(options);
1195
1196 let synthesis_result = synthesizer
1198 .synthesize_full(&fingerprint, seed)
1199 .map_err(|e| {
1200 SynthError::config(format!(
1201 "Failed to synthesize config from fingerprint: {}",
1202 e
1203 ))
1204 })?;
1205
1206 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1208 Self::base_config_for_industry(industry)
1209 } else {
1210 Self::base_config_for_industry("manufacturing")
1211 };
1212
1213 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1215
1216 info!(
1218 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1219 fingerprint.schema.tables.len(),
1220 scale,
1221 synthesis_result.copula_generators.len()
1222 );
1223
1224 if !synthesis_result.copula_generators.is_empty() {
1225 for spec in &synthesis_result.copula_generators {
1226 info!(
1227 " Copula '{}' for table '{}': {} columns",
1228 spec.name,
1229 spec.table,
1230 spec.columns.len()
1231 );
1232 }
1233 }
1234
1235 let mut orchestrator = Self::new(config, phase_config)?;
1237
1238 orchestrator.copula_generators = synthesis_result.copula_generators;
1240
1241 Ok(orchestrator)
1242 }
1243
1244 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1246 use datasynth_config::presets::create_preset;
1247 use datasynth_config::TransactionVolume;
1248 use datasynth_core::models::{CoAComplexity, IndustrySector};
1249
1250 let sector = match industry.to_lowercase().as_str() {
1251 "manufacturing" => IndustrySector::Manufacturing,
1252 "retail" => IndustrySector::Retail,
1253 "financial" | "financial_services" => IndustrySector::FinancialServices,
1254 "healthcare" => IndustrySector::Healthcare,
1255 "technology" | "tech" => IndustrySector::Technology,
1256 _ => IndustrySector::Manufacturing,
1257 };
1258
1259 create_preset(
1261 sector,
1262 1, 12, CoAComplexity::Medium,
1265 TransactionVolume::TenK,
1266 )
1267 }
1268
1269 fn apply_config_patch(
1271 mut config: GeneratorConfig,
1272 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1273 ) -> GeneratorConfig {
1274 use datasynth_fingerprint::synthesis::ConfigValue;
1275
1276 for (key, value) in patch.values() {
1277 match (key.as_str(), value) {
1278 ("transactions.count", ConfigValue::Integer(n)) => {
1281 info!(
1282 "Fingerprint suggests {} transactions (apply via company volumes)",
1283 n
1284 );
1285 }
1286 ("global.period_months", ConfigValue::Integer(n)) => {
1287 config.global.period_months = *n as u32;
1288 }
1289 ("global.start_date", ConfigValue::String(s)) => {
1290 config.global.start_date = s.clone();
1291 }
1292 ("global.seed", ConfigValue::Integer(n)) => {
1293 config.global.seed = Some(*n as u64);
1294 }
1295 ("fraud.enabled", ConfigValue::Bool(b)) => {
1296 config.fraud.enabled = *b;
1297 }
1298 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1299 config.fraud.fraud_rate = *f;
1300 }
1301 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1302 config.data_quality.enabled = *b;
1303 }
1304 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1306 config.fraud.enabled = *b;
1307 }
1308 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1309 config.fraud.fraud_rate = *f;
1310 }
1311 _ => {
1312 debug!("Ignoring unknown config patch key: {}", key);
1313 }
1314 }
1315 }
1316
1317 config
1318 }
1319
1320 fn build_resource_guard(
1322 config: &GeneratorConfig,
1323 output_path: Option<PathBuf>,
1324 ) -> ResourceGuard {
1325 let mut builder = ResourceGuardBuilder::new();
1326
1327 if config.global.memory_limit_mb > 0 {
1329 builder = builder.memory_limit(config.global.memory_limit_mb);
1330 }
1331
1332 if let Some(path) = output_path {
1334 builder = builder.output_path(path).min_free_disk(100); }
1336
1337 builder = builder.conservative();
1339
1340 builder.build()
1341 }
1342
1343 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1348 self.resource_guard.check()
1349 }
1350
1351 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1353 let level = self.resource_guard.check()?;
1354
1355 if level != DegradationLevel::Normal {
1356 warn!(
1357 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1358 phase,
1359 level,
1360 self.resource_guard.current_memory_mb(),
1361 self.resource_guard.available_disk_mb()
1362 );
1363 }
1364
1365 Ok(level)
1366 }
1367
1368 fn get_degradation_actions(&self) -> DegradationActions {
1370 self.resource_guard.get_actions()
1371 }
1372
1373 fn check_memory_limit(&self) -> SynthResult<()> {
1375 self.check_resources()?;
1376 Ok(())
1377 }
1378
/// Run the complete enhanced generation workflow and return every artifact it produced.
///
/// The phases run in the fixed order written below; later phases consume the
/// snapshots produced by earlier ones, and several phases append journal
/// entries to the shared `entries` vector before anomaly injection runs.
///
/// # Errors
/// * `SynthError::resource` when the initial resource check reports
///   `DegradationLevel::Emergency`.
/// * Otherwise the first error returned by any phase invoked with `?`.
///   The LLM, diffusion and causal phases return nothing and therefore
///   cannot abort the workflow from here.
pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
    info!("Starting enhanced generation workflow");
    info!(
        "Config: industry={:?}, period_months={}, companies={}",
        self.config.global.industry,
        self.config.global.period_months,
        self.config.companies.len()
    );

    // Refuse to start at all if the guard already reports an emergency.
    let initial_level = self.check_resources_with_log("initial")?;
    if initial_level == DegradationLevel::Emergency {
        return Err(SynthError::resource(
            "Insufficient resources to start generation",
        ));
    }

    // Statistics accumulator threaded through every phase.
    let mut stats = EnhancedGenerationStatistics {
        companies_count: self.config.companies.len(),
        period_months: self.config.global.period_months,
        ..Default::default()
    };

    // Chart of accounts.
    let coa = self.phase_chart_of_accounts(&mut stats)?;

    // Master data (stored on `self.master_data`).
    self.phase_master_data(&mut stats)?;

    // Document flows, subledger records and FA acquisition journal entries.
    let (mut document_flows, subledger, fa_journal_entries) =
        self.phase_document_flows(&mut stats)?;

    // Opening balances.
    let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;

    // Journal entries (flow-derived plus standalone).
    let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;

    // Fold the FA acquisition JEs into the main entry list.
    if !fa_journal_entries.is_empty() {
        debug!(
            "Appending {} FA acquisition JEs to main entries",
            fa_journal_entries.len()
        );
        entries.extend(fa_journal_entries);
    }

    // Degradation actions are read once here and reused by the later
    // anomaly-injection and data-quality phases.
    let actions = self.get_degradation_actions();

    // Sourcing data.
    let sourcing = self.phase_sourcing_data(&mut stats)?;

    // Back-fill `contract_id` on purchase orders that have none, using the
    // first sourcing contract whose vendor matches the PO's vendor.
    if !sourcing.contracts.is_empty() {
        let mut linked_count = 0usize;
        for chain in &mut document_flows.p2p_chains {
            if chain.purchase_order.contract_id.is_none() {
                if let Some(contract) = sourcing
                    .contracts
                    .iter()
                    .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
                {
                    chain.purchase_order.contract_id = Some(contract.contract_id.clone());
                    linked_count += 1;
                }
            }
        }
        if linked_count > 0 {
            debug!(
                "Linked {} purchase orders to S2C contracts by vendor match",
                linked_count
            );
        }
    }

    // Intercompany data.
    let intercompany = self.phase_intercompany(&mut stats)?;

    // Copy seller- and buyer-side IC journal entries into the main list; the
    // originals stay inside `intercompany`, which is returned as well.
    if !intercompany.seller_journal_entries.is_empty()
        || !intercompany.buyer_journal_entries.is_empty()
    {
        let ic_je_count = intercompany.seller_journal_entries.len()
            + intercompany.buyer_journal_entries.len();
        entries.extend(intercompany.seller_journal_entries.iter().cloned());
        entries.extend(intercompany.buyer_journal_entries.iter().cloned());
        debug!(
            "Appended {} IC journal entries to main entries",
            ic_je_count
        );
    }

    // HR data.
    let hr = self.phase_hr_data(&mut stats)?;

    // Journal entries derived from payroll runs.
    if !hr.payroll_runs.is_empty() {
        let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
        debug!("Generated {} JEs from payroll runs", payroll_jes.len());
        entries.extend(payroll_jes);
    }

    // Manufacturing data.
    let manufacturing_snap = self.phase_manufacturing(&mut stats)?;

    // Journal entries derived from production orders.
    if !manufacturing_snap.production_orders.is_empty() {
        let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
        debug!("Generated {} JEs from production orders", mfg_jes.len());
        entries.extend(mfg_jes);
    }

    // Recompute entry and line totals now that every JE-producing phase has run.
    if !entries.is_empty() {
        stats.total_entries = entries.len() as u64;
        stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
        debug!(
            "Final entry count: {}, line items: {} (after all JE-generating phases)",
            stats.total_entries, stats.total_line_items
        );
    }

    // Anomaly injection mutates `entries` in place.
    let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;

    // Balance validation over the (possibly anomaly-injected) entries.
    let balance_validation = self.phase_balance_validation(&entries)?;

    // Subledger reconciliation runs before data-quality injection touches the entries.
    let subledger_reconciliation =
        self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;

    // Data-quality injection mutates `entries` in place.
    let data_quality_stats =
        self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;

    // Audit data.
    let audit = self.phase_audit_data(&entries, &mut stats)?;

    // Banking data.
    let banking = self.phase_banking_data(&mut stats)?;

    // Accounting-network graph export.
    let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;

    // The next three phases only update `stats` (and internal state); they
    // return nothing, so no error can propagate from them here.
    self.phase_llm_enrichment(&mut stats);

    self.phase_diffusion_enhancement(&mut stats);

    self.phase_causal_overlay(&mut stats);

    // Financial reporting.
    let financial_reporting =
        self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;

    // Accounting standards.
    let accounting_standards = self.phase_accounting_standards(&mut stats)?;

    // OCPM events are built from the snapshots gathered above.
    let ocpm = self.phase_ocpm_events(
        &document_flows,
        &sourcing,
        &hr,
        &manufacturing_snap,
        &banking,
        &audit,
        &financial_reporting,
        &mut stats,
    )?;

    // Sales / KPI / budget data.
    let sales_kpi_budgets =
        self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;

    // Tax data.
    let tax = self.phase_tax_generation(&document_flows, &mut stats)?;

    // ESG data.
    let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;

    // Treasury data.
    let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;

    // Project accounting data.
    let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;

    // Multi-layer hypergraph export (produces no snapshot for the result).
    self.phase_hypergraph_export(
        &coa,
        &entries,
        &document_flows,
        &sourcing,
        &hr,
        &manufacturing_snap,
        &banking,
        &audit,
        &financial_reporting,
        &ocpm,
        &mut stats,
    )?;

    // Additional graphs are built under the same switch as the main graph export.
    if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
        self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
    }

    // Informational notes about config sections this batch path only logs.
    if self.config.streaming.enabled {
        info!("Note: streaming config is enabled but batch mode does not use it");
    }
    if self.config.vendor_network.enabled {
        debug!("Vendor network config available; relationship graph generation is partial");
    }
    if self.config.customer_segmentation.enabled {
        debug!("Customer segmentation config available; segment-aware generation is partial");
    }

    // Final resource summary; peak resident bytes are converted to MB for the log.
    let resource_stats = self.resource_guard.stats();
    info!(
        "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
        resource_stats.memory.peak_resident_bytes / (1024 * 1024),
        resource_stats.disk.estimated_bytes_written,
        resource_stats.degradation_level
    );

    // Lineage graph for the run.
    let lineage = self.build_lineage_graph();

    // Quality gates: only evaluated when enabled and the named profile exists.
    let gate_result = if self.config.quality_gates.enabled {
        let profile_name = &self.config.quality_gates.profile;
        match datasynth_eval::gates::get_profile(profile_name) {
            Some(profile) => {
                let mut eval = datasynth_eval::ComprehensiveEvaluation::new();

                // Balance details are attached only when validation actually ran;
                // the whole run is treated as a single evaluated period.
                if balance_validation.validated {
                    eval.coherence.balance =
                        Some(datasynth_eval::coherence::BalanceSheetEvaluation {
                            equation_balanced: balance_validation.is_balanced,
                            max_imbalance: (balance_validation.total_debits
                                - balance_validation.total_credits)
                                .abs(),
                            periods_evaluated: 1,
                            periods_imbalanced: if balance_validation.is_balanced {
                                0
                            } else {
                                1
                            },
                            period_results: Vec::new(),
                            companies_evaluated: self.config.companies.len(),
                        });
                }

                // NOTE(review): `passes` mirrors `is_balanced` even when
                // `validated` is false — confirm that the default value of
                // `is_balanced` gives the intended gate outcome in that case.
                eval.coherence.passes = balance_validation.is_balanced;
                if !balance_validation.is_balanced {
                    eval.coherence
                        .failures
                        .push("Balance sheet equation not satisfied".to_string());
                }

                // Statistical score is a fixed heuristic on entry count, not a measurement.
                eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
                eval.statistical.passes = !entries.is_empty();

                // Quality score and pass flag are hard-coded here.
                eval.quality.overall_score = 0.9; eval.quality.passes = true;

                let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
                info!(
                    "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
                    profile_name, result.gates_passed, result.gates_total, result.summary
                );
                Some(result)
            }
            None => {
                warn!(
                    "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
                    profile_name
                );
                None
            }
        }
    } else {
        None
    };

    // Standard internal controls are included only when enabled in config.
    let internal_controls = if self.config.internal_controls.enabled {
        InternalControl::standard_controls()
    } else {
        Vec::new()
    };

    // Assemble the result; the chart of accounts and master data are cloned,
    // everything else is moved in.
    Ok(EnhancedGenerationResult {
        chart_of_accounts: (*coa).clone(),
        master_data: self.master_data.clone(),
        document_flows,
        subledger,
        ocpm,
        audit,
        banking,
        graph_export,
        sourcing,
        financial_reporting,
        hr,
        accounting_standards,
        manufacturing: manufacturing_snap,
        sales_kpi_budgets,
        tax,
        esg: esg_snap,
        treasury,
        project_accounting,
        intercompany,
        journal_entries: entries,
        anomaly_labels,
        balance_validation,
        data_quality_stats,
        statistics: stats,
        lineage: Some(lineage),
        gate_result,
        internal_controls,
        opening_balances,
        subledger_reconciliation,
    })
}
1723
1724 fn phase_chart_of_accounts(
1730 &mut self,
1731 stats: &mut EnhancedGenerationStatistics,
1732 ) -> SynthResult<Arc<ChartOfAccounts>> {
1733 info!("Phase 1: Generating Chart of Accounts");
1734 let coa = self.generate_coa()?;
1735 stats.accounts_count = coa.account_count();
1736 info!(
1737 "Chart of Accounts generated: {} accounts",
1738 stats.accounts_count
1739 );
1740 self.check_resources_with_log("post-coa")?;
1741 Ok(coa)
1742 }
1743
1744 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1746 if self.phase_config.generate_master_data {
1747 info!("Phase 2: Generating Master Data");
1748 self.generate_master_data()?;
1749 stats.vendor_count = self.master_data.vendors.len();
1750 stats.customer_count = self.master_data.customers.len();
1751 stats.material_count = self.master_data.materials.len();
1752 stats.asset_count = self.master_data.assets.len();
1753 stats.employee_count = self.master_data.employees.len();
1754 info!(
1755 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1756 stats.vendor_count, stats.customer_count, stats.material_count,
1757 stats.asset_count, stats.employee_count
1758 );
1759 self.check_resources_with_log("post-master-data")?;
1760 } else {
1761 debug!("Phase 2: Skipped (master data generation disabled)");
1762 }
1763 Ok(())
1764 }
1765
1766 fn phase_document_flows(
1768 &mut self,
1769 stats: &mut EnhancedGenerationStatistics,
1770 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1771 let mut document_flows = DocumentFlowSnapshot::default();
1772 let mut subledger = SubledgerSnapshot::default();
1773
1774 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1775 info!("Phase 3: Generating Document Flows");
1776 self.generate_document_flows(&mut document_flows)?;
1777 stats.p2p_chain_count = document_flows.p2p_chains.len();
1778 stats.o2c_chain_count = document_flows.o2c_chains.len();
1779 info!(
1780 "Document flows generated: {} P2P chains, {} O2C chains",
1781 stats.p2p_chain_count, stats.o2c_chain_count
1782 );
1783
1784 debug!("Phase 3b: Linking document flows to subledgers");
1786 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1787 stats.ap_invoice_count = subledger.ap_invoices.len();
1788 stats.ar_invoice_count = subledger.ar_invoices.len();
1789 debug!(
1790 "Subledgers linked: {} AP invoices, {} AR invoices",
1791 stats.ap_invoice_count, stats.ar_invoice_count
1792 );
1793
1794 self.check_resources_with_log("post-document-flows")?;
1795 } else {
1796 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1797 }
1798
1799 let mut fa_journal_entries = Vec::new();
1801 if !self.master_data.assets.is_empty() {
1802 debug!("Generating FA subledger records");
1803 let company_code = self
1804 .config
1805 .companies
1806 .first()
1807 .map(|c| c.code.as_str())
1808 .unwrap_or("1000");
1809 let currency = self
1810 .config
1811 .companies
1812 .first()
1813 .map(|c| c.currency.as_str())
1814 .unwrap_or("USD");
1815
1816 let mut fa_gen = datasynth_generators::FAGenerator::new(
1817 datasynth_generators::FAGeneratorConfig::default(),
1818 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1819 );
1820
1821 for asset in &self.master_data.assets {
1822 let (record, je) = fa_gen.generate_asset_acquisition(
1823 company_code,
1824 &format!("{:?}", asset.asset_class),
1825 &asset.description,
1826 asset.acquisition_date,
1827 currency,
1828 asset.cost_center.as_deref(),
1829 );
1830 subledger.fa_records.push(record);
1831 fa_journal_entries.push(je);
1832 }
1833
1834 stats.fa_subledger_count = subledger.fa_records.len();
1835 debug!(
1836 "FA subledger records generated: {} (with {} acquisition JEs)",
1837 stats.fa_subledger_count,
1838 fa_journal_entries.len()
1839 );
1840 }
1841
1842 if !self.master_data.materials.is_empty() {
1844 debug!("Generating Inventory subledger records");
1845 let first_company = self.config.companies.first();
1846 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1847 let inv_currency = first_company
1848 .map(|c| c.currency.clone())
1849 .unwrap_or_else(|| "USD".to_string());
1850
1851 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1852 datasynth_generators::InventoryGeneratorConfig::default(),
1853 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1854 inv_currency.clone(),
1855 );
1856
1857 for (i, material) in self.master_data.materials.iter().enumerate() {
1858 let plant = format!("PLANT{:02}", (i % 3) + 1);
1859 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1860 let initial_qty = rust_decimal::Decimal::from(
1861 material
1862 .safety_stock
1863 .to_string()
1864 .parse::<i64>()
1865 .unwrap_or(100),
1866 );
1867
1868 let position = inv_gen.generate_position(
1869 company_code,
1870 &plant,
1871 &storage_loc,
1872 &material.material_id,
1873 &material.description,
1874 initial_qty,
1875 Some(material.standard_cost),
1876 &inv_currency,
1877 );
1878 subledger.inventory_positions.push(position);
1879 }
1880
1881 stats.inventory_subledger_count = subledger.inventory_positions.len();
1882 debug!(
1883 "Inventory subledger records generated: {}",
1884 stats.inventory_subledger_count
1885 );
1886 }
1887
1888 Ok((document_flows, subledger, fa_journal_entries))
1889 }
1890
1891 #[allow(clippy::too_many_arguments)]
1893 fn phase_ocpm_events(
1894 &mut self,
1895 document_flows: &DocumentFlowSnapshot,
1896 sourcing: &SourcingSnapshot,
1897 hr: &HrSnapshot,
1898 manufacturing: &ManufacturingSnapshot,
1899 banking: &BankingSnapshot,
1900 audit: &AuditSnapshot,
1901 financial_reporting: &FinancialReportingSnapshot,
1902 stats: &mut EnhancedGenerationStatistics,
1903 ) -> SynthResult<OcpmSnapshot> {
1904 if self.phase_config.generate_ocpm_events {
1905 info!("Phase 3c: Generating OCPM Events");
1906 let ocpm_snapshot = self.generate_ocpm_events(
1907 document_flows,
1908 sourcing,
1909 hr,
1910 manufacturing,
1911 banking,
1912 audit,
1913 financial_reporting,
1914 )?;
1915 stats.ocpm_event_count = ocpm_snapshot.event_count;
1916 stats.ocpm_object_count = ocpm_snapshot.object_count;
1917 stats.ocpm_case_count = ocpm_snapshot.case_count;
1918 info!(
1919 "OCPM events generated: {} events, {} objects, {} cases",
1920 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1921 );
1922 self.check_resources_with_log("post-ocpm")?;
1923 Ok(ocpm_snapshot)
1924 } else {
1925 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1926 Ok(OcpmSnapshot::default())
1927 }
1928 }
1929
1930 fn phase_journal_entries(
1932 &mut self,
1933 coa: &Arc<ChartOfAccounts>,
1934 document_flows: &DocumentFlowSnapshot,
1935 _stats: &mut EnhancedGenerationStatistics,
1936 ) -> SynthResult<Vec<JournalEntry>> {
1937 let mut entries = Vec::new();
1938
1939 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1941 debug!("Phase 4a: Generating JEs from document flows");
1942 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1943 debug!("Generated {} JEs from document flows", flow_entries.len());
1944 entries.extend(flow_entries);
1945 }
1946
1947 if self.phase_config.generate_journal_entries {
1949 info!("Phase 4: Generating Journal Entries");
1950 let je_entries = self.generate_journal_entries(coa)?;
1951 info!("Generated {} standalone journal entries", je_entries.len());
1952 entries.extend(je_entries);
1953 } else {
1954 debug!("Phase 4: Skipped (journal entry generation disabled)");
1955 }
1956
1957 if !entries.is_empty() {
1958 self.check_resources_with_log("post-journal-entries")?;
1961 }
1962
1963 Ok(entries)
1964 }
1965
1966 fn phase_anomaly_injection(
1968 &mut self,
1969 entries: &mut [JournalEntry],
1970 actions: &DegradationActions,
1971 stats: &mut EnhancedGenerationStatistics,
1972 ) -> SynthResult<AnomalyLabels> {
1973 if self.phase_config.inject_anomalies
1974 && !entries.is_empty()
1975 && !actions.skip_anomaly_injection
1976 {
1977 info!("Phase 5: Injecting Anomalies");
1978 let result = self.inject_anomalies(entries)?;
1979 stats.anomalies_injected = result.labels.len();
1980 info!("Injected {} anomalies", stats.anomalies_injected);
1981 self.check_resources_with_log("post-anomaly-injection")?;
1982 Ok(result)
1983 } else if actions.skip_anomaly_injection {
1984 warn!("Phase 5: Skipped due to resource degradation");
1985 Ok(AnomalyLabels::default())
1986 } else {
1987 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1988 Ok(AnomalyLabels::default())
1989 }
1990 }
1991
1992 fn phase_balance_validation(
1994 &mut self,
1995 entries: &[JournalEntry],
1996 ) -> SynthResult<BalanceValidationResult> {
1997 if self.phase_config.validate_balances && !entries.is_empty() {
1998 debug!("Phase 6: Validating Balances");
1999 let balance_validation = self.validate_journal_entries(entries)?;
2000 if balance_validation.is_balanced {
2001 debug!("Balance validation passed");
2002 } else {
2003 warn!(
2004 "Balance validation found {} errors",
2005 balance_validation.validation_errors.len()
2006 );
2007 }
2008 Ok(balance_validation)
2009 } else {
2010 Ok(BalanceValidationResult::default())
2011 }
2012 }
2013
2014 fn phase_data_quality_injection(
2016 &mut self,
2017 entries: &mut [JournalEntry],
2018 actions: &DegradationActions,
2019 stats: &mut EnhancedGenerationStatistics,
2020 ) -> SynthResult<DataQualityStats> {
2021 if self.phase_config.inject_data_quality
2022 && !entries.is_empty()
2023 && !actions.skip_data_quality
2024 {
2025 info!("Phase 7: Injecting Data Quality Variations");
2026 let dq_stats = self.inject_data_quality(entries)?;
2027 stats.data_quality_issues = dq_stats.records_with_issues;
2028 info!("Injected {} data quality issues", stats.data_quality_issues);
2029 self.check_resources_with_log("post-data-quality")?;
2030 Ok(dq_stats)
2031 } else if actions.skip_data_quality {
2032 warn!("Phase 7: Skipped due to resource degradation");
2033 Ok(DataQualityStats::default())
2034 } else {
2035 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2036 Ok(DataQualityStats::default())
2037 }
2038 }
2039
2040 fn phase_audit_data(
2042 &mut self,
2043 entries: &[JournalEntry],
2044 stats: &mut EnhancedGenerationStatistics,
2045 ) -> SynthResult<AuditSnapshot> {
2046 if self.phase_config.generate_audit {
2047 info!("Phase 8: Generating Audit Data");
2048 let audit_snapshot = self.generate_audit_data(entries)?;
2049 stats.audit_engagement_count = audit_snapshot.engagements.len();
2050 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2051 stats.audit_evidence_count = audit_snapshot.evidence.len();
2052 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2053 stats.audit_finding_count = audit_snapshot.findings.len();
2054 stats.audit_judgment_count = audit_snapshot.judgments.len();
2055 info!(
2056 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2057 stats.audit_engagement_count, stats.audit_workpaper_count,
2058 stats.audit_evidence_count, stats.audit_risk_count,
2059 stats.audit_finding_count, stats.audit_judgment_count
2060 );
2061 self.check_resources_with_log("post-audit")?;
2062 Ok(audit_snapshot)
2063 } else {
2064 debug!("Phase 8: Skipped (audit generation disabled)");
2065 Ok(AuditSnapshot::default())
2066 }
2067 }
2068
2069 fn phase_banking_data(
2071 &mut self,
2072 stats: &mut EnhancedGenerationStatistics,
2073 ) -> SynthResult<BankingSnapshot> {
2074 if self.phase_config.generate_banking && self.config.banking.enabled {
2075 info!("Phase 9: Generating Banking KYC/AML Data");
2076 let banking_snapshot = self.generate_banking_data()?;
2077 stats.banking_customer_count = banking_snapshot.customers.len();
2078 stats.banking_account_count = banking_snapshot.accounts.len();
2079 stats.banking_transaction_count = banking_snapshot.transactions.len();
2080 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2081 info!(
2082 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2083 stats.banking_customer_count, stats.banking_account_count,
2084 stats.banking_transaction_count, stats.banking_suspicious_count
2085 );
2086 self.check_resources_with_log("post-banking")?;
2087 Ok(banking_snapshot)
2088 } else {
2089 debug!("Phase 9: Skipped (banking generation disabled)");
2090 Ok(BankingSnapshot::default())
2091 }
2092 }
2093
2094 fn phase_graph_export(
2096 &mut self,
2097 entries: &[JournalEntry],
2098 coa: &Arc<ChartOfAccounts>,
2099 stats: &mut EnhancedGenerationStatistics,
2100 ) -> SynthResult<GraphExportSnapshot> {
2101 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2102 && !entries.is_empty()
2103 {
2104 info!("Phase 10: Exporting Accounting Network Graphs");
2105 match self.export_graphs(entries, coa, stats) {
2106 Ok(snapshot) => {
2107 info!(
2108 "Graph export complete: {} graphs ({} nodes, {} edges)",
2109 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2110 );
2111 Ok(snapshot)
2112 }
2113 Err(e) => {
2114 warn!("Phase 10: Graph export failed: {}", e);
2115 Ok(GraphExportSnapshot::default())
2116 }
2117 }
2118 } else {
2119 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2120 Ok(GraphExportSnapshot::default())
2121 }
2122 }
2123
2124 #[allow(clippy::too_many_arguments)]
2126 fn phase_hypergraph_export(
2127 &self,
2128 coa: &Arc<ChartOfAccounts>,
2129 entries: &[JournalEntry],
2130 document_flows: &DocumentFlowSnapshot,
2131 sourcing: &SourcingSnapshot,
2132 hr: &HrSnapshot,
2133 manufacturing: &ManufacturingSnapshot,
2134 banking: &BankingSnapshot,
2135 audit: &AuditSnapshot,
2136 financial_reporting: &FinancialReportingSnapshot,
2137 ocpm: &OcpmSnapshot,
2138 stats: &mut EnhancedGenerationStatistics,
2139 ) -> SynthResult<()> {
2140 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2141 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2142 match self.export_hypergraph(
2143 coa,
2144 entries,
2145 document_flows,
2146 sourcing,
2147 hr,
2148 manufacturing,
2149 banking,
2150 audit,
2151 financial_reporting,
2152 ocpm,
2153 stats,
2154 ) {
2155 Ok(info) => {
2156 info!(
2157 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2158 info.node_count, info.edge_count, info.hyperedge_count
2159 );
2160 }
2161 Err(e) => {
2162 warn!("Phase 10b: Hypergraph export failed: {}", e);
2163 }
2164 }
2165 } else {
2166 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2167 }
2168 Ok(())
2169 }
2170
2171 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2177 if !self.config.llm.enabled {
2178 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2179 return;
2180 }
2181
2182 info!("Phase 11: Starting LLM Enrichment");
2183 let start = std::time::Instant::now();
2184
2185 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2186 let provider = Arc::new(MockLlmProvider::new(self.seed));
2187 let enricher = VendorLlmEnricher::new(provider);
2188
2189 let industry = format!("{:?}", self.config.global.industry);
2190 let max_enrichments = self
2191 .config
2192 .llm
2193 .max_vendor_enrichments
2194 .min(self.master_data.vendors.len());
2195
2196 let mut enriched_count = 0usize;
2197 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2198 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2199 Ok(name) => {
2200 vendor.name = name;
2201 enriched_count += 1;
2202 }
2203 Err(e) => {
2204 warn!(
2205 "LLM vendor enrichment failed for {}: {}",
2206 vendor.vendor_id, e
2207 );
2208 }
2209 }
2210 }
2211
2212 enriched_count
2213 }));
2214
2215 match result {
2216 Ok(enriched_count) => {
2217 stats.llm_vendors_enriched = enriched_count;
2218 let elapsed = start.elapsed();
2219 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2220 info!(
2221 "Phase 11 complete: {} vendors enriched in {}ms",
2222 enriched_count, stats.llm_enrichment_ms
2223 );
2224 }
2225 Err(_) => {
2226 let elapsed = start.elapsed();
2227 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2228 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2229 }
2230 }
2231 }
2232
2233 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2239 if !self.config.diffusion.enabled {
2240 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2241 return;
2242 }
2243
2244 info!("Phase 12: Starting Diffusion Enhancement");
2245 let start = std::time::Instant::now();
2246
2247 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2248 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2251
2252 let diffusion_config = DiffusionConfig {
2253 n_steps: self.config.diffusion.n_steps,
2254 seed: self.seed,
2255 ..Default::default()
2256 };
2257
2258 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2259
2260 let n_samples = self.config.diffusion.sample_size;
2261 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2263
2264 samples.len()
2265 }));
2266
2267 match result {
2268 Ok(sample_count) => {
2269 stats.diffusion_samples_generated = sample_count;
2270 let elapsed = start.elapsed();
2271 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2272 info!(
2273 "Phase 12 complete: {} diffusion samples generated in {}ms",
2274 sample_count, stats.diffusion_enhancement_ms
2275 );
2276 }
2277 Err(_) => {
2278 let elapsed = start.elapsed();
2279 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2280 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2281 }
2282 }
2283 }
2284
2285 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2292 if !self.config.causal.enabled {
2293 debug!("Phase 13: Skipped (causal generation disabled)");
2294 return;
2295 }
2296
2297 info!("Phase 13: Starting Causal Overlay");
2298 let start = std::time::Instant::now();
2299
2300 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2301 let graph = match self.config.causal.template.as_str() {
2303 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2304 _ => CausalGraph::fraud_detection_template(),
2305 };
2306
2307 let scm = StructuralCausalModel::new(graph.clone())
2308 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2309
2310 let n_samples = self.config.causal.sample_size;
2311 let samples = scm
2312 .generate(n_samples, self.seed)
2313 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2314
2315 let validation_passed = if self.config.causal.validate {
2317 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2318 if report.valid {
2319 info!(
2320 "Causal validation passed: all {} checks OK",
2321 report.checks.len()
2322 );
2323 } else {
2324 warn!(
2325 "Causal validation: {} violations detected: {:?}",
2326 report.violations.len(),
2327 report.violations
2328 );
2329 }
2330 Some(report.valid)
2331 } else {
2332 None
2333 };
2334
2335 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2336 }));
2337
2338 match result {
2339 Ok(Ok((sample_count, validation_passed))) => {
2340 stats.causal_samples_generated = sample_count;
2341 stats.causal_validation_passed = validation_passed;
2342 let elapsed = start.elapsed();
2343 stats.causal_generation_ms = elapsed.as_millis() as u64;
2344 info!(
2345 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2346 sample_count, stats.causal_generation_ms, validation_passed,
2347 );
2348 }
2349 Ok(Err(e)) => {
2350 let elapsed = start.elapsed();
2351 stats.causal_generation_ms = elapsed.as_millis() as u64;
2352 warn!("Phase 13: Causal generation failed: {}", e);
2353 }
2354 Err(_) => {
2355 let elapsed = start.elapsed();
2356 stats.causal_generation_ms = elapsed.as_millis() as u64;
2357 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2358 }
2359 }
2360 }
2361
2362 fn phase_sourcing_data(
2364 &mut self,
2365 stats: &mut EnhancedGenerationStatistics,
2366 ) -> SynthResult<SourcingSnapshot> {
2367 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2368 debug!("Phase 14: Skipped (sourcing generation disabled)");
2369 return Ok(SourcingSnapshot::default());
2370 }
2371
2372 info!("Phase 14: Generating S2C Sourcing Data");
2373 let seed = self.seed;
2374
2375 let vendor_ids: Vec<String> = self
2377 .master_data
2378 .vendors
2379 .iter()
2380 .map(|v| v.vendor_id.clone())
2381 .collect();
2382 if vendor_ids.is_empty() {
2383 debug!("Phase 14: Skipped (no vendors available)");
2384 return Ok(SourcingSnapshot::default());
2385 }
2386
2387 let categories: Vec<(String, String)> = vec![
2388 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2389 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2390 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2391 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2392 ("CAT-LOG".to_string(), "Logistics".to_string()),
2393 ];
2394 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2395 .iter()
2396 .map(|(id, name)| {
2397 (
2398 id.clone(),
2399 name.clone(),
2400 rust_decimal::Decimal::from(100_000),
2401 )
2402 })
2403 .collect();
2404
2405 let company_code = self
2406 .config
2407 .companies
2408 .first()
2409 .map(|c| c.code.as_str())
2410 .unwrap_or("1000");
2411 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2412 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2413 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2414 let fiscal_year = start_date.year() as u16;
2415 let owner_ids: Vec<String> = self
2416 .master_data
2417 .employees
2418 .iter()
2419 .take(5)
2420 .map(|e| e.employee_id.clone())
2421 .collect();
2422 let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2423
2424 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2426 let spend_analyses =
2427 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2428
2429 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2431 let sourcing_projects = if owner_ids.is_empty() {
2432 Vec::new()
2433 } else {
2434 project_gen.generate(
2435 company_code,
2436 &categories_with_spend,
2437 &owner_ids,
2438 start_date,
2439 self.config.global.period_months,
2440 )
2441 };
2442 stats.sourcing_project_count = sourcing_projects.len();
2443
2444 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2446 let mut qual_gen = QualificationGenerator::new(seed + 2);
2447 let qualifications = qual_gen.generate(
2448 company_code,
2449 &qual_vendor_ids,
2450 sourcing_projects.first().map(|p| p.project_id.as_str()),
2451 owner_id,
2452 start_date,
2453 );
2454
2455 let mut rfx_gen = RfxGenerator::new(seed + 3);
2457 let rfx_events: Vec<RfxEvent> = sourcing_projects
2458 .iter()
2459 .map(|proj| {
2460 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2461 rfx_gen.generate(
2462 company_code,
2463 &proj.project_id,
2464 &proj.category_id,
2465 &qualified_vids,
2466 owner_id,
2467 start_date,
2468 50000.0,
2469 )
2470 })
2471 .collect();
2472 stats.rfx_event_count = rfx_events.len();
2473
2474 let mut bid_gen = BidGenerator::new(seed + 4);
2476 let mut all_bids = Vec::new();
2477 for rfx in &rfx_events {
2478 let bidder_count = vendor_ids.len().clamp(2, 5);
2479 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2480 let bids = bid_gen.generate(rfx, &responding, start_date);
2481 all_bids.extend(bids);
2482 }
2483 stats.bid_count = all_bids.len();
2484
2485 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2487 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2488 .iter()
2489 .map(|rfx| {
2490 let rfx_bids: Vec<SupplierBid> = all_bids
2491 .iter()
2492 .filter(|b| b.rfx_id == rfx.rfx_id)
2493 .cloned()
2494 .collect();
2495 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2496 })
2497 .collect();
2498
2499 let mut contract_gen = ContractGenerator::new(seed + 6);
2501 let contracts: Vec<ProcurementContract> = bid_evaluations
2502 .iter()
2503 .zip(rfx_events.iter())
2504 .filter_map(|(eval, rfx)| {
2505 eval.ranked_bids.first().and_then(|winner| {
2506 all_bids
2507 .iter()
2508 .find(|b| b.bid_id == winner.bid_id)
2509 .map(|winning_bid| {
2510 contract_gen.generate_from_bid(
2511 winning_bid,
2512 Some(&rfx.sourcing_project_id),
2513 &rfx.category_id,
2514 owner_id,
2515 start_date,
2516 )
2517 })
2518 })
2519 })
2520 .collect();
2521 stats.contract_count = contracts.len();
2522
2523 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2525 let catalog_items = catalog_gen.generate(&contracts);
2526 stats.catalog_item_count = catalog_items.len();
2527
2528 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2530 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2531 .iter()
2532 .fold(
2533 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2534 |mut acc, c| {
2535 acc.entry(c.vendor_id.clone()).or_default().push(c);
2536 acc
2537 },
2538 )
2539 .into_iter()
2540 .collect();
2541 let scorecards = scorecard_gen.generate(
2542 company_code,
2543 &vendor_contracts,
2544 start_date,
2545 end_date,
2546 owner_id,
2547 );
2548 stats.scorecard_count = scorecards.len();
2549
2550 let mut sourcing_projects = sourcing_projects;
2553 for project in &mut sourcing_projects {
2554 project.rfx_ids = rfx_events
2556 .iter()
2557 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2558 .map(|rfx| rfx.rfx_id.clone())
2559 .collect();
2560
2561 project.contract_id = contracts
2563 .iter()
2564 .find(|c| {
2565 c.sourcing_project_id
2566 .as_deref()
2567 .is_some_and(|sp| sp == project.project_id)
2568 })
2569 .map(|c| c.contract_id.clone());
2570
2571 project.spend_analysis_id = spend_analyses
2573 .iter()
2574 .find(|sa| sa.category_id == project.category_id)
2575 .map(|sa| sa.category_id.clone());
2576 }
2577
2578 info!(
2579 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2580 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2581 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2582 );
2583 self.check_resources_with_log("post-sourcing")?;
2584
2585 Ok(SourcingSnapshot {
2586 spend_analyses,
2587 sourcing_projects,
2588 qualifications,
2589 rfx_events,
2590 bids: all_bids,
2591 bid_evaluations,
2592 contracts,
2593 catalog_items,
2594 scorecards,
2595 })
2596 }
2597
2598 fn phase_intercompany(
2600 &mut self,
2601 stats: &mut EnhancedGenerationStatistics,
2602 ) -> SynthResult<IntercompanySnapshot> {
2603 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2605 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2606 return Ok(IntercompanySnapshot::default());
2607 }
2608
2609 if self.config.companies.len() < 2 {
2611 debug!(
2612 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2613 self.config.companies.len()
2614 );
2615 return Ok(IntercompanySnapshot::default());
2616 }
2617
2618 info!("Phase 14b: Generating Intercompany Transactions");
2619
2620 let seed = self.seed;
2621 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2622 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2623 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2624
2625 let parent_code = self.config.companies[0].code.clone();
2628 let mut ownership_structure =
2629 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2630
2631 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2632 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2633 format!("REL{:03}", i + 1),
2634 parent_code.clone(),
2635 company.code.clone(),
2636 rust_decimal::Decimal::from(100), start_date,
2638 );
2639 ownership_structure.add_relationship(relationship);
2640 }
2641
2642 let tp_method = match self.config.intercompany.transfer_pricing_method {
2644 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2645 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2646 }
2647 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2648 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2649 }
2650 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2651 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2652 }
2653 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2654 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2655 }
2656 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2657 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2658 }
2659 };
2660
2661 let ic_currency = self
2663 .config
2664 .companies
2665 .first()
2666 .map(|c| c.currency.clone())
2667 .unwrap_or_else(|| "USD".to_string());
2668 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2669 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2670 transfer_pricing_method: tp_method,
2671 markup_percent: rust_decimal::Decimal::from_f64_retain(
2672 self.config.intercompany.markup_percent,
2673 )
2674 .unwrap_or(rust_decimal::Decimal::from(5)),
2675 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2676 default_currency: ic_currency,
2677 ..Default::default()
2678 };
2679
2680 let mut ic_generator = datasynth_generators::ICGenerator::new(
2682 ic_gen_config,
2683 ownership_structure.clone(),
2684 seed + 50,
2685 );
2686
2687 let transactions_per_day = 3;
2690 let matched_pairs = ic_generator.generate_transactions_for_period(
2691 start_date,
2692 end_date,
2693 transactions_per_day,
2694 );
2695
2696 let mut seller_entries = Vec::new();
2698 let mut buyer_entries = Vec::new();
2699 let fiscal_year = start_date.year();
2700
2701 for pair in &matched_pairs {
2702 let fiscal_period = pair.posting_date.month();
2703 let (seller_je, buyer_je) =
2704 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2705 seller_entries.push(seller_je);
2706 buyer_entries.push(buyer_je);
2707 }
2708
2709 let matching_config = datasynth_generators::ICMatchingConfig {
2711 base_currency: self
2712 .config
2713 .companies
2714 .first()
2715 .map(|c| c.currency.clone())
2716 .unwrap_or_else(|| "USD".to_string()),
2717 ..Default::default()
2718 };
2719 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2720 matching_engine.load_matched_pairs(&matched_pairs);
2721 let matching_result = matching_engine.run_matching(end_date);
2722
2723 let mut elimination_entries = Vec::new();
2725 if self.config.intercompany.generate_eliminations {
2726 let elim_config = datasynth_generators::EliminationConfig {
2727 consolidation_entity: "GROUP".to_string(),
2728 base_currency: self
2729 .config
2730 .companies
2731 .first()
2732 .map(|c| c.currency.clone())
2733 .unwrap_or_else(|| "USD".to_string()),
2734 ..Default::default()
2735 };
2736
2737 let mut elim_generator =
2738 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2739
2740 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2741 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2742 matching_result
2743 .matched_balances
2744 .iter()
2745 .chain(matching_result.unmatched_balances.iter())
2746 .cloned()
2747 .collect();
2748
2749 let journal = elim_generator.generate_eliminations(
2750 &fiscal_period,
2751 end_date,
2752 &all_balances,
2753 &matched_pairs,
2754 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
2757
2758 elimination_entries = journal.entries.clone();
2759 }
2760
2761 let matched_pair_count = matched_pairs.len();
2762 let elimination_entry_count = elimination_entries.len();
2763 let match_rate = matching_result.match_rate;
2764
2765 stats.ic_matched_pair_count = matched_pair_count;
2766 stats.ic_elimination_count = elimination_entry_count;
2767 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2768
2769 info!(
2770 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2771 matched_pair_count,
2772 stats.ic_transaction_count,
2773 seller_entries.len(),
2774 buyer_entries.len(),
2775 elimination_entry_count,
2776 match_rate * 100.0
2777 );
2778 self.check_resources_with_log("post-intercompany")?;
2779
2780 Ok(IntercompanySnapshot {
2781 matched_pairs,
2782 seller_journal_entries: seller_entries,
2783 buyer_journal_entries: buyer_entries,
2784 elimination_entries,
2785 matched_pair_count,
2786 elimination_entry_count,
2787 match_rate,
2788 })
2789 }
2790
2791 fn phase_financial_reporting(
2793 &mut self,
2794 document_flows: &DocumentFlowSnapshot,
2795 journal_entries: &[JournalEntry],
2796 coa: &Arc<ChartOfAccounts>,
2797 stats: &mut EnhancedGenerationStatistics,
2798 ) -> SynthResult<FinancialReportingSnapshot> {
2799 let fs_enabled = self.phase_config.generate_financial_statements
2800 || self.config.financial_reporting.enabled;
2801 let br_enabled = self.phase_config.generate_bank_reconciliation;
2802
2803 if !fs_enabled && !br_enabled {
2804 debug!("Phase 15: Skipped (financial reporting disabled)");
2805 return Ok(FinancialReportingSnapshot::default());
2806 }
2807
2808 info!("Phase 15: Generating Financial Reporting Data");
2809
2810 let seed = self.seed;
2811 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2812 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2813
2814 let mut financial_statements = Vec::new();
2815 let mut bank_reconciliations = Vec::new();
2816 let mut trial_balances = Vec::new();
2817
2818 if fs_enabled {
2826 let company_code = self
2827 .config
2828 .companies
2829 .first()
2830 .map(|c| c.code.as_str())
2831 .unwrap_or("1000");
2832 let currency = self
2833 .config
2834 .companies
2835 .first()
2836 .map(|c| c.currency.as_str())
2837 .unwrap_or("USD");
2838 let has_journal_entries = !journal_entries.is_empty();
2839
2840 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2843
2844 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2846 None;
2847
2848 for period in 0..self.config.global.period_months {
2850 let period_start = start_date + chrono::Months::new(period);
2851 let period_end =
2852 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2853 let fiscal_year = period_end.year() as u16;
2854 let fiscal_period = period_end.month() as u8;
2855
2856 if has_journal_entries {
2857 let tb_entries = Self::build_cumulative_trial_balance(
2860 journal_entries,
2861 coa,
2862 company_code,
2863 start_date,
2864 period_end,
2865 fiscal_year,
2866 fiscal_period,
2867 );
2868
2869 let prior_ref = prior_cumulative_tb.as_deref();
2872 let stmts = fs_gen.generate(
2873 company_code,
2874 currency,
2875 &tb_entries,
2876 period_start,
2877 period_end,
2878 fiscal_year,
2879 fiscal_period,
2880 prior_ref,
2881 "SYS-AUTOCLOSE",
2882 );
2883
2884 for stmt in stmts {
2886 if stmt.statement_type == StatementType::CashFlowStatement {
2887 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2889 let cf_items = Self::build_cash_flow_from_trial_balances(
2890 &tb_entries,
2891 prior_ref,
2892 net_income,
2893 );
2894 financial_statements.push(FinancialStatement {
2895 cash_flow_items: cf_items,
2896 ..stmt
2897 });
2898 } else {
2899 financial_statements.push(stmt);
2900 }
2901 }
2902
2903 trial_balances.push(PeriodTrialBalance {
2905 fiscal_year,
2906 fiscal_period,
2907 period_start,
2908 period_end,
2909 entries: tb_entries.clone(),
2910 });
2911
2912 prior_cumulative_tb = Some(tb_entries);
2914 } else {
2915 let tb_entries = Self::build_trial_balance_from_entries(
2918 journal_entries,
2919 coa,
2920 company_code,
2921 fiscal_year,
2922 fiscal_period,
2923 );
2924
2925 let stmts = fs_gen.generate(
2926 company_code,
2927 currency,
2928 &tb_entries,
2929 period_start,
2930 period_end,
2931 fiscal_year,
2932 fiscal_period,
2933 None,
2934 "SYS-AUTOCLOSE",
2935 );
2936 financial_statements.extend(stmts);
2937
2938 if !tb_entries.is_empty() {
2940 trial_balances.push(PeriodTrialBalance {
2941 fiscal_year,
2942 fiscal_period,
2943 period_start,
2944 period_end,
2945 entries: tb_entries,
2946 });
2947 }
2948 }
2949 }
2950 stats.financial_statement_count = financial_statements.len();
2951 info!(
2952 "Financial statements generated: {} statements (JE-derived: {})",
2953 stats.financial_statement_count, has_journal_entries
2954 );
2955 }
2956
2957 if br_enabled && !document_flows.payments.is_empty() {
2959 let employee_ids: Vec<String> = self
2960 .master_data
2961 .employees
2962 .iter()
2963 .map(|e| e.employee_id.clone())
2964 .collect();
2965 let mut br_gen =
2966 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2967
2968 for company in &self.config.companies {
2970 let company_payments: Vec<PaymentReference> = document_flows
2971 .payments
2972 .iter()
2973 .filter(|p| p.header.company_code == company.code)
2974 .map(|p| PaymentReference {
2975 id: p.header.document_id.clone(),
2976 amount: if p.is_vendor { p.amount } else { -p.amount },
2977 date: p.header.document_date,
2978 reference: p
2979 .check_number
2980 .clone()
2981 .or_else(|| p.wire_reference.clone())
2982 .unwrap_or_else(|| p.header.document_id.clone()),
2983 })
2984 .collect();
2985
2986 if company_payments.is_empty() {
2987 continue;
2988 }
2989
2990 let bank_account_id = format!("{}-MAIN", company.code);
2991
2992 for period in 0..self.config.global.period_months {
2994 let period_start = start_date + chrono::Months::new(period);
2995 let period_end =
2996 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2997
2998 let period_payments: Vec<PaymentReference> = company_payments
2999 .iter()
3000 .filter(|p| p.date >= period_start && p.date <= period_end)
3001 .cloned()
3002 .collect();
3003
3004 let recon = br_gen.generate(
3005 &company.code,
3006 &bank_account_id,
3007 period_start,
3008 period_end,
3009 &company.currency,
3010 &period_payments,
3011 );
3012 bank_reconciliations.push(recon);
3013 }
3014 }
3015 info!(
3016 "Bank reconciliations generated: {} reconciliations",
3017 bank_reconciliations.len()
3018 );
3019 }
3020
3021 stats.bank_reconciliation_count = bank_reconciliations.len();
3022 self.check_resources_with_log("post-financial-reporting")?;
3023
3024 if !trial_balances.is_empty() {
3025 info!(
3026 "Period-close trial balances captured: {} periods",
3027 trial_balances.len()
3028 );
3029 }
3030
3031 Ok(FinancialReportingSnapshot {
3032 financial_statements,
3033 bank_reconciliations,
3034 trial_balances,
3035 })
3036 }
3037
3038 fn build_trial_balance_from_entries(
3044 journal_entries: &[JournalEntry],
3045 coa: &ChartOfAccounts,
3046 company_code: &str,
3047 fiscal_year: u16,
3048 fiscal_period: u8,
3049 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3050 use rust_decimal::Decimal;
3051
3052 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3054 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3055
3056 for je in journal_entries {
3057 if je.header.company_code != company_code
3059 || je.header.fiscal_year != fiscal_year
3060 || je.header.fiscal_period != fiscal_period
3061 {
3062 continue;
3063 }
3064
3065 for line in &je.lines {
3066 let acct = &line.gl_account;
3067 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3068 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3069 }
3070 }
3071
3072 let mut all_accounts: Vec<&String> = account_debits
3074 .keys()
3075 .chain(account_credits.keys())
3076 .collect::<std::collections::HashSet<_>>()
3077 .into_iter()
3078 .collect();
3079 all_accounts.sort();
3080
3081 let mut entries = Vec::new();
3082
3083 for acct_number in all_accounts {
3084 let debit = account_debits
3085 .get(acct_number)
3086 .copied()
3087 .unwrap_or(Decimal::ZERO);
3088 let credit = account_credits
3089 .get(acct_number)
3090 .copied()
3091 .unwrap_or(Decimal::ZERO);
3092
3093 if debit.is_zero() && credit.is_zero() {
3094 continue;
3095 }
3096
3097 let account_name = coa
3099 .get_account(acct_number)
3100 .map(|gl| gl.short_description.clone())
3101 .unwrap_or_else(|| format!("Account {}", acct_number));
3102
3103 let category = Self::category_from_account_code(acct_number);
3108
3109 entries.push(datasynth_generators::TrialBalanceEntry {
3110 account_code: acct_number.clone(),
3111 account_name,
3112 category,
3113 debit_balance: debit,
3114 credit_balance: credit,
3115 });
3116 }
3117
3118 entries
3119 }
3120
3121 fn build_cumulative_trial_balance(
3128 journal_entries: &[JournalEntry],
3129 coa: &ChartOfAccounts,
3130 company_code: &str,
3131 start_date: NaiveDate,
3132 period_end: NaiveDate,
3133 fiscal_year: u16,
3134 fiscal_period: u8,
3135 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3136 use rust_decimal::Decimal;
3137
3138 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3140 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3141
3142 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3144 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3145
3146 for je in journal_entries {
3147 if je.header.company_code != company_code {
3148 continue;
3149 }
3150
3151 for line in &je.lines {
3152 let acct = &line.gl_account;
3153 let category = Self::category_from_account_code(acct);
3154 let is_bs_account = matches!(
3155 category.as_str(),
3156 "Cash"
3157 | "Receivables"
3158 | "Inventory"
3159 | "FixedAssets"
3160 | "Payables"
3161 | "AccruedLiabilities"
3162 | "LongTermDebt"
3163 | "Equity"
3164 );
3165
3166 if is_bs_account {
3167 if je.header.document_date <= period_end
3169 && je.header.document_date >= start_date
3170 {
3171 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3172 line.debit_amount;
3173 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3174 line.credit_amount;
3175 }
3176 } else {
3177 if je.header.fiscal_year == fiscal_year
3179 && je.header.fiscal_period == fiscal_period
3180 {
3181 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3182 line.debit_amount;
3183 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3184 line.credit_amount;
3185 }
3186 }
3187 }
3188 }
3189
3190 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3192 all_accounts.extend(bs_debits.keys().cloned());
3193 all_accounts.extend(bs_credits.keys().cloned());
3194 all_accounts.extend(is_debits.keys().cloned());
3195 all_accounts.extend(is_credits.keys().cloned());
3196
3197 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3198 sorted_accounts.sort();
3199
3200 let mut entries = Vec::new();
3201
3202 for acct_number in &sorted_accounts {
3203 let category = Self::category_from_account_code(acct_number);
3204 let is_bs_account = matches!(
3205 category.as_str(),
3206 "Cash"
3207 | "Receivables"
3208 | "Inventory"
3209 | "FixedAssets"
3210 | "Payables"
3211 | "AccruedLiabilities"
3212 | "LongTermDebt"
3213 | "Equity"
3214 );
3215
3216 let (debit, credit) = if is_bs_account {
3217 (
3218 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3219 bs_credits
3220 .get(acct_number)
3221 .copied()
3222 .unwrap_or(Decimal::ZERO),
3223 )
3224 } else {
3225 (
3226 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3227 is_credits
3228 .get(acct_number)
3229 .copied()
3230 .unwrap_or(Decimal::ZERO),
3231 )
3232 };
3233
3234 if debit.is_zero() && credit.is_zero() {
3235 continue;
3236 }
3237
3238 let account_name = coa
3239 .get_account(acct_number)
3240 .map(|gl| gl.short_description.clone())
3241 .unwrap_or_else(|| format!("Account {}", acct_number));
3242
3243 entries.push(datasynth_generators::TrialBalanceEntry {
3244 account_code: acct_number.clone(),
3245 account_name,
3246 category,
3247 debit_balance: debit,
3248 credit_balance: credit,
3249 });
3250 }
3251
3252 entries
3253 }
3254
3255 fn build_cash_flow_from_trial_balances(
3260 current_tb: &[datasynth_generators::TrialBalanceEntry],
3261 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3262 net_income: rust_decimal::Decimal,
3263 ) -> Vec<CashFlowItem> {
3264 use rust_decimal::Decimal;
3265
3266 let aggregate =
3268 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3269 let mut map: HashMap<String, Decimal> = HashMap::new();
3270 for entry in tb {
3271 let net = entry.debit_balance - entry.credit_balance;
3272 *map.entry(entry.category.clone()).or_default() += net;
3273 }
3274 map
3275 };
3276
3277 let current = aggregate(current_tb);
3278 let prior = prior_tb.map(aggregate);
3279
3280 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3282 *map.get(key).unwrap_or(&Decimal::ZERO)
3283 };
3284
3285 let change = |key: &str| -> Decimal {
3287 let curr = get(¤t, key);
3288 match &prior {
3289 Some(p) => curr - get(p, key),
3290 None => curr,
3291 }
3292 };
3293
3294 let fixed_asset_change = change("FixedAssets");
3297 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3298 -fixed_asset_change
3299 } else {
3300 Decimal::ZERO
3301 };
3302
3303 let ar_change = change("Receivables");
3305 let inventory_change = change("Inventory");
3306 let ap_change = change("Payables");
3308 let accrued_change = change("AccruedLiabilities");
3309
3310 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3311 + (-ap_change)
3312 + (-accrued_change);
3313
3314 let capex = if fixed_asset_change > Decimal::ZERO {
3316 -fixed_asset_change
3317 } else {
3318 Decimal::ZERO
3319 };
3320 let investing_cf = capex;
3321
3322 let debt_change = -change("LongTermDebt");
3324 let equity_change = -change("Equity");
3325 let financing_cf = debt_change + equity_change;
3326
3327 let net_change = operating_cf + investing_cf + financing_cf;
3328
3329 vec![
3330 CashFlowItem {
3331 item_code: "CF-NI".to_string(),
3332 label: "Net Income".to_string(),
3333 category: CashFlowCategory::Operating,
3334 amount: net_income,
3335 amount_prior: None,
3336 sort_order: 1,
3337 is_total: false,
3338 },
3339 CashFlowItem {
3340 item_code: "CF-DEP".to_string(),
3341 label: "Depreciation & Amortization".to_string(),
3342 category: CashFlowCategory::Operating,
3343 amount: depreciation_addback,
3344 amount_prior: None,
3345 sort_order: 2,
3346 is_total: false,
3347 },
3348 CashFlowItem {
3349 item_code: "CF-AR".to_string(),
3350 label: "Change in Accounts Receivable".to_string(),
3351 category: CashFlowCategory::Operating,
3352 amount: -ar_change,
3353 amount_prior: None,
3354 sort_order: 3,
3355 is_total: false,
3356 },
3357 CashFlowItem {
3358 item_code: "CF-AP".to_string(),
3359 label: "Change in Accounts Payable".to_string(),
3360 category: CashFlowCategory::Operating,
3361 amount: -ap_change,
3362 amount_prior: None,
3363 sort_order: 4,
3364 is_total: false,
3365 },
3366 CashFlowItem {
3367 item_code: "CF-INV".to_string(),
3368 label: "Change in Inventory".to_string(),
3369 category: CashFlowCategory::Operating,
3370 amount: -inventory_change,
3371 amount_prior: None,
3372 sort_order: 5,
3373 is_total: false,
3374 },
3375 CashFlowItem {
3376 item_code: "CF-OP".to_string(),
3377 label: "Net Cash from Operating Activities".to_string(),
3378 category: CashFlowCategory::Operating,
3379 amount: operating_cf,
3380 amount_prior: None,
3381 sort_order: 6,
3382 is_total: true,
3383 },
3384 CashFlowItem {
3385 item_code: "CF-CAPEX".to_string(),
3386 label: "Capital Expenditures".to_string(),
3387 category: CashFlowCategory::Investing,
3388 amount: capex,
3389 amount_prior: None,
3390 sort_order: 7,
3391 is_total: false,
3392 },
3393 CashFlowItem {
3394 item_code: "CF-INV-T".to_string(),
3395 label: "Net Cash from Investing Activities".to_string(),
3396 category: CashFlowCategory::Investing,
3397 amount: investing_cf,
3398 amount_prior: None,
3399 sort_order: 8,
3400 is_total: true,
3401 },
3402 CashFlowItem {
3403 item_code: "CF-DEBT".to_string(),
3404 label: "Net Borrowings / (Repayments)".to_string(),
3405 category: CashFlowCategory::Financing,
3406 amount: debt_change,
3407 amount_prior: None,
3408 sort_order: 9,
3409 is_total: false,
3410 },
3411 CashFlowItem {
3412 item_code: "CF-EQ".to_string(),
3413 label: "Equity Changes".to_string(),
3414 category: CashFlowCategory::Financing,
3415 amount: equity_change,
3416 amount_prior: None,
3417 sort_order: 10,
3418 is_total: false,
3419 },
3420 CashFlowItem {
3421 item_code: "CF-FIN-T".to_string(),
3422 label: "Net Cash from Financing Activities".to_string(),
3423 category: CashFlowCategory::Financing,
3424 amount: financing_cf,
3425 amount_prior: None,
3426 sort_order: 11,
3427 is_total: true,
3428 },
3429 CashFlowItem {
3430 item_code: "CF-NET".to_string(),
3431 label: "Net Change in Cash".to_string(),
3432 category: CashFlowCategory::Operating,
3433 amount: net_change,
3434 amount_prior: None,
3435 sort_order: 12,
3436 is_total: true,
3437 },
3438 ]
3439 }
3440
3441 fn calculate_net_income_from_tb(
3445 tb: &[datasynth_generators::TrialBalanceEntry],
3446 ) -> rust_decimal::Decimal {
3447 use rust_decimal::Decimal;
3448
3449 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3450 for entry in tb {
3451 let net = entry.debit_balance - entry.credit_balance;
3452 *aggregated.entry(entry.category.clone()).or_default() += net;
3453 }
3454
3455 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3456 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3457 let opex = *aggregated
3458 .get("OperatingExpenses")
3459 .unwrap_or(&Decimal::ZERO);
3460 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3461 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3462
3463 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3466 let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3467 let tax = operating_income * tax_rate;
3468 operating_income - tax
3469 }
3470
/// Maps a GL account code to a reporting category using its first two
/// characters.
///
/// Recognised two-digit prefixes: `10` Cash, `11` Receivables, `12`–`14`
/// Inventory, `15`–`19` FixedAssets, `20` Payables, `21`–`24`
/// AccruedLiabilities, `25`–`29` LongTermDebt, `30`–`39` Equity, `40`–`44`
/// Revenue, `50`–`52` CostOfSales, `60`–`69` OperatingExpenses, `70`–`74`
/// OtherIncome, `80`–`89` OtherExpenses. Anything else — including codes
/// shorter than two characters or not starting with two ASCII digits —
/// falls back to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    // Turn the two leading characters into a number 0..=99, or `None` when
    // either is missing or is not an ASCII decimal digit.
    let mut leading = code.chars();
    let bucket = match (leading.next(), leading.next()) {
        (Some(tens), Some(units)) => tens
            .to_digit(10)
            .zip(units.to_digit(10))
            .map(|(t, u)| t * 10 + u),
        _ => None,
    };

    let category = match bucket {
        Some(10) => "Cash",
        Some(11) => "Receivables",
        Some(12..=14) => "Inventory",
        Some(15..=19) => "FixedAssets",
        Some(20) => "Payables",
        Some(21..=24) => "AccruedLiabilities",
        Some(25..=29) => "LongTermDebt",
        Some(30..=39) => "Equity",
        Some(40..=44) => "Revenue",
        Some(50..=52) => "CostOfSales",
        Some(60..=69) => "OperatingExpenses",
        Some(70..=74) => "OtherIncome",
        Some(80..=89) => "OtherExpenses",
        _ => "OperatingExpenses",
    };

    category.to_owned()
}
3499
3500 fn phase_hr_data(
3502 &mut self,
3503 stats: &mut EnhancedGenerationStatistics,
3504 ) -> SynthResult<HrSnapshot> {
3505 if !self.config.hr.enabled {
3506 debug!("Phase 16: Skipped (HR generation disabled)");
3507 return Ok(HrSnapshot::default());
3508 }
3509
3510 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3511
3512 let seed = self.seed;
3513 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3514 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3515 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3516 let company_code = self
3517 .config
3518 .companies
3519 .first()
3520 .map(|c| c.code.as_str())
3521 .unwrap_or("1000");
3522 let currency = self
3523 .config
3524 .companies
3525 .first()
3526 .map(|c| c.currency.as_str())
3527 .unwrap_or("USD");
3528
3529 let employee_ids: Vec<String> = self
3530 .master_data
3531 .employees
3532 .iter()
3533 .map(|e| e.employee_id.clone())
3534 .collect();
3535
3536 if employee_ids.is_empty() {
3537 debug!("Phase 16: Skipped (no employees available)");
3538 return Ok(HrSnapshot::default());
3539 }
3540
3541 let cost_center_ids: Vec<String> = self
3544 .master_data
3545 .employees
3546 .iter()
3547 .filter_map(|e| e.cost_center.clone())
3548 .collect::<std::collections::HashSet<_>>()
3549 .into_iter()
3550 .collect();
3551
3552 let mut snapshot = HrSnapshot::default();
3553
3554 if self.config.hr.payroll.enabled {
3556 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3557 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3558
3559 let payroll_pack = self.primary_pack();
3561
3562 payroll_gen.set_country_pack(payroll_pack.clone());
3565
3566 let employees_with_salary: Vec<(
3567 String,
3568 rust_decimal::Decimal,
3569 Option<String>,
3570 Option<String>,
3571 )> = self
3572 .master_data
3573 .employees
3574 .iter()
3575 .map(|e| {
3576 (
3577 e.employee_id.clone(),
3578 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3580 e.department_id.clone(),
3581 )
3582 })
3583 .collect();
3584
3585 for month in 0..self.config.global.period_months {
3586 let period_start = start_date + chrono::Months::new(month);
3587 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3588 let (run, items) = payroll_gen.generate(
3589 company_code,
3590 &employees_with_salary,
3591 period_start,
3592 period_end,
3593 currency,
3594 );
3595 snapshot.payroll_runs.push(run);
3596 snapshot.payroll_run_count += 1;
3597 snapshot.payroll_line_item_count += items.len();
3598 snapshot.payroll_line_items.extend(items);
3599 }
3600 }
3601
3602 if self.config.hr.time_attendance.enabled {
3604 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3605 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3606 let entries = time_gen.generate(
3607 &employee_ids,
3608 start_date,
3609 end_date,
3610 &self.config.hr.time_attendance,
3611 );
3612 snapshot.time_entry_count = entries.len();
3613 snapshot.time_entries = entries;
3614 }
3615
3616 if self.config.hr.expenses.enabled {
3618 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3619 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3620 let company_currency = self
3621 .config
3622 .companies
3623 .first()
3624 .map(|c| c.currency.as_str())
3625 .unwrap_or("USD");
3626 let reports = expense_gen.generate_with_currency(
3627 &employee_ids,
3628 start_date,
3629 end_date,
3630 &self.config.hr.expenses,
3631 company_currency,
3632 );
3633 snapshot.expense_report_count = reports.len();
3634 snapshot.expense_reports = reports;
3635 }
3636
3637 stats.payroll_run_count = snapshot.payroll_run_count;
3638 stats.time_entry_count = snapshot.time_entry_count;
3639 stats.expense_report_count = snapshot.expense_report_count;
3640
3641 info!(
3642 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3643 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3644 snapshot.time_entry_count, snapshot.expense_report_count
3645 );
3646 self.check_resources_with_log("post-hr")?;
3647
3648 Ok(snapshot)
3649 }
3650
3651 fn phase_accounting_standards(
3653 &mut self,
3654 stats: &mut EnhancedGenerationStatistics,
3655 ) -> SynthResult<AccountingStandardsSnapshot> {
3656 if !self.phase_config.generate_accounting_standards
3657 || !self.config.accounting_standards.enabled
3658 {
3659 debug!("Phase 17: Skipped (accounting standards generation disabled)");
3660 return Ok(AccountingStandardsSnapshot::default());
3661 }
3662 info!("Phase 17: Generating Accounting Standards Data");
3663
3664 let seed = self.seed;
3665 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3666 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3667 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3668 let company_code = self
3669 .config
3670 .companies
3671 .first()
3672 .map(|c| c.code.as_str())
3673 .unwrap_or("1000");
3674 let currency = self
3675 .config
3676 .companies
3677 .first()
3678 .map(|c| c.currency.as_str())
3679 .unwrap_or("USD");
3680
3681 let framework = match self.config.accounting_standards.framework {
3686 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3687 datasynth_standards::framework::AccountingFramework::UsGaap
3688 }
3689 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3690 datasynth_standards::framework::AccountingFramework::Ifrs
3691 }
3692 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3693 datasynth_standards::framework::AccountingFramework::DualReporting
3694 }
3695 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3696 datasynth_standards::framework::AccountingFramework::FrenchGaap
3697 }
3698 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3699 datasynth_standards::framework::AccountingFramework::GermanGaap
3700 }
3701 None => {
3702 let pack = self.primary_pack();
3704 let pack_fw = pack.accounting.framework.as_str();
3705 match pack_fw {
3706 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3707 "dual_reporting" => {
3708 datasynth_standards::framework::AccountingFramework::DualReporting
3709 }
3710 "french_gaap" => {
3711 datasynth_standards::framework::AccountingFramework::FrenchGaap
3712 }
3713 "german_gaap" | "hgb" => {
3714 datasynth_standards::framework::AccountingFramework::GermanGaap
3715 }
3716 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3718 }
3719 }
3720 };
3721
3722 let mut snapshot = AccountingStandardsSnapshot::default();
3723
3724 if self.config.accounting_standards.revenue_recognition.enabled {
3726 let customer_ids: Vec<String> = self
3727 .master_data
3728 .customers
3729 .iter()
3730 .map(|c| c.customer_id.clone())
3731 .collect();
3732
3733 if !customer_ids.is_empty() {
3734 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3735 let contracts = rev_gen.generate(
3736 company_code,
3737 &customer_ids,
3738 start_date,
3739 end_date,
3740 currency,
3741 &self.config.accounting_standards.revenue_recognition,
3742 framework,
3743 );
3744 snapshot.revenue_contract_count = contracts.len();
3745 snapshot.contracts = contracts;
3746 }
3747 }
3748
3749 if self.config.accounting_standards.impairment.enabled {
3751 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3752 .master_data
3753 .assets
3754 .iter()
3755 .map(|a| {
3756 (
3757 a.asset_id.clone(),
3758 a.description.clone(),
3759 a.acquisition_cost,
3760 )
3761 })
3762 .collect();
3763
3764 if !asset_data.is_empty() {
3765 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3766 let tests = imp_gen.generate(
3767 company_code,
3768 &asset_data,
3769 end_date,
3770 &self.config.accounting_standards.impairment,
3771 framework,
3772 );
3773 snapshot.impairment_test_count = tests.len();
3774 snapshot.impairment_tests = tests;
3775 }
3776 }
3777
3778 stats.revenue_contract_count = snapshot.revenue_contract_count;
3779 stats.impairment_test_count = snapshot.impairment_test_count;
3780
3781 info!(
3782 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3783 snapshot.revenue_contract_count, snapshot.impairment_test_count
3784 );
3785 self.check_resources_with_log("post-accounting-standards")?;
3786
3787 Ok(snapshot)
3788 }
3789
3790 fn phase_manufacturing(
3792 &mut self,
3793 stats: &mut EnhancedGenerationStatistics,
3794 ) -> SynthResult<ManufacturingSnapshot> {
3795 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3796 debug!("Phase 18: Skipped (manufacturing generation disabled)");
3797 return Ok(ManufacturingSnapshot::default());
3798 }
3799 info!("Phase 18: Generating Manufacturing Data");
3800
3801 let seed = self.seed;
3802 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3803 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3804 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3805 let company_code = self
3806 .config
3807 .companies
3808 .first()
3809 .map(|c| c.code.as_str())
3810 .unwrap_or("1000");
3811
3812 let material_data: Vec<(String, String)> = self
3813 .master_data
3814 .materials
3815 .iter()
3816 .map(|m| (m.material_id.clone(), m.description.clone()))
3817 .collect();
3818
3819 if material_data.is_empty() {
3820 debug!("Phase 18: Skipped (no materials available)");
3821 return Ok(ManufacturingSnapshot::default());
3822 }
3823
3824 let mut snapshot = ManufacturingSnapshot::default();
3825
3826 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3828 let production_orders = prod_gen.generate(
3829 company_code,
3830 &material_data,
3831 start_date,
3832 end_date,
3833 &self.config.manufacturing.production_orders,
3834 &self.config.manufacturing.costing,
3835 &self.config.manufacturing.routing,
3836 );
3837 snapshot.production_order_count = production_orders.len();
3838
3839 let inspection_data: Vec<(String, String, String)> = production_orders
3841 .iter()
3842 .map(|po| {
3843 (
3844 po.order_id.clone(),
3845 po.material_id.clone(),
3846 po.material_description.clone(),
3847 )
3848 })
3849 .collect();
3850
3851 snapshot.production_orders = production_orders;
3852
3853 if !inspection_data.is_empty() {
3854 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3855 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3856 snapshot.quality_inspection_count = inspections.len();
3857 snapshot.quality_inspections = inspections;
3858 }
3859
3860 let storage_locations: Vec<(String, String)> = material_data
3862 .iter()
3863 .enumerate()
3864 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3865 .collect();
3866
3867 let employee_ids: Vec<String> = self
3868 .master_data
3869 .employees
3870 .iter()
3871 .map(|e| e.employee_id.clone())
3872 .collect();
3873 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3874 .with_employee_pool(employee_ids);
3875 let mut cycle_count_total = 0usize;
3876 for month in 0..self.config.global.period_months {
3877 let count_date = start_date + chrono::Months::new(month);
3878 let items_per_count = storage_locations.len().clamp(10, 50);
3879 let cc = cc_gen.generate(
3880 company_code,
3881 &storage_locations,
3882 count_date,
3883 items_per_count,
3884 );
3885 snapshot.cycle_counts.push(cc);
3886 cycle_count_total += 1;
3887 }
3888 snapshot.cycle_count_count = cycle_count_total;
3889
3890 stats.production_order_count = snapshot.production_order_count;
3891 stats.quality_inspection_count = snapshot.quality_inspection_count;
3892 stats.cycle_count_count = snapshot.cycle_count_count;
3893
3894 info!(
3895 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3896 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3897 );
3898 self.check_resources_with_log("post-manufacturing")?;
3899
3900 Ok(snapshot)
3901 }
3902
3903 fn phase_sales_kpi_budgets(
3905 &mut self,
3906 coa: &Arc<ChartOfAccounts>,
3907 financial_reporting: &FinancialReportingSnapshot,
3908 stats: &mut EnhancedGenerationStatistics,
3909 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3910 if !self.phase_config.generate_sales_kpi_budgets {
3911 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3912 return Ok(SalesKpiBudgetsSnapshot::default());
3913 }
3914 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3915
3916 let seed = self.seed;
3917 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3918 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3919 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3920 let company_code = self
3921 .config
3922 .companies
3923 .first()
3924 .map(|c| c.code.as_str())
3925 .unwrap_or("1000");
3926
3927 let mut snapshot = SalesKpiBudgetsSnapshot::default();
3928
3929 if self.config.sales_quotes.enabled {
3931 let customer_data: Vec<(String, String)> = self
3932 .master_data
3933 .customers
3934 .iter()
3935 .map(|c| (c.customer_id.clone(), c.name.clone()))
3936 .collect();
3937 let material_data: Vec<(String, String)> = self
3938 .master_data
3939 .materials
3940 .iter()
3941 .map(|m| (m.material_id.clone(), m.description.clone()))
3942 .collect();
3943
3944 if !customer_data.is_empty() && !material_data.is_empty() {
3945 let employee_ids: Vec<String> = self
3946 .master_data
3947 .employees
3948 .iter()
3949 .map(|e| e.employee_id.clone())
3950 .collect();
3951 let customer_ids: Vec<String> = self
3952 .master_data
3953 .customers
3954 .iter()
3955 .map(|c| c.customer_id.clone())
3956 .collect();
3957 let company_currency = self
3958 .config
3959 .companies
3960 .first()
3961 .map(|c| c.currency.as_str())
3962 .unwrap_or("USD");
3963
3964 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3965 .with_pools(employee_ids, customer_ids);
3966 let quotes = quote_gen.generate_with_currency(
3967 company_code,
3968 &customer_data,
3969 &material_data,
3970 start_date,
3971 end_date,
3972 &self.config.sales_quotes,
3973 company_currency,
3974 );
3975 snapshot.sales_quote_count = quotes.len();
3976 snapshot.sales_quotes = quotes;
3977 }
3978 }
3979
3980 if self.config.financial_reporting.management_kpis.enabled {
3982 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3983 let mut kpis = kpi_gen.generate(
3984 company_code,
3985 start_date,
3986 end_date,
3987 &self.config.financial_reporting.management_kpis,
3988 );
3989
3990 {
3992 use rust_decimal::Decimal;
3993
3994 if let Some(income_stmt) =
3995 financial_reporting.financial_statements.iter().find(|fs| {
3996 fs.statement_type == StatementType::IncomeStatement
3997 && fs.company_code == company_code
3998 })
3999 {
4000 let total_revenue: Decimal = income_stmt
4002 .line_items
4003 .iter()
4004 .filter(|li| li.section.contains("Revenue") && !li.is_total)
4005 .map(|li| li.amount)
4006 .sum();
4007 let total_cogs: Decimal = income_stmt
4008 .line_items
4009 .iter()
4010 .filter(|li| {
4011 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4012 && !li.is_total
4013 })
4014 .map(|li| li.amount.abs())
4015 .sum();
4016 let total_opex: Decimal = income_stmt
4017 .line_items
4018 .iter()
4019 .filter(|li| {
4020 li.section.contains("Expense")
4021 && !li.is_total
4022 && !li.section.contains("Cost")
4023 })
4024 .map(|li| li.amount.abs())
4025 .sum();
4026
4027 if total_revenue > Decimal::ZERO {
4028 let hundred = Decimal::from(100);
4029 let gross_margin_pct =
4030 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4031 let operating_income = total_revenue - total_cogs - total_opex;
4032 let op_margin_pct =
4033 (operating_income * hundred / total_revenue).round_dp(2);
4034
4035 for kpi in &mut kpis {
4037 if kpi.name == "Gross Margin" {
4038 kpi.value = gross_margin_pct;
4039 } else if kpi.name == "Operating Margin" {
4040 kpi.value = op_margin_pct;
4041 }
4042 }
4043 }
4044 }
4045
4046 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4048 fs.statement_type == StatementType::BalanceSheet
4049 && fs.company_code == company_code
4050 }) {
4051 let current_assets: Decimal = bs
4052 .line_items
4053 .iter()
4054 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4055 .map(|li| li.amount)
4056 .sum();
4057 let current_liabilities: Decimal = bs
4058 .line_items
4059 .iter()
4060 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4061 .map(|li| li.amount.abs())
4062 .sum();
4063
4064 if current_liabilities > Decimal::ZERO {
4065 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4066 for kpi in &mut kpis {
4067 if kpi.name == "Current Ratio" {
4068 kpi.value = current_ratio;
4069 }
4070 }
4071 }
4072 }
4073 }
4074
4075 snapshot.kpi_count = kpis.len();
4076 snapshot.kpis = kpis;
4077 }
4078
4079 if self.config.financial_reporting.budgets.enabled {
4081 let account_data: Vec<(String, String)> = coa
4082 .accounts
4083 .iter()
4084 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4085 .collect();
4086
4087 if !account_data.is_empty() {
4088 let fiscal_year = start_date.year() as u32;
4089 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4090 let budget = budget_gen.generate(
4091 company_code,
4092 fiscal_year,
4093 &account_data,
4094 &self.config.financial_reporting.budgets,
4095 );
4096 snapshot.budget_line_count = budget.line_items.len();
4097 snapshot.budgets.push(budget);
4098 }
4099 }
4100
4101 stats.sales_quote_count = snapshot.sales_quote_count;
4102 stats.kpi_count = snapshot.kpi_count;
4103 stats.budget_line_count = snapshot.budget_line_count;
4104
4105 info!(
4106 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4107 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4108 );
4109 self.check_resources_with_log("post-sales-kpi-budgets")?;
4110
4111 Ok(snapshot)
4112 }
4113
4114 fn phase_tax_generation(
4116 &mut self,
4117 document_flows: &DocumentFlowSnapshot,
4118 stats: &mut EnhancedGenerationStatistics,
4119 ) -> SynthResult<TaxSnapshot> {
4120 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4121 debug!("Phase 20: Skipped (tax generation disabled)");
4122 return Ok(TaxSnapshot::default());
4123 }
4124 info!("Phase 20: Generating Tax Data");
4125
4126 let seed = self.seed;
4127 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4128 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4129 let fiscal_year = start_date.year();
4130 let company_code = self
4131 .config
4132 .companies
4133 .first()
4134 .map(|c| c.code.as_str())
4135 .unwrap_or("1000");
4136
4137 let mut gen =
4138 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4139
4140 let pack = self.primary_pack().clone();
4141 let (jurisdictions, codes) =
4142 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4143
4144 let mut provisions = Vec::new();
4146 if self.config.tax.provisions.enabled {
4147 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4148 for company in &self.config.companies {
4149 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4150 let statutory_rate = rust_decimal::Decimal::new(
4151 (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4152 2,
4153 );
4154 let provision = provision_gen.generate(
4155 &company.code,
4156 start_date,
4157 pre_tax_income,
4158 statutory_rate,
4159 );
4160 provisions.push(provision);
4161 }
4162 }
4163
4164 let mut tax_lines = Vec::new();
4166 if !codes.is_empty() {
4167 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4168 datasynth_generators::TaxLineGeneratorConfig::default(),
4169 codes.clone(),
4170 seed + 72,
4171 );
4172
4173 let buyer_country = self
4176 .config
4177 .companies
4178 .first()
4179 .map(|c| c.country.as_str())
4180 .unwrap_or("US");
4181 for vi in &document_flows.vendor_invoices {
4182 let lines = tax_line_gen.generate_for_document(
4183 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4184 &vi.header.document_id,
4185 buyer_country, buyer_country,
4187 vi.payable_amount,
4188 vi.header.document_date,
4189 None,
4190 );
4191 tax_lines.extend(lines);
4192 }
4193
4194 for ci in &document_flows.customer_invoices {
4196 let lines = tax_line_gen.generate_for_document(
4197 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4198 &ci.header.document_id,
4199 buyer_country, buyer_country,
4201 ci.total_gross_amount,
4202 ci.header.document_date,
4203 None,
4204 );
4205 tax_lines.extend(lines);
4206 }
4207 }
4208
4209 let snapshot = TaxSnapshot {
4210 jurisdiction_count: jurisdictions.len(),
4211 code_count: codes.len(),
4212 jurisdictions,
4213 codes,
4214 tax_provisions: provisions,
4215 tax_lines,
4216 tax_returns: Vec::new(),
4217 withholding_records: Vec::new(),
4218 tax_anomaly_labels: Vec::new(),
4219 };
4220
4221 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4222 stats.tax_code_count = snapshot.code_count;
4223 stats.tax_provision_count = snapshot.tax_provisions.len();
4224 stats.tax_line_count = snapshot.tax_lines.len();
4225
4226 info!(
4227 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4228 snapshot.jurisdiction_count,
4229 snapshot.code_count,
4230 snapshot.tax_provisions.len()
4231 );
4232 self.check_resources_with_log("post-tax")?;
4233
4234 Ok(snapshot)
4235 }
4236
4237 fn phase_esg_generation(
4239 &mut self,
4240 document_flows: &DocumentFlowSnapshot,
4241 stats: &mut EnhancedGenerationStatistics,
4242 ) -> SynthResult<EsgSnapshot> {
4243 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4244 debug!("Phase 21: Skipped (ESG generation disabled)");
4245 return Ok(EsgSnapshot::default());
4246 }
4247 info!("Phase 21: Generating ESG Data");
4248
4249 let seed = self.seed;
4250 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4251 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4252 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4253 let entity_id = self
4254 .config
4255 .companies
4256 .first()
4257 .map(|c| c.code.as_str())
4258 .unwrap_or("1000");
4259
4260 let esg_cfg = &self.config.esg;
4261 let mut snapshot = EsgSnapshot::default();
4262
4263 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4265 esg_cfg.environmental.energy.clone(),
4266 seed + 80,
4267 );
4268 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4269
4270 let facility_count = esg_cfg.environmental.energy.facility_count;
4272 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4273 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4274
4275 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4277 seed + 82,
4278 esg_cfg.environmental.waste.diversion_target,
4279 facility_count,
4280 );
4281 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4282
4283 let mut emission_gen =
4285 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4286
4287 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4289 .iter()
4290 .map(|e| datasynth_generators::EnergyInput {
4291 facility_id: e.facility_id.clone(),
4292 energy_type: match e.energy_source {
4293 EnergySourceType::NaturalGas => {
4294 datasynth_generators::EnergyInputType::NaturalGas
4295 }
4296 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4297 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4298 _ => datasynth_generators::EnergyInputType::Electricity,
4299 },
4300 consumption_kwh: e.consumption_kwh,
4301 period: e.period,
4302 })
4303 .collect();
4304
4305 let mut emissions = Vec::new();
4306 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4307 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4308
4309 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4311 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4312 for payment in &document_flows.payments {
4313 if payment.is_vendor {
4314 *totals
4315 .entry(payment.business_partner_id.clone())
4316 .or_default() += payment.amount;
4317 }
4318 }
4319 totals
4320 };
4321 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4322 .master_data
4323 .vendors
4324 .iter()
4325 .map(|v| {
4326 let spend = vendor_payment_totals
4327 .get(&v.vendor_id)
4328 .copied()
4329 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4330 datasynth_generators::VendorSpendInput {
4331 vendor_id: v.vendor_id.clone(),
4332 category: format!("{:?}", v.vendor_type).to_lowercase(),
4333 spend,
4334 country: v.country.clone(),
4335 }
4336 })
4337 .collect();
4338 if !vendor_spend.is_empty() {
4339 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4340 entity_id,
4341 &vendor_spend,
4342 start_date,
4343 end_date,
4344 ));
4345 }
4346
4347 let headcount = self.master_data.employees.len() as u32;
4349 if headcount > 0 {
4350 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4351 emissions.extend(emission_gen.generate_scope3_business_travel(
4352 entity_id,
4353 travel_spend,
4354 start_date,
4355 ));
4356 emissions
4357 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4358 }
4359
4360 snapshot.emission_count = emissions.len();
4361 snapshot.emissions = emissions;
4362 snapshot.energy = energy_records;
4363
4364 let mut workforce_gen =
4366 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4367 let total_headcount = headcount.max(100);
4368 snapshot.diversity =
4369 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4370 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4371 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4372 entity_id,
4373 facility_count,
4374 start_date,
4375 end_date,
4376 );
4377
4378 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4381 entity_id,
4382 &snapshot.safety_incidents,
4383 total_hours,
4384 start_date,
4385 );
4386 snapshot.safety_metrics = vec![safety_metric];
4387
4388 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4390 seed + 85,
4391 esg_cfg.governance.board_size,
4392 esg_cfg.governance.independence_target,
4393 );
4394 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4395
4396 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4398 esg_cfg.supply_chain_esg.clone(),
4399 seed + 86,
4400 );
4401 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4402 .master_data
4403 .vendors
4404 .iter()
4405 .map(|v| datasynth_generators::VendorInput {
4406 vendor_id: v.vendor_id.clone(),
4407 country: v.country.clone(),
4408 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4409 quality_score: None,
4410 })
4411 .collect();
4412 snapshot.supplier_assessments =
4413 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4414
4415 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4417 seed + 87,
4418 esg_cfg.reporting.clone(),
4419 esg_cfg.climate_scenarios.clone(),
4420 );
4421 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4422 snapshot.disclosures = disclosure_gen.generate_disclosures(
4423 entity_id,
4424 &snapshot.materiality,
4425 start_date,
4426 end_date,
4427 );
4428 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4429 snapshot.disclosure_count = snapshot.disclosures.len();
4430
4431 if esg_cfg.anomaly_rate > 0.0 {
4433 let mut anomaly_injector =
4434 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4435 let mut labels = Vec::new();
4436 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4437 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4438 labels.extend(
4439 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4440 );
4441 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4442 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4443 snapshot.anomaly_labels = labels;
4444 }
4445
4446 stats.esg_emission_count = snapshot.emission_count;
4447 stats.esg_disclosure_count = snapshot.disclosure_count;
4448
4449 info!(
4450 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4451 snapshot.emission_count,
4452 snapshot.disclosure_count,
4453 snapshot.supplier_assessments.len()
4454 );
4455 self.check_resources_with_log("post-esg")?;
4456
4457 Ok(snapshot)
4458 }
4459
4460 fn phase_treasury_data(
4462 &mut self,
4463 document_flows: &DocumentFlowSnapshot,
4464 stats: &mut EnhancedGenerationStatistics,
4465 ) -> SynthResult<TreasurySnapshot> {
4466 if !self.config.treasury.enabled {
4467 debug!("Phase 22: Skipped (treasury generation disabled)");
4468 return Ok(TreasurySnapshot::default());
4469 }
4470 info!("Phase 22: Generating Treasury Data");
4471
4472 let seed = self.seed;
4473 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4474 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4475 let currency = self
4476 .config
4477 .companies
4478 .first()
4479 .map(|c| c.currency.as_str())
4480 .unwrap_or("USD");
4481 let entity_id = self
4482 .config
4483 .companies
4484 .first()
4485 .map(|c| c.code.as_str())
4486 .unwrap_or("1000");
4487
4488 let mut snapshot = TreasurySnapshot::default();
4489
4490 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4492 self.config.treasury.debt.clone(),
4493 seed + 90,
4494 );
4495 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4496
4497 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4499 self.config.treasury.hedging.clone(),
4500 seed + 91,
4501 );
4502 for debt in &snapshot.debt_instruments {
4503 if debt.rate_type == InterestRateType::Variable {
4504 let swap = hedge_gen.generate_ir_swap(
4505 currency,
4506 debt.principal,
4507 debt.origination_date,
4508 debt.maturity_date,
4509 );
4510 snapshot.hedging_instruments.push(swap);
4511 }
4512 }
4513
4514 {
4517 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4518 for payment in &document_flows.payments {
4519 if payment.currency != currency {
4520 let entry = fx_map
4521 .entry(payment.currency.clone())
4522 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4523 entry.0 += payment.amount;
4524 if payment.header.document_date > entry.1 {
4526 entry.1 = payment.header.document_date;
4527 }
4528 }
4529 }
4530 if !fx_map.is_empty() {
4531 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4532 .into_iter()
4533 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4534 datasynth_generators::treasury::FxExposure {
4535 currency_pair: format!("{}/{}", foreign_ccy, currency),
4536 foreign_currency: foreign_ccy,
4537 net_amount,
4538 settlement_date,
4539 description: "AP payment FX exposure".to_string(),
4540 }
4541 })
4542 .collect();
4543 let (fx_instruments, fx_relationships) =
4544 hedge_gen.generate(start_date, &fx_exposures);
4545 snapshot.hedging_instruments.extend(fx_instruments);
4546 snapshot.hedge_relationships.extend(fx_relationships);
4547 }
4548 }
4549
4550 if self.config.treasury.anomaly_rate > 0.0 {
4552 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4553 seed + 92,
4554 self.config.treasury.anomaly_rate,
4555 );
4556 let mut labels = Vec::new();
4557 labels.extend(
4558 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4559 );
4560 snapshot.treasury_anomaly_labels = labels;
4561 }
4562
4563 if self.config.treasury.cash_positioning.enabled {
4565 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4566
4567 for payment in &document_flows.payments {
4569 cash_flows.push(datasynth_generators::treasury::CashFlow {
4570 date: payment.header.document_date,
4571 account_id: format!("{}-MAIN", entity_id),
4572 amount: payment.amount,
4573 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4574 });
4575 }
4576
4577 for chain in &document_flows.o2c_chains {
4579 if let Some(ref receipt) = chain.customer_receipt {
4580 cash_flows.push(datasynth_generators::treasury::CashFlow {
4581 date: receipt.header.document_date,
4582 account_id: format!("{}-MAIN", entity_id),
4583 amount: receipt.amount,
4584 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4585 });
4586 }
4587 }
4588
4589 if !cash_flows.is_empty() {
4590 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4591 self.config.treasury.cash_positioning.clone(),
4592 seed + 93,
4593 );
4594 let account_id = format!("{}-MAIN", entity_id);
4595 snapshot.cash_positions = cash_gen.generate(
4596 entity_id,
4597 &account_id,
4598 currency,
4599 &cash_flows,
4600 start_date,
4601 start_date + chrono::Months::new(self.config.global.period_months),
4602 rust_decimal::Decimal::new(1_000_000, 0), );
4604 }
4605 }
4606
4607 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4608 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4609 stats.cash_position_count = snapshot.cash_positions.len();
4610
4611 info!(
4612 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4613 snapshot.debt_instruments.len(),
4614 snapshot.hedging_instruments.len(),
4615 snapshot.cash_positions.len()
4616 );
4617 self.check_resources_with_log("post-treasury")?;
4618
4619 Ok(snapshot)
4620 }
4621
4622 fn phase_project_accounting(
4624 &mut self,
4625 document_flows: &DocumentFlowSnapshot,
4626 hr: &HrSnapshot,
4627 stats: &mut EnhancedGenerationStatistics,
4628 ) -> SynthResult<ProjectAccountingSnapshot> {
4629 if !self.config.project_accounting.enabled {
4630 debug!("Phase 23: Skipped (project accounting disabled)");
4631 return Ok(ProjectAccountingSnapshot::default());
4632 }
4633 info!("Phase 23: Generating Project Accounting Data");
4634
4635 let seed = self.seed;
4636 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4637 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4638 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4639 let company_code = self
4640 .config
4641 .companies
4642 .first()
4643 .map(|c| c.code.as_str())
4644 .unwrap_or("1000");
4645
4646 let mut snapshot = ProjectAccountingSnapshot::default();
4647
4648 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4650 self.config.project_accounting.clone(),
4651 seed + 95,
4652 );
4653 let pool = project_gen.generate(company_code, start_date, end_date);
4654 snapshot.projects = pool.projects.clone();
4655
4656 {
4658 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4659 Vec::new();
4660
4661 for te in &hr.time_entries {
4663 let total_hours = te.hours_regular + te.hours_overtime;
4664 if total_hours > 0.0 {
4665 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4666 id: te.entry_id.clone(),
4667 entity_id: company_code.to_string(),
4668 date: te.date,
4669 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4670 .unwrap_or(rust_decimal::Decimal::ZERO),
4671 source_type: CostSourceType::TimeEntry,
4672 hours: Some(
4673 rust_decimal::Decimal::from_f64_retain(total_hours)
4674 .unwrap_or(rust_decimal::Decimal::ZERO),
4675 ),
4676 });
4677 }
4678 }
4679
4680 for er in &hr.expense_reports {
4682 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4683 id: er.report_id.clone(),
4684 entity_id: company_code.to_string(),
4685 date: er.submission_date,
4686 amount: er.total_amount,
4687 source_type: CostSourceType::ExpenseReport,
4688 hours: None,
4689 });
4690 }
4691
4692 for po in &document_flows.purchase_orders {
4694 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4695 id: po.header.document_id.clone(),
4696 entity_id: company_code.to_string(),
4697 date: po.header.document_date,
4698 amount: po.total_net_amount,
4699 source_type: CostSourceType::PurchaseOrder,
4700 hours: None,
4701 });
4702 }
4703
4704 for vi in &document_flows.vendor_invoices {
4706 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4707 id: vi.header.document_id.clone(),
4708 entity_id: company_code.to_string(),
4709 date: vi.header.document_date,
4710 amount: vi.payable_amount,
4711 source_type: CostSourceType::VendorInvoice,
4712 hours: None,
4713 });
4714 }
4715
4716 if !source_docs.is_empty() && !pool.projects.is_empty() {
4717 let mut cost_gen =
4718 datasynth_generators::project_accounting::ProjectCostGenerator::new(
4719 self.config.project_accounting.cost_allocation.clone(),
4720 seed + 99,
4721 );
4722 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4723 }
4724 }
4725
4726 if self.config.project_accounting.change_orders.enabled {
4728 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4729 self.config.project_accounting.change_orders.clone(),
4730 seed + 96,
4731 );
4732 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4733 }
4734
4735 if self.config.project_accounting.milestones.enabled {
4737 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4738 self.config.project_accounting.milestones.clone(),
4739 seed + 97,
4740 );
4741 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4742 }
4743
4744 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4746 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4747 self.config.project_accounting.earned_value.clone(),
4748 seed + 98,
4749 );
4750 snapshot.earned_value_metrics =
4751 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4752 }
4753
4754 stats.project_count = snapshot.projects.len();
4755 stats.project_change_order_count = snapshot.change_orders.len();
4756 stats.project_cost_line_count = snapshot.cost_lines.len();
4757
4758 info!(
4759 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4760 snapshot.projects.len(),
4761 snapshot.change_orders.len(),
4762 snapshot.milestones.len(),
4763 snapshot.earned_value_metrics.len()
4764 );
4765 self.check_resources_with_log("post-project-accounting")?;
4766
4767 Ok(snapshot)
4768 }
4769
4770 fn phase_opening_balances(
4772 &mut self,
4773 coa: &Arc<ChartOfAccounts>,
4774 stats: &mut EnhancedGenerationStatistics,
4775 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4776 if !self.config.balance.generate_opening_balances {
4777 debug!("Phase 3b: Skipped (opening balance generation disabled)");
4778 return Ok(Vec::new());
4779 }
4780 info!("Phase 3b: Generating Opening Balances");
4781
4782 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4783 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4784 let fiscal_year = start_date.year();
4785
4786 let industry = match self.config.global.industry {
4787 IndustrySector::Manufacturing => IndustryType::Manufacturing,
4788 IndustrySector::Retail => IndustryType::Retail,
4789 IndustrySector::FinancialServices => IndustryType::Financial,
4790 IndustrySector::Healthcare => IndustryType::Healthcare,
4791 IndustrySector::Technology => IndustryType::Technology,
4792 _ => IndustryType::Manufacturing,
4793 };
4794
4795 let config = datasynth_generators::OpeningBalanceConfig {
4796 industry,
4797 ..Default::default()
4798 };
4799 let mut gen =
4800 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4801
4802 let mut results = Vec::new();
4803 for company in &self.config.companies {
4804 let spec = OpeningBalanceSpec::new(
4805 company.code.clone(),
4806 start_date,
4807 fiscal_year,
4808 company.currency.clone(),
4809 rust_decimal::Decimal::new(10_000_000, 0),
4810 industry,
4811 );
4812 let ob = gen.generate(&spec, coa, start_date, &company.code);
4813 results.push(ob);
4814 }
4815
4816 stats.opening_balance_count = results.len();
4817 info!("Opening balances generated: {} companies", results.len());
4818 self.check_resources_with_log("post-opening-balances")?;
4819
4820 Ok(results)
4821 }
4822
4823 fn phase_subledger_reconciliation(
4825 &mut self,
4826 subledger: &SubledgerSnapshot,
4827 entries: &[JournalEntry],
4828 stats: &mut EnhancedGenerationStatistics,
4829 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4830 if !self.config.balance.reconcile_subledgers {
4831 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4832 return Ok(Vec::new());
4833 }
4834 info!("Phase 9b: Reconciling GL to subledger balances");
4835
4836 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4837 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4838 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4839
4840 let tracker_config = BalanceTrackerConfig {
4842 validate_on_each_entry: false,
4843 track_history: false,
4844 fail_on_validation_error: false,
4845 ..Default::default()
4846 };
4847 let recon_currency = self
4848 .config
4849 .companies
4850 .first()
4851 .map(|c| c.currency.clone())
4852 .unwrap_or_else(|| "USD".to_string());
4853 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4854 let _ = tracker.apply_entries(entries);
4855
4856 let mut engine = datasynth_generators::ReconciliationEngine::new(
4857 datasynth_generators::ReconciliationConfig::default(),
4858 );
4859
4860 let mut results = Vec::new();
4861 let company_code = self
4862 .config
4863 .companies
4864 .first()
4865 .map(|c| c.code.as_str())
4866 .unwrap_or("1000");
4867
4868 if !subledger.ar_invoices.is_empty() {
4870 let gl_balance = tracker
4871 .get_account_balance(
4872 company_code,
4873 datasynth_core::accounts::control_accounts::AR_CONTROL,
4874 )
4875 .map(|b| b.closing_balance)
4876 .unwrap_or_default();
4877 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4878 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4879 }
4880
4881 if !subledger.ap_invoices.is_empty() {
4883 let gl_balance = tracker
4884 .get_account_balance(
4885 company_code,
4886 datasynth_core::accounts::control_accounts::AP_CONTROL,
4887 )
4888 .map(|b| b.closing_balance)
4889 .unwrap_or_default();
4890 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4891 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4892 }
4893
4894 if !subledger.fa_records.is_empty() {
4896 let gl_asset_balance = tracker
4897 .get_account_balance(
4898 company_code,
4899 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4900 )
4901 .map(|b| b.closing_balance)
4902 .unwrap_or_default();
4903 let gl_accum_depr_balance = tracker
4904 .get_account_balance(
4905 company_code,
4906 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4907 )
4908 .map(|b| b.closing_balance)
4909 .unwrap_or_default();
4910 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4911 subledger.fa_records.iter().collect();
4912 let (asset_recon, depr_recon) = engine.reconcile_fa(
4913 company_code,
4914 end_date,
4915 gl_asset_balance,
4916 gl_accum_depr_balance,
4917 &fa_refs,
4918 );
4919 results.push(asset_recon);
4920 results.push(depr_recon);
4921 }
4922
4923 if !subledger.inventory_positions.is_empty() {
4925 let gl_balance = tracker
4926 .get_account_balance(
4927 company_code,
4928 datasynth_core::accounts::control_accounts::INVENTORY,
4929 )
4930 .map(|b| b.closing_balance)
4931 .unwrap_or_default();
4932 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4933 subledger.inventory_positions.iter().collect();
4934 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4935 }
4936
4937 stats.subledger_reconciliation_count = results.len();
4938 info!(
4939 "Subledger reconciliation complete: {} reconciliations",
4940 results.len()
4941 );
4942 self.check_resources_with_log("post-subledger-reconciliation")?;
4943
4944 Ok(results)
4945 }
4946
4947 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4949 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4950
4951 let coa_framework = self.resolve_coa_framework();
4952
4953 let mut gen = ChartOfAccountsGenerator::new(
4954 self.config.chart_of_accounts.complexity,
4955 self.config.global.industry,
4956 self.seed,
4957 )
4958 .with_coa_framework(coa_framework);
4959
4960 let coa = Arc::new(gen.generate());
4961 self.coa = Some(Arc::clone(&coa));
4962
4963 if let Some(pb) = pb {
4964 pb.finish_with_message("Chart of Accounts complete");
4965 }
4966
4967 Ok(coa)
4968 }
4969
4970 fn generate_master_data(&mut self) -> SynthResult<()> {
4972 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4973 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4974 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4975
4976 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
4978
4979 let pack = self.primary_pack().clone();
4981
4982 let vendors_per_company = self.phase_config.vendors_per_company;
4984 let customers_per_company = self.phase_config.customers_per_company;
4985 let materials_per_company = self.phase_config.materials_per_company;
4986 let assets_per_company = self.phase_config.assets_per_company;
4987 let coa_framework = self.resolve_coa_framework();
4988
4989 let per_company_results: Vec<_> = self
4992 .config
4993 .companies
4994 .par_iter()
4995 .enumerate()
4996 .map(|(i, company)| {
4997 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
4998 let pack = pack.clone();
4999
5000 let mut vendor_gen = VendorGenerator::new(company_seed);
5002 vendor_gen.set_country_pack(pack.clone());
5003 vendor_gen.set_coa_framework(coa_framework);
5004 let vendor_pool =
5005 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
5006
5007 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
5009 customer_gen.set_country_pack(pack.clone());
5010 customer_gen.set_coa_framework(coa_framework);
5011 let customer_pool = customer_gen.generate_customer_pool(
5012 customers_per_company,
5013 &company.code,
5014 start_date,
5015 );
5016
5017 let mut material_gen = MaterialGenerator::new(company_seed + 200);
5019 material_gen.set_country_pack(pack);
5020 let material_pool = material_gen.generate_material_pool(
5021 materials_per_company,
5022 &company.code,
5023 start_date,
5024 );
5025
5026 let mut asset_gen = AssetGenerator::new(company_seed + 300);
5028 let asset_pool = asset_gen.generate_asset_pool(
5029 assets_per_company,
5030 &company.code,
5031 (start_date, end_date),
5032 );
5033
5034 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5036 let employee_pool =
5037 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5038
5039 (
5040 vendor_pool.vendors,
5041 customer_pool.customers,
5042 material_pool.materials,
5043 asset_pool.assets,
5044 employee_pool.employees,
5045 )
5046 })
5047 .collect();
5048
5049 for (vendors, customers, materials, assets, employees) in per_company_results {
5051 self.master_data.vendors.extend(vendors);
5052 self.master_data.customers.extend(customers);
5053 self.master_data.materials.extend(materials);
5054 self.master_data.assets.extend(assets);
5055 self.master_data.employees.extend(employees);
5056 }
5057
5058 if let Some(pb) = &pb {
5059 pb.inc(total);
5060 }
5061 if let Some(pb) = pb {
5062 pb.finish_with_message("Master data generation complete");
5063 }
5064
5065 Ok(())
5066 }
5067
5068 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5070 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5071 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5072
5073 let months = (self.config.global.period_months as usize).max(1);
5076 let p2p_count = self
5077 .phase_config
5078 .p2p_chains
5079 .min(self.master_data.vendors.len() * 2 * months);
5080 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5081
5082 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5084 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5085 p2p_gen.set_country_pack(self.primary_pack().clone());
5086
5087 for i in 0..p2p_count {
5088 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5089 let materials: Vec<&Material> = self
5090 .master_data
5091 .materials
5092 .iter()
5093 .skip(i % self.master_data.materials.len().max(1))
5094 .take(2.min(self.master_data.materials.len()))
5095 .collect();
5096
5097 if materials.is_empty() {
5098 continue;
5099 }
5100
5101 let company = &self.config.companies[i % self.config.companies.len()];
5102 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5103 let fiscal_period = po_date.month() as u8;
5104 let created_by = if self.master_data.employees.is_empty() {
5105 "SYSTEM"
5106 } else {
5107 self.master_data.employees[i % self.master_data.employees.len()]
5108 .user_id
5109 .as_str()
5110 };
5111
5112 let chain = p2p_gen.generate_chain(
5113 &company.code,
5114 vendor,
5115 &materials,
5116 po_date,
5117 start_date.year() as u16,
5118 fiscal_period,
5119 created_by,
5120 );
5121
5122 flows.purchase_orders.push(chain.purchase_order.clone());
5124 flows.goods_receipts.extend(chain.goods_receipts.clone());
5125 if let Some(vi) = &chain.vendor_invoice {
5126 flows.vendor_invoices.push(vi.clone());
5127 }
5128 if let Some(payment) = &chain.payment {
5129 flows.payments.push(payment.clone());
5130 }
5131 flows.p2p_chains.push(chain);
5132
5133 if let Some(pb) = &pb {
5134 pb.inc(1);
5135 }
5136 }
5137
5138 if let Some(pb) = pb {
5139 pb.finish_with_message("P2P document flows complete");
5140 }
5141
5142 let o2c_count = self
5145 .phase_config
5146 .o2c_chains
5147 .min(self.master_data.customers.len() * 2 * months);
5148 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5149
5150 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5152 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5153 o2c_gen.set_country_pack(self.primary_pack().clone());
5154
5155 for i in 0..o2c_count {
5156 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5157 let materials: Vec<&Material> = self
5158 .master_data
5159 .materials
5160 .iter()
5161 .skip(i % self.master_data.materials.len().max(1))
5162 .take(2.min(self.master_data.materials.len()))
5163 .collect();
5164
5165 if materials.is_empty() {
5166 continue;
5167 }
5168
5169 let company = &self.config.companies[i % self.config.companies.len()];
5170 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5171 let fiscal_period = so_date.month() as u8;
5172 let created_by = if self.master_data.employees.is_empty() {
5173 "SYSTEM"
5174 } else {
5175 self.master_data.employees[i % self.master_data.employees.len()]
5176 .user_id
5177 .as_str()
5178 };
5179
5180 let chain = o2c_gen.generate_chain(
5181 &company.code,
5182 customer,
5183 &materials,
5184 so_date,
5185 start_date.year() as u16,
5186 fiscal_period,
5187 created_by,
5188 );
5189
5190 flows.sales_orders.push(chain.sales_order.clone());
5192 flows.deliveries.extend(chain.deliveries.clone());
5193 if let Some(ci) = &chain.customer_invoice {
5194 flows.customer_invoices.push(ci.clone());
5195 }
5196 if let Some(receipt) = &chain.customer_receipt {
5197 flows.payments.push(receipt.clone());
5198 }
5199 flows.o2c_chains.push(chain);
5200
5201 if let Some(pb) = &pb {
5202 pb.inc(1);
5203 }
5204 }
5205
5206 if let Some(pb) = pb {
5207 pb.finish_with_message("O2C document flows complete");
5208 }
5209
5210 Ok(())
5211 }
5212
5213 fn generate_journal_entries(
5215 &mut self,
5216 coa: &Arc<ChartOfAccounts>,
5217 ) -> SynthResult<Vec<JournalEntry>> {
5218 use datasynth_core::traits::ParallelGenerator;
5219
5220 let total = self.calculate_total_transactions();
5221 let pb = self.create_progress_bar(total, "Generating Journal Entries");
5222
5223 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5224 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5225 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5226
5227 let company_codes: Vec<String> = self
5228 .config
5229 .companies
5230 .iter()
5231 .map(|c| c.code.clone())
5232 .collect();
5233
5234 let generator = JournalEntryGenerator::new_with_params(
5235 self.config.transactions.clone(),
5236 Arc::clone(coa),
5237 company_codes,
5238 start_date,
5239 end_date,
5240 self.seed,
5241 );
5242
5243 let je_pack = self.primary_pack();
5247
5248 let mut generator = generator
5249 .with_master_data(
5250 &self.master_data.vendors,
5251 &self.master_data.customers,
5252 &self.master_data.materials,
5253 )
5254 .with_country_pack_names(je_pack)
5255 .with_country_pack_temporal(
5256 self.config.temporal_patterns.clone(),
5257 self.seed + 200,
5258 je_pack,
5259 )
5260 .with_persona_errors(true)
5261 .with_fraud_config(self.config.fraud.clone());
5262
5263 if self.config.temporal.enabled {
5265 let drift_config = self.config.temporal.to_core_config();
5266 generator = generator.with_drift_config(drift_config, self.seed + 100);
5267 }
5268
5269 self.check_memory_limit()?;
5271
5272 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5274
5275 let entries = if total >= 10_000 && num_threads > 1 {
5279 let sub_generators = generator.split(num_threads);
5282 let entries_per_thread = total as usize / num_threads;
5283 let remainder = total as usize % num_threads;
5284
5285 let batches: Vec<Vec<JournalEntry>> = sub_generators
5286 .into_par_iter()
5287 .enumerate()
5288 .map(|(i, mut gen)| {
5289 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5290 gen.generate_batch(count)
5291 })
5292 .collect();
5293
5294 let entries = JournalEntryGenerator::merge_results(batches);
5296
5297 if let Some(pb) = &pb {
5298 pb.inc(total);
5299 }
5300 entries
5301 } else {
5302 let mut entries = Vec::with_capacity(total as usize);
5304 for _ in 0..total {
5305 let entry = generator.generate();
5306 entries.push(entry);
5307 if let Some(pb) = &pb {
5308 pb.inc(1);
5309 }
5310 }
5311 entries
5312 };
5313
5314 if let Some(pb) = pb {
5315 pb.finish_with_message("Journal entries complete");
5316 }
5317
5318 Ok(entries)
5319 }
5320
5321 fn generate_jes_from_document_flows(
5326 &mut self,
5327 flows: &DocumentFlowSnapshot,
5328 ) -> SynthResult<Vec<JournalEntry>> {
5329 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5330 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5331
5332 let je_config = match self.resolve_coa_framework() {
5333 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
5334 CoAFramework::GermanSkr04 => {
5335 let fa = datasynth_core::FrameworkAccounts::german_gaap();
5336 DocumentFlowJeConfig::from(&fa)
5337 }
5338 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
5339 };
5340
5341 let populate_fec = je_config.populate_fec_fields;
5342 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
5343
5344 if populate_fec {
5348 let mut aux_lookup = std::collections::HashMap::new();
5349 for vendor in &self.master_data.vendors {
5350 if let Some(ref aux) = vendor.auxiliary_gl_account {
5351 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
5352 }
5353 }
5354 for customer in &self.master_data.customers {
5355 if let Some(ref aux) = customer.auxiliary_gl_account {
5356 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
5357 }
5358 }
5359 if !aux_lookup.is_empty() {
5360 generator.set_auxiliary_account_lookup(aux_lookup);
5361 }
5362 }
5363
5364 let mut entries = Vec::new();
5365
5366 for chain in &flows.p2p_chains {
5368 let chain_entries = generator.generate_from_p2p_chain(chain);
5369 entries.extend(chain_entries);
5370 if let Some(pb) = &pb {
5371 pb.inc(1);
5372 }
5373 }
5374
5375 for chain in &flows.o2c_chains {
5377 let chain_entries = generator.generate_from_o2c_chain(chain);
5378 entries.extend(chain_entries);
5379 if let Some(pb) = &pb {
5380 pb.inc(1);
5381 }
5382 }
5383
5384 if let Some(pb) = pb {
5385 pb.finish_with_message(format!(
5386 "Generated {} JEs from document flows",
5387 entries.len()
5388 ));
5389 }
5390
5391 Ok(entries)
5392 }
5393
5394 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5400 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5401
5402 let mut jes = Vec::with_capacity(payroll_runs.len());
5403
5404 for run in payroll_runs {
5405 let mut je = JournalEntry::new_simple(
5406 format!("JE-PAYROLL-{}", run.payroll_id),
5407 run.company_code.clone(),
5408 run.run_date,
5409 format!("Payroll {}", run.payroll_id),
5410 );
5411
5412 je.add_line(JournalEntryLine {
5414 line_number: 1,
5415 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5416 debit_amount: run.total_gross,
5417 reference: Some(run.payroll_id.clone()),
5418 text: Some(format!(
5419 "Payroll {} ({} employees)",
5420 run.payroll_id, run.employee_count
5421 )),
5422 ..Default::default()
5423 });
5424
5425 je.add_line(JournalEntryLine {
5427 line_number: 2,
5428 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5429 credit_amount: run.total_gross,
5430 reference: Some(run.payroll_id.clone()),
5431 ..Default::default()
5432 });
5433
5434 jes.push(je);
5435 }
5436
5437 jes
5438 }
5439
5440 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5446 use datasynth_core::accounts::{control_accounts, expense_accounts};
5447 use datasynth_core::models::ProductionOrderStatus;
5448
5449 let mut jes = Vec::new();
5450
5451 for order in production_orders {
5452 if !matches!(
5454 order.status,
5455 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5456 ) {
5457 continue;
5458 }
5459
5460 let mut je = JournalEntry::new_simple(
5461 format!("JE-MFG-{}", order.order_id),
5462 order.company_code.clone(),
5463 order.actual_end.unwrap_or(order.planned_end),
5464 format!(
5465 "Production Order {} - {}",
5466 order.order_id, order.material_description
5467 ),
5468 );
5469
5470 je.add_line(JournalEntryLine {
5472 line_number: 1,
5473 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5474 debit_amount: order.actual_cost,
5475 reference: Some(order.order_id.clone()),
5476 text: Some(format!(
5477 "Material consumption for {}",
5478 order.material_description
5479 )),
5480 quantity: Some(order.actual_quantity),
5481 unit: Some("EA".to_string()),
5482 ..Default::default()
5483 });
5484
5485 je.add_line(JournalEntryLine {
5487 line_number: 2,
5488 gl_account: control_accounts::INVENTORY.to_string(),
5489 credit_amount: order.actual_cost,
5490 reference: Some(order.order_id.clone()),
5491 ..Default::default()
5492 });
5493
5494 jes.push(je);
5495 }
5496
5497 jes
5498 }
5499
5500 fn link_document_flows_to_subledgers(
5505 &mut self,
5506 flows: &DocumentFlowSnapshot,
5507 ) -> SynthResult<SubledgerSnapshot> {
5508 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5509 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5510
5511 let vendor_names: std::collections::HashMap<String, String> = self
5513 .master_data
5514 .vendors
5515 .iter()
5516 .map(|v| (v.vendor_id.clone(), v.name.clone()))
5517 .collect();
5518 let customer_names: std::collections::HashMap<String, String> = self
5519 .master_data
5520 .customers
5521 .iter()
5522 .map(|c| (c.customer_id.clone(), c.name.clone()))
5523 .collect();
5524
5525 let mut linker = DocumentFlowLinker::new()
5526 .with_vendor_names(vendor_names)
5527 .with_customer_names(customer_names);
5528
5529 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5531 if let Some(pb) = &pb {
5532 pb.inc(flows.vendor_invoices.len() as u64);
5533 }
5534
5535 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5537 if let Some(pb) = &pb {
5538 pb.inc(flows.customer_invoices.len() as u64);
5539 }
5540
5541 if let Some(pb) = pb {
5542 pb.finish_with_message(format!(
5543 "Linked {} AP and {} AR invoices",
5544 ap_invoices.len(),
5545 ar_invoices.len()
5546 ));
5547 }
5548
5549 Ok(SubledgerSnapshot {
5550 ap_invoices,
5551 ar_invoices,
5552 fa_records: Vec::new(),
5553 inventory_positions: Vec::new(),
5554 inventory_movements: Vec::new(),
5555 })
5556 }
5557
5558 #[allow(clippy::too_many_arguments)]
5563 fn generate_ocpm_events(
5564 &mut self,
5565 flows: &DocumentFlowSnapshot,
5566 sourcing: &SourcingSnapshot,
5567 hr: &HrSnapshot,
5568 manufacturing: &ManufacturingSnapshot,
5569 banking: &BankingSnapshot,
5570 audit: &AuditSnapshot,
5571 financial_reporting: &FinancialReportingSnapshot,
5572 ) -> SynthResult<OcpmSnapshot> {
5573 let total_chains = flows.p2p_chains.len()
5574 + flows.o2c_chains.len()
5575 + sourcing.sourcing_projects.len()
5576 + hr.payroll_runs.len()
5577 + manufacturing.production_orders.len()
5578 + banking.customers.len()
5579 + audit.engagements.len()
5580 + financial_reporting.bank_reconciliations.len();
5581 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5582
5583 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5585 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5586
5587 let ocpm_config = OcpmGeneratorConfig {
5589 generate_p2p: true,
5590 generate_o2c: true,
5591 generate_s2c: !sourcing.sourcing_projects.is_empty(),
5592 generate_h2r: !hr.payroll_runs.is_empty(),
5593 generate_mfg: !manufacturing.production_orders.is_empty(),
5594 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5595 generate_bank: !banking.customers.is_empty(),
5596 generate_audit: !audit.engagements.is_empty(),
5597 happy_path_rate: 0.75,
5598 exception_path_rate: 0.20,
5599 error_path_rate: 0.05,
5600 add_duration_variability: true,
5601 duration_std_dev_factor: 0.3,
5602 };
5603 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5604 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5605
5606 let available_users: Vec<String> = self
5608 .master_data
5609 .employees
5610 .iter()
5611 .take(20)
5612 .map(|e| e.user_id.clone())
5613 .collect();
5614
5615 let fallback_date =
5617 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5618 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5619 .unwrap_or(fallback_date);
5620 let base_midnight = base_date
5621 .and_hms_opt(0, 0, 0)
5622 .expect("midnight is always valid");
5623 let base_datetime =
5624 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5625
5626 let add_result = |event_log: &mut OcpmEventLog,
5628 result: datasynth_ocpm::CaseGenerationResult| {
5629 for event in result.events {
5630 event_log.add_event(event);
5631 }
5632 for object in result.objects {
5633 event_log.add_object(object);
5634 }
5635 for relationship in result.relationships {
5636 event_log.add_relationship(relationship);
5637 }
5638 event_log.add_case(result.case_trace);
5639 };
5640
5641 for chain in &flows.p2p_chains {
5643 let po = &chain.purchase_order;
5644 let documents = P2pDocuments::new(
5645 &po.header.document_id,
5646 &po.vendor_id,
5647 &po.header.company_code,
5648 po.total_net_amount,
5649 &po.header.currency,
5650 &ocpm_uuid_factory,
5651 )
5652 .with_goods_receipt(
5653 chain
5654 .goods_receipts
5655 .first()
5656 .map(|gr| gr.header.document_id.as_str())
5657 .unwrap_or(""),
5658 &ocpm_uuid_factory,
5659 )
5660 .with_invoice(
5661 chain
5662 .vendor_invoice
5663 .as_ref()
5664 .map(|vi| vi.header.document_id.as_str())
5665 .unwrap_or(""),
5666 &ocpm_uuid_factory,
5667 )
5668 .with_payment(
5669 chain
5670 .payment
5671 .as_ref()
5672 .map(|p| p.header.document_id.as_str())
5673 .unwrap_or(""),
5674 &ocpm_uuid_factory,
5675 );
5676
5677 let start_time =
5678 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5679 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5680 add_result(&mut event_log, result);
5681
5682 if let Some(pb) = &pb {
5683 pb.inc(1);
5684 }
5685 }
5686
5687 for chain in &flows.o2c_chains {
5689 let so = &chain.sales_order;
5690 let documents = O2cDocuments::new(
5691 &so.header.document_id,
5692 &so.customer_id,
5693 &so.header.company_code,
5694 so.total_net_amount,
5695 &so.header.currency,
5696 &ocpm_uuid_factory,
5697 )
5698 .with_delivery(
5699 chain
5700 .deliveries
5701 .first()
5702 .map(|d| d.header.document_id.as_str())
5703 .unwrap_or(""),
5704 &ocpm_uuid_factory,
5705 )
5706 .with_invoice(
5707 chain
5708 .customer_invoice
5709 .as_ref()
5710 .map(|ci| ci.header.document_id.as_str())
5711 .unwrap_or(""),
5712 &ocpm_uuid_factory,
5713 )
5714 .with_receipt(
5715 chain
5716 .customer_receipt
5717 .as_ref()
5718 .map(|r| r.header.document_id.as_str())
5719 .unwrap_or(""),
5720 &ocpm_uuid_factory,
5721 );
5722
5723 let start_time =
5724 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5725 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5726 add_result(&mut event_log, result);
5727
5728 if let Some(pb) = &pb {
5729 pb.inc(1);
5730 }
5731 }
5732
5733 for project in &sourcing.sourcing_projects {
5735 let vendor_id = sourcing
5737 .contracts
5738 .iter()
5739 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5740 .map(|c| c.vendor_id.clone())
5741 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5742 .or_else(|| {
5743 self.master_data
5744 .vendors
5745 .first()
5746 .map(|v| v.vendor_id.clone())
5747 })
5748 .unwrap_or_else(|| "V000".to_string());
5749 let mut docs = S2cDocuments::new(
5750 &project.project_id,
5751 &vendor_id,
5752 &project.company_code,
5753 project.estimated_annual_spend,
5754 &ocpm_uuid_factory,
5755 );
5756 if let Some(rfx) = sourcing
5758 .rfx_events
5759 .iter()
5760 .find(|r| r.sourcing_project_id == project.project_id)
5761 {
5762 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5763 if let Some(bid) = sourcing.bids.iter().find(|b| {
5765 b.rfx_id == rfx.rfx_id
5766 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5767 }) {
5768 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5769 }
5770 }
5771 if let Some(contract) = sourcing
5773 .contracts
5774 .iter()
5775 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5776 {
5777 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5778 }
5779 let start_time = base_datetime - chrono::Duration::days(90);
5780 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5781 add_result(&mut event_log, result);
5782
5783 if let Some(pb) = &pb {
5784 pb.inc(1);
5785 }
5786 }
5787
5788 for run in &hr.payroll_runs {
5790 let employee_id = hr
5792 .payroll_line_items
5793 .iter()
5794 .find(|li| li.payroll_id == run.payroll_id)
5795 .map(|li| li.employee_id.as_str())
5796 .unwrap_or("EMP000");
5797 let docs = H2rDocuments::new(
5798 &run.payroll_id,
5799 employee_id,
5800 &run.company_code,
5801 run.total_gross,
5802 &ocpm_uuid_factory,
5803 )
5804 .with_time_entries(
5805 hr.time_entries
5806 .iter()
5807 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5808 .take(5)
5809 .map(|t| t.entry_id.as_str())
5810 .collect(),
5811 );
5812 let start_time = base_datetime - chrono::Duration::days(30);
5813 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5814 add_result(&mut event_log, result);
5815
5816 if let Some(pb) = &pb {
5817 pb.inc(1);
5818 }
5819 }
5820
5821 for order in &manufacturing.production_orders {
5823 let mut docs = MfgDocuments::new(
5824 &order.order_id,
5825 &order.material_id,
5826 &order.company_code,
5827 order.planned_quantity,
5828 &ocpm_uuid_factory,
5829 )
5830 .with_operations(
5831 order
5832 .operations
5833 .iter()
5834 .map(|o| format!("OP-{:04}", o.operation_number))
5835 .collect::<Vec<_>>()
5836 .iter()
5837 .map(|s| s.as_str())
5838 .collect(),
5839 );
5840 if let Some(insp) = manufacturing
5842 .quality_inspections
5843 .iter()
5844 .find(|i| i.reference_id == order.order_id)
5845 {
5846 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5847 }
5848 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5850 cc.items
5851 .iter()
5852 .any(|item| item.material_id == order.material_id)
5853 }) {
5854 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5855 }
5856 let start_time = base_datetime - chrono::Duration::days(60);
5857 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5858 add_result(&mut event_log, result);
5859
5860 if let Some(pb) = &pb {
5861 pb.inc(1);
5862 }
5863 }
5864
5865 for customer in &banking.customers {
5867 let customer_id_str = customer.customer_id.to_string();
5868 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5869 if let Some(account) = banking
5871 .accounts
5872 .iter()
5873 .find(|a| a.primary_owner_id == customer.customer_id)
5874 {
5875 let account_id_str = account.account_id.to_string();
5876 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5877 let txn_strs: Vec<String> = banking
5879 .transactions
5880 .iter()
5881 .filter(|t| t.account_id == account.account_id)
5882 .take(10)
5883 .map(|t| t.transaction_id.to_string())
5884 .collect();
5885 let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5886 let txn_amounts: Vec<rust_decimal::Decimal> = banking
5887 .transactions
5888 .iter()
5889 .filter(|t| t.account_id == account.account_id)
5890 .take(10)
5891 .map(|t| t.amount)
5892 .collect();
5893 if !txn_ids.is_empty() {
5894 docs = docs.with_transactions(txn_ids, txn_amounts);
5895 }
5896 }
5897 let start_time = base_datetime - chrono::Duration::days(180);
5898 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5899 add_result(&mut event_log, result);
5900
5901 if let Some(pb) = &pb {
5902 pb.inc(1);
5903 }
5904 }
5905
5906 for engagement in &audit.engagements {
5908 let engagement_id_str = engagement.engagement_id.to_string();
5909 let docs = AuditDocuments::new(
5910 &engagement_id_str,
5911 &engagement.client_entity_id,
5912 &ocpm_uuid_factory,
5913 )
5914 .with_workpapers(
5915 audit
5916 .workpapers
5917 .iter()
5918 .filter(|w| w.engagement_id == engagement.engagement_id)
5919 .take(10)
5920 .map(|w| w.workpaper_id.to_string())
5921 .collect::<Vec<_>>()
5922 .iter()
5923 .map(|s| s.as_str())
5924 .collect(),
5925 )
5926 .with_evidence(
5927 audit
5928 .evidence
5929 .iter()
5930 .filter(|e| e.engagement_id == engagement.engagement_id)
5931 .take(10)
5932 .map(|e| e.evidence_id.to_string())
5933 .collect::<Vec<_>>()
5934 .iter()
5935 .map(|s| s.as_str())
5936 .collect(),
5937 )
5938 .with_risks(
5939 audit
5940 .risk_assessments
5941 .iter()
5942 .filter(|r| r.engagement_id == engagement.engagement_id)
5943 .take(5)
5944 .map(|r| r.risk_id.to_string())
5945 .collect::<Vec<_>>()
5946 .iter()
5947 .map(|s| s.as_str())
5948 .collect(),
5949 )
5950 .with_findings(
5951 audit
5952 .findings
5953 .iter()
5954 .filter(|f| f.engagement_id == engagement.engagement_id)
5955 .take(5)
5956 .map(|f| f.finding_id.to_string())
5957 .collect::<Vec<_>>()
5958 .iter()
5959 .map(|s| s.as_str())
5960 .collect(),
5961 )
5962 .with_judgments(
5963 audit
5964 .judgments
5965 .iter()
5966 .filter(|j| j.engagement_id == engagement.engagement_id)
5967 .take(5)
5968 .map(|j| j.judgment_id.to_string())
5969 .collect::<Vec<_>>()
5970 .iter()
5971 .map(|s| s.as_str())
5972 .collect(),
5973 );
5974 let start_time = base_datetime - chrono::Duration::days(120);
5975 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5976 add_result(&mut event_log, result);
5977
5978 if let Some(pb) = &pb {
5979 pb.inc(1);
5980 }
5981 }
5982
5983 for recon in &financial_reporting.bank_reconciliations {
5985 let docs = BankReconDocuments::new(
5986 &recon.reconciliation_id,
5987 &recon.bank_account_id,
5988 &recon.company_code,
5989 recon.bank_ending_balance,
5990 &ocpm_uuid_factory,
5991 )
5992 .with_statement_lines(
5993 recon
5994 .statement_lines
5995 .iter()
5996 .take(20)
5997 .map(|l| l.line_id.as_str())
5998 .collect(),
5999 )
6000 .with_reconciling_items(
6001 recon
6002 .reconciling_items
6003 .iter()
6004 .take(10)
6005 .map(|i| i.item_id.as_str())
6006 .collect(),
6007 );
6008 let start_time = base_datetime - chrono::Duration::days(30);
6009 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
6010 add_result(&mut event_log, result);
6011
6012 if let Some(pb) = &pb {
6013 pb.inc(1);
6014 }
6015 }
6016
6017 event_log.compute_variants();
6019
6020 let summary = event_log.summary();
6021
6022 if let Some(pb) = pb {
6023 pb.finish_with_message(format!(
6024 "Generated {} OCPM events, {} objects",
6025 summary.event_count, summary.object_count
6026 ));
6027 }
6028
6029 Ok(OcpmSnapshot {
6030 event_count: summary.event_count,
6031 object_count: summary.object_count,
6032 case_count: summary.case_count,
6033 event_log: Some(event_log),
6034 })
6035 }
6036
6037 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
6039 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
6040
6041 let total_rate = if self.config.anomaly_injection.enabled {
6044 self.config.anomaly_injection.rates.total_rate
6045 } else if self.config.fraud.enabled {
6046 self.config.fraud.fraud_rate
6047 } else {
6048 0.02
6049 };
6050
6051 let fraud_rate = if self.config.anomaly_injection.enabled {
6052 self.config.anomaly_injection.rates.fraud_rate
6053 } else {
6054 AnomalyRateConfig::default().fraud_rate
6055 };
6056
6057 let error_rate = if self.config.anomaly_injection.enabled {
6058 self.config.anomaly_injection.rates.error_rate
6059 } else {
6060 AnomalyRateConfig::default().error_rate
6061 };
6062
6063 let process_issue_rate = if self.config.anomaly_injection.enabled {
6064 self.config.anomaly_injection.rates.process_rate
6065 } else {
6066 AnomalyRateConfig::default().process_issue_rate
6067 };
6068
6069 let anomaly_config = AnomalyInjectorConfig {
6070 rates: AnomalyRateConfig {
6071 total_rate,
6072 fraud_rate,
6073 error_rate,
6074 process_issue_rate,
6075 ..Default::default()
6076 },
6077 seed: self.seed + 5000,
6078 ..Default::default()
6079 };
6080
6081 let mut injector = AnomalyInjector::new(anomaly_config);
6082 let result = injector.process_entries(entries);
6083
6084 if let Some(pb) = &pb {
6085 pb.inc(entries.len() as u64);
6086 pb.finish_with_message("Anomaly injection complete");
6087 }
6088
6089 let mut by_type = HashMap::new();
6090 for label in &result.labels {
6091 *by_type
6092 .entry(format!("{:?}", label.anomaly_type))
6093 .or_insert(0) += 1;
6094 }
6095
6096 Ok(AnomalyLabels {
6097 labels: result.labels,
6098 summary: Some(result.summary),
6099 by_type,
6100 })
6101 }
6102
6103 fn validate_journal_entries(
6112 &mut self,
6113 entries: &[JournalEntry],
6114 ) -> SynthResult<BalanceValidationResult> {
6115 let clean_entries: Vec<&JournalEntry> = entries
6117 .iter()
6118 .filter(|e| {
6119 e.header
6120 .header_text
6121 .as_ref()
6122 .map(|t| !t.contains("[HUMAN_ERROR:"))
6123 .unwrap_or(true)
6124 })
6125 .collect();
6126
6127 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6128
6129 let config = BalanceTrackerConfig {
6131 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
6135 };
6136 let validation_currency = self
6137 .config
6138 .companies
6139 .first()
6140 .map(|c| c.currency.clone())
6141 .unwrap_or_else(|| "USD".to_string());
6142
6143 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6144
6145 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6147 let errors = tracker.apply_entries(&clean_refs);
6148
6149 if let Some(pb) = &pb {
6150 pb.inc(entries.len() as u64);
6151 }
6152
6153 let has_unbalanced = tracker
6156 .get_validation_errors()
6157 .iter()
6158 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6159
6160 let mut all_errors = errors;
6163 all_errors.extend(tracker.get_validation_errors().iter().cloned());
6164 let company_codes: Vec<String> = self
6165 .config
6166 .companies
6167 .iter()
6168 .map(|c| c.code.clone())
6169 .collect();
6170
6171 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6172 .map(|d| d + chrono::Months::new(self.config.global.period_months))
6173 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6174
6175 for company_code in &company_codes {
6176 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6177 all_errors.push(e);
6178 }
6179 }
6180
6181 let stats = tracker.get_statistics();
6183
6184 let is_balanced = all_errors.is_empty();
6186
6187 if let Some(pb) = pb {
6188 let msg = if is_balanced {
6189 "Balance validation passed"
6190 } else {
6191 "Balance validation completed with errors"
6192 };
6193 pb.finish_with_message(msg);
6194 }
6195
6196 Ok(BalanceValidationResult {
6197 validated: true,
6198 is_balanced,
6199 entries_processed: stats.entries_processed,
6200 total_debits: stats.total_debits,
6201 total_credits: stats.total_credits,
6202 accounts_tracked: stats.accounts_tracked,
6203 companies_tracked: stats.companies_tracked,
6204 validation_errors: all_errors,
6205 has_unbalanced_entries: has_unbalanced,
6206 })
6207 }
6208
6209 fn inject_data_quality(
6214 &mut self,
6215 entries: &mut [JournalEntry],
6216 ) -> SynthResult<DataQualityStats> {
6217 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6218
6219 let config = if self.config.data_quality.enabled {
6222 let dq = &self.config.data_quality;
6223 DataQualityConfig {
6224 enable_missing_values: dq.missing_values.enabled,
6225 missing_values: datasynth_generators::MissingValueConfig {
6226 global_rate: dq.effective_missing_rate(),
6227 ..Default::default()
6228 },
6229 enable_format_variations: dq.format_variations.enabled,
6230 format_variations: datasynth_generators::FormatVariationConfig {
6231 date_variation_rate: dq.format_variations.dates.rate,
6232 amount_variation_rate: dq.format_variations.amounts.rate,
6233 identifier_variation_rate: dq.format_variations.identifiers.rate,
6234 ..Default::default()
6235 },
6236 enable_duplicates: dq.duplicates.enabled,
6237 duplicates: datasynth_generators::DuplicateConfig {
6238 duplicate_rate: dq.effective_duplicate_rate(),
6239 ..Default::default()
6240 },
6241 enable_typos: dq.typos.enabled,
6242 typos: datasynth_generators::TypoConfig {
6243 char_error_rate: dq.effective_typo_rate(),
6244 ..Default::default()
6245 },
6246 enable_encoding_issues: dq.encoding_issues.enabled,
6247 encoding_issue_rate: dq.encoding_issues.rate,
6248 seed: self.seed.wrapping_add(77), track_statistics: true,
6250 }
6251 } else {
6252 DataQualityConfig::minimal()
6253 };
6254 let mut injector = DataQualityInjector::new(config);
6255
6256 injector.set_country_pack(self.primary_pack().clone());
6258
6259 let context = HashMap::new();
6261
6262 for entry in entries.iter_mut() {
6263 if let Some(text) = &entry.header.header_text {
6265 let processed = injector.process_text_field(
6266 "header_text",
6267 text,
6268 &entry.header.document_id.to_string(),
6269 &context,
6270 );
6271 match processed {
6272 Some(new_text) if new_text != *text => {
6273 entry.header.header_text = Some(new_text);
6274 }
6275 None => {
6276 entry.header.header_text = None; }
6278 _ => {}
6279 }
6280 }
6281
6282 if let Some(ref_text) = &entry.header.reference {
6284 let processed = injector.process_text_field(
6285 "reference",
6286 ref_text,
6287 &entry.header.document_id.to_string(),
6288 &context,
6289 );
6290 match processed {
6291 Some(new_text) if new_text != *ref_text => {
6292 entry.header.reference = Some(new_text);
6293 }
6294 None => {
6295 entry.header.reference = None;
6296 }
6297 _ => {}
6298 }
6299 }
6300
6301 let user_persona = entry.header.user_persona.clone();
6303 if let Some(processed) = injector.process_text_field(
6304 "user_persona",
6305 &user_persona,
6306 &entry.header.document_id.to_string(),
6307 &context,
6308 ) {
6309 if processed != user_persona {
6310 entry.header.user_persona = processed;
6311 }
6312 }
6313
6314 for line in &mut entry.lines {
6316 if let Some(ref text) = line.line_text {
6318 let processed = injector.process_text_field(
6319 "line_text",
6320 text,
6321 &entry.header.document_id.to_string(),
6322 &context,
6323 );
6324 match processed {
6325 Some(new_text) if new_text != *text => {
6326 line.line_text = Some(new_text);
6327 }
6328 None => {
6329 line.line_text = None;
6330 }
6331 _ => {}
6332 }
6333 }
6334
6335 if let Some(cc) = &line.cost_center {
6337 let processed = injector.process_text_field(
6338 "cost_center",
6339 cc,
6340 &entry.header.document_id.to_string(),
6341 &context,
6342 );
6343 match processed {
6344 Some(new_cc) if new_cc != *cc => {
6345 line.cost_center = Some(new_cc);
6346 }
6347 None => {
6348 line.cost_center = None;
6349 }
6350 _ => {}
6351 }
6352 }
6353 }
6354
6355 if let Some(pb) = &pb {
6356 pb.inc(1);
6357 }
6358 }
6359
6360 if let Some(pb) = pb {
6361 pb.finish_with_message("Data quality injection complete");
6362 }
6363
6364 Ok(injector.stats().clone())
6365 }
6366
6367 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6378 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6379 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6380 let fiscal_year = start_date.year() as u16;
6381 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6382
6383 let total_revenue: rust_decimal::Decimal = entries
6385 .iter()
6386 .flat_map(|e| e.lines.iter())
6387 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6388 .map(|l| l.credit_amount)
6389 .sum();
6390
6391 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6393
6394 let mut snapshot = AuditSnapshot::default();
6395
6396 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6398 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6399 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6400 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6401 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6402 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6403
6404 let accounts: Vec<String> = self
6406 .coa
6407 .as_ref()
6408 .map(|coa| {
6409 coa.get_postable_accounts()
6410 .iter()
6411 .map(|acc| acc.account_code().to_string())
6412 .collect()
6413 })
6414 .unwrap_or_default();
6415
6416 for (i, company) in self.config.companies.iter().enumerate() {
6418 let company_revenue = total_revenue
6420 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6421
6422 let engagements_for_company =
6424 self.phase_config.audit_engagements / self.config.companies.len().max(1);
6425 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6426 1
6427 } else {
6428 0
6429 };
6430
6431 for _eng_idx in 0..(engagements_for_company + extra) {
6432 let mut engagement = engagement_gen.generate_engagement(
6434 &company.code,
6435 &company.name,
6436 fiscal_year,
6437 period_end,
6438 company_revenue,
6439 None, );
6441
6442 if !self.master_data.employees.is_empty() {
6444 let emp_count = self.master_data.employees.len();
6445 let base = (i * 10 + _eng_idx) % emp_count;
6447 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6448 .employee_id
6449 .clone();
6450 engagement.engagement_manager_id = self.master_data.employees
6451 [(base + 1) % emp_count]
6452 .employee_id
6453 .clone();
6454 let real_team: Vec<String> = engagement
6455 .team_member_ids
6456 .iter()
6457 .enumerate()
6458 .map(|(j, _)| {
6459 self.master_data.employees[(base + 2 + j) % emp_count]
6460 .employee_id
6461 .clone()
6462 })
6463 .collect();
6464 engagement.team_member_ids = real_team;
6465 }
6466
6467 if let Some(pb) = &pb {
6468 pb.inc(1);
6469 }
6470
6471 let team_members: Vec<String> = engagement.team_member_ids.clone();
6473
6474 let workpapers =
6476 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6477
6478 for wp in &workpapers {
6479 if let Some(pb) = &pb {
6480 pb.inc(1);
6481 }
6482
6483 let evidence = evidence_gen.generate_evidence_for_workpaper(
6485 wp,
6486 &team_members,
6487 wp.preparer_date,
6488 );
6489
6490 for _ in &evidence {
6491 if let Some(pb) = &pb {
6492 pb.inc(1);
6493 }
6494 }
6495
6496 snapshot.evidence.extend(evidence);
6497 }
6498
6499 let risks =
6501 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6502
6503 for _ in &risks {
6504 if let Some(pb) = &pb {
6505 pb.inc(1);
6506 }
6507 }
6508 snapshot.risk_assessments.extend(risks);
6509
6510 let findings = finding_gen.generate_findings_for_engagement(
6512 &engagement,
6513 &workpapers,
6514 &team_members,
6515 );
6516
6517 for _ in &findings {
6518 if let Some(pb) = &pb {
6519 pb.inc(1);
6520 }
6521 }
6522 snapshot.findings.extend(findings);
6523
6524 let judgments =
6526 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6527
6528 for _ in &judgments {
6529 if let Some(pb) = &pb {
6530 pb.inc(1);
6531 }
6532 }
6533 snapshot.judgments.extend(judgments);
6534
6535 snapshot.workpapers.extend(workpapers);
6537 snapshot.engagements.push(engagement);
6538 }
6539 }
6540
6541 if let Some(pb) = pb {
6542 pb.finish_with_message(format!(
6543 "Audit data: {} engagements, {} workpapers, {} evidence",
6544 snapshot.engagements.len(),
6545 snapshot.workpapers.len(),
6546 snapshot.evidence.len()
6547 ));
6548 }
6549
6550 Ok(snapshot)
6551 }
6552
6553 fn export_graphs(
6560 &mut self,
6561 entries: &[JournalEntry],
6562 _coa: &Arc<ChartOfAccounts>,
6563 stats: &mut EnhancedGenerationStatistics,
6564 ) -> SynthResult<GraphExportSnapshot> {
6565 let pb = self.create_progress_bar(100, "Exporting Graphs");
6566
6567 let mut snapshot = GraphExportSnapshot::default();
6568
6569 let output_dir = self
6571 .output_path
6572 .clone()
6573 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6574 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6575
6576 for graph_type in &self.config.graph_export.graph_types {
6578 if let Some(pb) = &pb {
6579 pb.inc(10);
6580 }
6581
6582 let graph_config = TransactionGraphConfig {
6584 include_vendors: false,
6585 include_customers: false,
6586 create_debit_credit_edges: true,
6587 include_document_nodes: graph_type.include_document_nodes,
6588 min_edge_weight: graph_type.min_edge_weight,
6589 aggregate_parallel_edges: graph_type.aggregate_edges,
6590 framework: None,
6591 };
6592
6593 let mut builder = TransactionGraphBuilder::new(graph_config);
6594 builder.add_journal_entries(entries);
6595 let graph = builder.build();
6596
6597 stats.graph_node_count += graph.node_count();
6599 stats.graph_edge_count += graph.edge_count();
6600
6601 if let Some(pb) = &pb {
6602 pb.inc(40);
6603 }
6604
6605 for format in &self.config.graph_export.formats {
6607 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6608
6609 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6611 warn!("Failed to create graph output directory: {}", e);
6612 continue;
6613 }
6614
6615 match format {
6616 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6617 let pyg_config = PyGExportConfig {
6618 common: datasynth_graph::CommonExportConfig {
6619 export_node_features: true,
6620 export_edge_features: true,
6621 export_node_labels: true,
6622 export_edge_labels: true,
6623 export_masks: true,
6624 train_ratio: self.config.graph_export.train_ratio,
6625 val_ratio: self.config.graph_export.validation_ratio,
6626 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6627 },
6628 one_hot_categoricals: false,
6629 };
6630
6631 let exporter = PyGExporter::new(pyg_config);
6632 match exporter.export(&graph, &format_dir) {
6633 Ok(metadata) => {
6634 snapshot.exports.insert(
6635 format!("{}_{}", graph_type.name, "pytorch_geometric"),
6636 GraphExportInfo {
6637 name: graph_type.name.clone(),
6638 format: "pytorch_geometric".to_string(),
6639 output_path: format_dir.clone(),
6640 node_count: metadata.num_nodes,
6641 edge_count: metadata.num_edges,
6642 },
6643 );
6644 snapshot.graph_count += 1;
6645 }
6646 Err(e) => {
6647 warn!("Failed to export PyTorch Geometric graph: {}", e);
6648 }
6649 }
6650 }
6651 datasynth_config::schema::GraphExportFormat::Neo4j => {
6652 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
6653
6654 let neo4j_config = Neo4jExportConfig {
6655 export_node_properties: true,
6656 export_edge_properties: true,
6657 export_features: true,
6658 generate_cypher: true,
6659 generate_admin_import: true,
6660 database_name: "synth".to_string(),
6661 cypher_batch_size: 1000,
6662 };
6663
6664 let exporter = Neo4jExporter::new(neo4j_config);
6665 match exporter.export(&graph, &format_dir) {
6666 Ok(metadata) => {
6667 snapshot.exports.insert(
6668 format!("{}_{}", graph_type.name, "neo4j"),
6669 GraphExportInfo {
6670 name: graph_type.name.clone(),
6671 format: "neo4j".to_string(),
6672 output_path: format_dir.clone(),
6673 node_count: metadata.num_nodes,
6674 edge_count: metadata.num_edges,
6675 },
6676 );
6677 snapshot.graph_count += 1;
6678 }
6679 Err(e) => {
6680 warn!("Failed to export Neo4j graph: {}", e);
6681 }
6682 }
6683 }
6684 datasynth_config::schema::GraphExportFormat::Dgl => {
6685 use datasynth_graph::{DGLExportConfig, DGLExporter};
6686
6687 let dgl_config = DGLExportConfig {
6688 common: datasynth_graph::CommonExportConfig {
6689 export_node_features: true,
6690 export_edge_features: true,
6691 export_node_labels: true,
6692 export_edge_labels: true,
6693 export_masks: true,
6694 train_ratio: self.config.graph_export.train_ratio,
6695 val_ratio: self.config.graph_export.validation_ratio,
6696 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6697 },
6698 heterogeneous: false,
6699 include_pickle_script: true, };
6701
6702 let exporter = DGLExporter::new(dgl_config);
6703 match exporter.export(&graph, &format_dir) {
6704 Ok(metadata) => {
6705 snapshot.exports.insert(
6706 format!("{}_{}", graph_type.name, "dgl"),
6707 GraphExportInfo {
6708 name: graph_type.name.clone(),
6709 format: "dgl".to_string(),
6710 output_path: format_dir.clone(),
6711 node_count: metadata.common.num_nodes,
6712 edge_count: metadata.common.num_edges,
6713 },
6714 );
6715 snapshot.graph_count += 1;
6716 }
6717 Err(e) => {
6718 warn!("Failed to export DGL graph: {}", e);
6719 }
6720 }
6721 }
6722 datasynth_config::schema::GraphExportFormat::RustGraph => {
6723 use datasynth_graph::{
6724 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6725 };
6726
6727 let rustgraph_config = RustGraphExportConfig {
6728 include_features: true,
6729 include_temporal: true,
6730 include_labels: true,
6731 source_name: "datasynth".to_string(),
6732 batch_id: None,
6733 output_format: RustGraphOutputFormat::JsonLines,
6734 export_node_properties: true,
6735 export_edge_properties: true,
6736 pretty_print: false,
6737 };
6738
6739 let exporter = RustGraphExporter::new(rustgraph_config);
6740 match exporter.export(&graph, &format_dir) {
6741 Ok(metadata) => {
6742 snapshot.exports.insert(
6743 format!("{}_{}", graph_type.name, "rustgraph"),
6744 GraphExportInfo {
6745 name: graph_type.name.clone(),
6746 format: "rustgraph".to_string(),
6747 output_path: format_dir.clone(),
6748 node_count: metadata.num_nodes,
6749 edge_count: metadata.num_edges,
6750 },
6751 );
6752 snapshot.graph_count += 1;
6753 }
6754 Err(e) => {
6755 warn!("Failed to export RustGraph: {}", e);
6756 }
6757 }
6758 }
6759 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6760 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6762 }
6763 }
6764 }
6765
6766 if let Some(pb) = &pb {
6767 pb.inc(40);
6768 }
6769 }
6770
6771 stats.graph_export_count = snapshot.graph_count;
6772 snapshot.exported = snapshot.graph_count > 0;
6773
6774 if let Some(pb) = pb {
6775 pb.finish_with_message(format!(
6776 "Graphs exported: {} graphs ({} nodes, {} edges)",
6777 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6778 ));
6779 }
6780
6781 Ok(snapshot)
6782 }
6783
6784 fn build_additional_graphs(
6789 &self,
6790 banking: &BankingSnapshot,
6791 _intercompany: &IntercompanySnapshot,
6792 entries: &[JournalEntry],
6793 stats: &mut EnhancedGenerationStatistics,
6794 ) {
6795 let output_dir = self
6796 .output_path
6797 .clone()
6798 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6799 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6800
6801 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6803 info!("Phase 10c: Building banking network graph");
6804 let config = BankingGraphConfig::default();
6805 let mut builder = BankingGraphBuilder::new(config);
6806 builder.add_customers(&banking.customers);
6807 builder.add_accounts(&banking.accounts, &banking.customers);
6808 builder.add_transactions(&banking.transactions);
6809 let graph = builder.build();
6810
6811 let node_count = graph.node_count();
6812 let edge_count = graph.edge_count();
6813 stats.graph_node_count += node_count;
6814 stats.graph_edge_count += edge_count;
6815
6816 for format in &self.config.graph_export.formats {
6818 if matches!(
6819 format,
6820 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6821 ) {
6822 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6823 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6824 warn!("Failed to create banking graph output dir: {}", e);
6825 continue;
6826 }
6827 let pyg_config = PyGExportConfig::default();
6828 let exporter = PyGExporter::new(pyg_config);
6829 if let Err(e) = exporter.export(&graph, &format_dir) {
6830 warn!("Failed to export banking graph as PyG: {}", e);
6831 } else {
6832 info!(
6833 "Banking network graph exported: {} nodes, {} edges",
6834 node_count, edge_count
6835 );
6836 }
6837 }
6838 }
6839 }
6840
6841 let approval_entries: Vec<_> = entries
6843 .iter()
6844 .filter(|je| je.header.approval_workflow.is_some())
6845 .collect();
6846
6847 if !approval_entries.is_empty() {
6848 info!(
6849 "Phase 10c: Building approval network graph ({} entries with approvals)",
6850 approval_entries.len()
6851 );
6852 let config = ApprovalGraphConfig::default();
6853 let mut builder = ApprovalGraphBuilder::new(config);
6854
6855 for je in &approval_entries {
6856 if let Some(ref wf) = je.header.approval_workflow {
6857 for action in &wf.actions {
6858 let record = datasynth_core::models::ApprovalRecord {
6859 approval_id: format!(
6860 "APR-{}-{}",
6861 je.header.document_id, action.approval_level
6862 ),
6863 document_number: je.header.document_id.to_string(),
6864 document_type: "JE".to_string(),
6865 company_code: je.company_code().to_string(),
6866 requester_id: wf.preparer_id.clone(),
6867 requester_name: Some(wf.preparer_name.clone()),
6868 approver_id: action.actor_id.clone(),
6869 approver_name: action.actor_name.clone(),
6870 approval_date: je.posting_date(),
6871 action: format!("{:?}", action.action),
6872 amount: wf.amount,
6873 approval_limit: None,
6874 comments: action.comments.clone(),
6875 delegation_from: None,
6876 is_auto_approved: false,
6877 };
6878 builder.add_approval(&record);
6879 }
6880 }
6881 }
6882
6883 let graph = builder.build();
6884 let node_count = graph.node_count();
6885 let edge_count = graph.edge_count();
6886 stats.graph_node_count += node_count;
6887 stats.graph_edge_count += edge_count;
6888
6889 for format in &self.config.graph_export.formats {
6891 if matches!(
6892 format,
6893 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6894 ) {
6895 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6896 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6897 warn!("Failed to create approval graph output dir: {}", e);
6898 continue;
6899 }
6900 let pyg_config = PyGExportConfig::default();
6901 let exporter = PyGExporter::new(pyg_config);
6902 if let Err(e) = exporter.export(&graph, &format_dir) {
6903 warn!("Failed to export approval graph as PyG: {}", e);
6904 } else {
6905 info!(
6906 "Approval network graph exported: {} nodes, {} edges",
6907 node_count, edge_count
6908 );
6909 }
6910 }
6911 }
6912 }
6913
6914 debug!(
6917 "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6918 available when intercompany relationships are modeled as IntercompanyRelationship)"
6919 );
6920 }
6921
6922 #[allow(clippy::too_many_arguments)]
6929 fn export_hypergraph(
6930 &self,
6931 coa: &Arc<ChartOfAccounts>,
6932 entries: &[JournalEntry],
6933 document_flows: &DocumentFlowSnapshot,
6934 sourcing: &SourcingSnapshot,
6935 hr: &HrSnapshot,
6936 manufacturing: &ManufacturingSnapshot,
6937 banking: &BankingSnapshot,
6938 audit: &AuditSnapshot,
6939 financial_reporting: &FinancialReportingSnapshot,
6940 ocpm: &OcpmSnapshot,
6941 stats: &mut EnhancedGenerationStatistics,
6942 ) -> SynthResult<HypergraphExportInfo> {
6943 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6944 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6945 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6946 use datasynth_graph::models::hypergraph::AggregationStrategy;
6947
6948 let hg_settings = &self.config.graph_export.hypergraph;
6949
6950 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6952 "truncate" => AggregationStrategy::Truncate,
6953 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6954 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6955 "importance_sample" => AggregationStrategy::ImportanceSample,
6956 _ => AggregationStrategy::PoolByCounterparty,
6957 };
6958
6959 let builder_config = HypergraphConfig {
6960 max_nodes: hg_settings.max_nodes,
6961 aggregation_strategy,
6962 include_coso: hg_settings.governance_layer.include_coso,
6963 include_controls: hg_settings.governance_layer.include_controls,
6964 include_sox: hg_settings.governance_layer.include_sox,
6965 include_vendors: hg_settings.governance_layer.include_vendors,
6966 include_customers: hg_settings.governance_layer.include_customers,
6967 include_employees: hg_settings.governance_layer.include_employees,
6968 include_p2p: hg_settings.process_layer.include_p2p,
6969 include_o2c: hg_settings.process_layer.include_o2c,
6970 include_s2c: hg_settings.process_layer.include_s2c,
6971 include_h2r: hg_settings.process_layer.include_h2r,
6972 include_mfg: hg_settings.process_layer.include_mfg,
6973 include_bank: hg_settings.process_layer.include_bank,
6974 include_audit: hg_settings.process_layer.include_audit,
6975 include_r2r: hg_settings.process_layer.include_r2r,
6976 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6977 docs_per_counterparty_threshold: hg_settings
6978 .process_layer
6979 .docs_per_counterparty_threshold,
6980 include_accounts: hg_settings.accounting_layer.include_accounts,
6981 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6982 include_cross_layer_edges: hg_settings.cross_layer.enabled,
6983 };
6984
6985 let mut builder = HypergraphBuilder::new(builder_config);
6986
6987 builder.add_coso_framework();
6989
6990 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
6993 let controls = InternalControl::standard_controls();
6994 builder.add_controls(&controls);
6995 }
6996
6997 builder.add_vendors(&self.master_data.vendors);
6999 builder.add_customers(&self.master_data.customers);
7000 builder.add_employees(&self.master_data.employees);
7001
7002 builder.add_p2p_documents(
7004 &document_flows.purchase_orders,
7005 &document_flows.goods_receipts,
7006 &document_flows.vendor_invoices,
7007 &document_flows.payments,
7008 );
7009 builder.add_o2c_documents(
7010 &document_flows.sales_orders,
7011 &document_flows.deliveries,
7012 &document_flows.customer_invoices,
7013 );
7014 builder.add_s2c_documents(
7015 &sourcing.sourcing_projects,
7016 &sourcing.qualifications,
7017 &sourcing.rfx_events,
7018 &sourcing.bids,
7019 &sourcing.bid_evaluations,
7020 &sourcing.contracts,
7021 );
7022 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
7023 builder.add_mfg_documents(
7024 &manufacturing.production_orders,
7025 &manufacturing.quality_inspections,
7026 &manufacturing.cycle_counts,
7027 );
7028 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
7029 builder.add_audit_documents(
7030 &audit.engagements,
7031 &audit.workpapers,
7032 &audit.findings,
7033 &audit.evidence,
7034 &audit.risk_assessments,
7035 &audit.judgments,
7036 );
7037 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
7038
7039 if let Some(ref event_log) = ocpm.event_log {
7041 builder.add_ocpm_events(event_log);
7042 }
7043
7044 builder.add_accounts(coa);
7046 builder.add_journal_entries_as_hyperedges(entries);
7047
7048 let hypergraph = builder.build();
7050
7051 let output_dir = self
7053 .output_path
7054 .clone()
7055 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7056 let hg_dir = output_dir
7057 .join(&self.config.graph_export.output_subdirectory)
7058 .join(&hg_settings.output_subdirectory);
7059
7060 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
7062 "unified" => {
7063 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7064 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7065 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
7066 })?;
7067 (
7068 metadata.num_nodes,
7069 metadata.num_edges,
7070 metadata.num_hyperedges,
7071 )
7072 }
7073 _ => {
7074 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
7076 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7077 SynthError::generation(format!("Hypergraph export failed: {}", e))
7078 })?;
7079 (
7080 metadata.num_nodes,
7081 metadata.num_edges,
7082 metadata.num_hyperedges,
7083 )
7084 }
7085 };
7086
7087 #[cfg(feature = "streaming")]
7089 if let Some(ref target_url) = hg_settings.stream_target {
7090 use crate::stream_client::{StreamClient, StreamConfig};
7091 use std::io::Write as _;
7092
7093 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
7094 let stream_config = StreamConfig {
7095 target_url: target_url.clone(),
7096 batch_size: hg_settings.stream_batch_size,
7097 api_key,
7098 ..StreamConfig::default()
7099 };
7100
7101 match StreamClient::new(stream_config) {
7102 Ok(mut client) => {
7103 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7104 match exporter.export_to_writer(&hypergraph, &mut client) {
7105 Ok(_) => {
7106 if let Err(e) = client.flush() {
7107 warn!("Failed to flush stream client: {}", e);
7108 } else {
7109 info!("Streamed {} records to {}", client.total_sent(), target_url);
7110 }
7111 }
7112 Err(e) => {
7113 warn!("Streaming export failed: {}", e);
7114 }
7115 }
7116 }
7117 Err(e) => {
7118 warn!("Failed to create stream client: {}", e);
7119 }
7120 }
7121 }
7122
7123 stats.graph_node_count += num_nodes;
7125 stats.graph_edge_count += num_edges;
7126 stats.graph_export_count += 1;
7127
7128 Ok(HypergraphExportInfo {
7129 node_count: num_nodes,
7130 edge_count: num_edges,
7131 hyperedge_count: num_hyperedges,
7132 output_path: hg_dir,
7133 })
7134 }
7135
7136 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7141 let pb = self.create_progress_bar(100, "Generating Banking Data");
7142
7143 let orchestrator = BankingOrchestratorBuilder::new()
7145 .config(self.config.banking.clone())
7146 .seed(self.seed + 9000)
7147 .country_pack(self.primary_pack().clone())
7148 .build();
7149
7150 if let Some(pb) = &pb {
7151 pb.inc(10);
7152 }
7153
7154 let result = orchestrator.generate();
7156
7157 if let Some(pb) = &pb {
7158 pb.inc(90);
7159 pb.finish_with_message(format!(
7160 "Banking: {} customers, {} transactions",
7161 result.customers.len(),
7162 result.transactions.len()
7163 ));
7164 }
7165
7166 let mut banking_customers = result.customers;
7171 let core_customers = &self.master_data.customers;
7172 if !core_customers.is_empty() {
7173 for (i, bc) in banking_customers.iter_mut().enumerate() {
7174 let core = &core_customers[i % core_customers.len()];
7175 bc.name = CustomerName::business(&core.name);
7176 bc.residence_country = core.country.clone();
7177 bc.enterprise_customer_id = Some(core.customer_id.clone());
7178 }
7179 debug!(
7180 "Cross-referenced {} banking customers with {} core customers",
7181 banking_customers.len(),
7182 core_customers.len()
7183 );
7184 }
7185
7186 Ok(BankingSnapshot {
7187 customers: banking_customers,
7188 accounts: result.accounts,
7189 transactions: result.transactions,
7190 transaction_labels: result.transaction_labels,
7191 customer_labels: result.customer_labels,
7192 account_labels: result.account_labels,
7193 relationship_labels: result.relationship_labels,
7194 narratives: result.narratives,
7195 suspicious_count: result.stats.suspicious_count,
7196 scenario_count: result.scenarios.len(),
7197 })
7198 }
7199
7200 fn calculate_total_transactions(&self) -> u64 {
7202 let months = self.config.global.period_months as f64;
7203 self.config
7204 .companies
7205 .iter()
7206 .map(|c| {
7207 let annual = c.annual_transaction_volume.count() as f64;
7208 let weighted = annual * c.volume_weight;
7209 (weighted * months / 12.0) as u64
7210 })
7211 .sum()
7212 }
7213
7214 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7216 if !self.phase_config.show_progress {
7217 return None;
7218 }
7219
7220 let pb = if let Some(mp) = &self.multi_progress {
7221 mp.add(ProgressBar::new(total))
7222 } else {
7223 ProgressBar::new(total)
7224 };
7225
7226 pb.set_style(
7227 ProgressStyle::default_bar()
7228 .template(&format!(
7229 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7230 message
7231 ))
7232 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7233 .progress_chars("#>-"),
7234 );
7235
7236 Some(pb)
7237 }
7238
7239 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7241 self.coa.clone()
7242 }
7243
7244 pub fn get_master_data(&self) -> &MasterDataSnapshot {
7246 &self.master_data
7247 }
7248
7249 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7251 use super::lineage::LineageGraphBuilder;
7252
7253 let mut builder = LineageGraphBuilder::new();
7254
7255 builder.add_config_section("config:global", "Global Config");
7257 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7258 builder.add_config_section("config:transactions", "Transaction Config");
7259
7260 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7262 builder.add_generator_phase("phase:je", "Journal Entry Generation");
7263
7264 builder.configured_by("phase:coa", "config:chart_of_accounts");
7266 builder.configured_by("phase:je", "config:transactions");
7267
7268 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7270 builder.produced_by("output:je", "phase:je");
7271
7272 if self.phase_config.generate_master_data {
7274 builder.add_config_section("config:master_data", "Master Data Config");
7275 builder.add_generator_phase("phase:master_data", "Master Data Generation");
7276 builder.configured_by("phase:master_data", "config:master_data");
7277 builder.input_to("phase:master_data", "phase:je");
7278 }
7279
7280 if self.phase_config.generate_document_flows {
7281 builder.add_config_section("config:document_flows", "Document Flow Config");
7282 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7283 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7284 builder.configured_by("phase:p2p", "config:document_flows");
7285 builder.configured_by("phase:o2c", "config:document_flows");
7286
7287 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7288 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7289 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7290 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7291 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7292
7293 builder.produced_by("output:po", "phase:p2p");
7294 builder.produced_by("output:gr", "phase:p2p");
7295 builder.produced_by("output:vi", "phase:p2p");
7296 builder.produced_by("output:so", "phase:o2c");
7297 builder.produced_by("output:ci", "phase:o2c");
7298 }
7299
7300 if self.phase_config.inject_anomalies {
7301 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7302 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7303 builder.configured_by("phase:anomaly", "config:fraud");
7304 builder.add_output_file(
7305 "output:labels",
7306 "Anomaly Labels",
7307 "labels/anomaly_labels.csv",
7308 );
7309 builder.produced_by("output:labels", "phase:anomaly");
7310 }
7311
7312 if self.phase_config.generate_audit {
7313 builder.add_config_section("config:audit", "Audit Config");
7314 builder.add_generator_phase("phase:audit", "Audit Data Generation");
7315 builder.configured_by("phase:audit", "config:audit");
7316 }
7317
7318 if self.phase_config.generate_banking {
7319 builder.add_config_section("config:banking", "Banking Config");
7320 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7321 builder.configured_by("phase:banking", "config:banking");
7322 }
7323
7324 if self.config.llm.enabled {
7325 builder.add_config_section("config:llm", "LLM Enrichment Config");
7326 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7327 builder.configured_by("phase:llm_enrichment", "config:llm");
7328 }
7329
7330 if self.config.diffusion.enabled {
7331 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7332 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7333 builder.configured_by("phase:diffusion", "config:diffusion");
7334 }
7335
7336 if self.config.causal.enabled {
7337 builder.add_config_section("config:causal", "Causal Generation Config");
7338 builder.add_generator_phase("phase:causal", "Causal Overlay");
7339 builder.configured_by("phase:causal", "config:causal");
7340 }
7341
7342 builder.build()
7343 }
7344}
7345
7346fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7348 match format {
7349 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7350 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7351 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7352 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7353 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7354 }
7355}
7356
7357#[cfg(test)]
7358#[allow(clippy::unwrap_used)]
7359mod tests {
7360 use super::*;
7361 use datasynth_config::schema::*;
7362
7363 fn create_test_config() -> GeneratorConfig {
7364 GeneratorConfig {
7365 global: GlobalConfig {
7366 industry: IndustrySector::Manufacturing,
7367 start_date: "2024-01-01".to_string(),
7368 period_months: 1,
7369 seed: Some(42),
7370 parallel: false,
7371 group_currency: "USD".to_string(),
7372 worker_threads: 0,
7373 memory_limit_mb: 0,
7374 },
7375 companies: vec![CompanyConfig {
7376 code: "1000".to_string(),
7377 name: "Test Company".to_string(),
7378 currency: "USD".to_string(),
7379 country: "US".to_string(),
7380 annual_transaction_volume: TransactionVolume::TenK,
7381 volume_weight: 1.0,
7382 fiscal_year_variant: "K4".to_string(),
7383 }],
7384 chart_of_accounts: ChartOfAccountsConfig {
7385 complexity: CoAComplexity::Small,
7386 industry_specific: true,
7387 custom_accounts: None,
7388 min_hierarchy_depth: 2,
7389 max_hierarchy_depth: 4,
7390 },
7391 transactions: TransactionConfig::default(),
7392 output: OutputConfig::default(),
7393 fraud: FraudConfig::default(),
7394 internal_controls: InternalControlsConfig::default(),
7395 business_processes: BusinessProcessConfig::default(),
7396 user_personas: UserPersonaConfig::default(),
7397 templates: TemplateConfig::default(),
7398 approval: ApprovalConfig::default(),
7399 departments: DepartmentConfig::default(),
7400 master_data: MasterDataConfig::default(),
7401 document_flows: DocumentFlowConfig::default(),
7402 intercompany: IntercompanyConfig::default(),
7403 balance: BalanceConfig::default(),
7404 ocpm: OcpmConfig::default(),
7405 audit: AuditGenerationConfig::default(),
7406 banking: datasynth_banking::BankingConfig::default(),
7407 data_quality: DataQualitySchemaConfig::default(),
7408 scenario: ScenarioConfig::default(),
7409 temporal: TemporalDriftConfig::default(),
7410 graph_export: GraphExportConfig::default(),
7411 streaming: StreamingSchemaConfig::default(),
7412 rate_limit: RateLimitSchemaConfig::default(),
7413 temporal_attributes: TemporalAttributeSchemaConfig::default(),
7414 relationships: RelationshipSchemaConfig::default(),
7415 accounting_standards: AccountingStandardsConfig::default(),
7416 audit_standards: AuditStandardsConfig::default(),
7417 distributions: Default::default(),
7418 temporal_patterns: Default::default(),
7419 vendor_network: VendorNetworkSchemaConfig::default(),
7420 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7421 relationship_strength: RelationshipStrengthSchemaConfig::default(),
7422 cross_process_links: CrossProcessLinksSchemaConfig::default(),
7423 organizational_events: OrganizationalEventsSchemaConfig::default(),
7424 behavioral_drift: BehavioralDriftSchemaConfig::default(),
7425 market_drift: MarketDriftSchemaConfig::default(),
7426 drift_labeling: DriftLabelingSchemaConfig::default(),
7427 anomaly_injection: Default::default(),
7428 industry_specific: Default::default(),
7429 fingerprint_privacy: Default::default(),
7430 quality_gates: Default::default(),
7431 compliance: Default::default(),
7432 webhooks: Default::default(),
7433 llm: Default::default(),
7434 diffusion: Default::default(),
7435 causal: Default::default(),
7436 source_to_pay: Default::default(),
7437 financial_reporting: Default::default(),
7438 hr: Default::default(),
7439 manufacturing: Default::default(),
7440 sales_quotes: Default::default(),
7441 tax: Default::default(),
7442 treasury: Default::default(),
7443 project_accounting: Default::default(),
7444 esg: Default::default(),
7445 country_packs: None,
7446 }
7447 }
7448
7449 #[test]
7450 fn test_enhanced_orchestrator_creation() {
7451 let config = create_test_config();
7452 let orchestrator = EnhancedOrchestrator::with_defaults(config);
7453 assert!(orchestrator.is_ok());
7454 }
7455
7456 #[test]
7457 fn test_minimal_generation() {
7458 let config = create_test_config();
7459 let phase_config = PhaseConfig {
7460 generate_master_data: false,
7461 generate_document_flows: false,
7462 generate_journal_entries: true,
7463 inject_anomalies: false,
7464 show_progress: false,
7465 ..Default::default()
7466 };
7467
7468 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7469 let result = orchestrator.generate();
7470
7471 assert!(result.is_ok());
7472 let result = result.unwrap();
7473 assert!(!result.journal_entries.is_empty());
7474 }
7475
7476 #[test]
7477 fn test_master_data_generation() {
7478 let config = create_test_config();
7479 let phase_config = PhaseConfig {
7480 generate_master_data: true,
7481 generate_document_flows: false,
7482 generate_journal_entries: false,
7483 inject_anomalies: false,
7484 show_progress: false,
7485 vendors_per_company: 5,
7486 customers_per_company: 5,
7487 materials_per_company: 10,
7488 assets_per_company: 5,
7489 employees_per_company: 10,
7490 ..Default::default()
7491 };
7492
7493 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7494 let result = orchestrator.generate().unwrap();
7495
7496 assert!(!result.master_data.vendors.is_empty());
7497 assert!(!result.master_data.customers.is_empty());
7498 assert!(!result.master_data.materials.is_empty());
7499 }
7500
7501 #[test]
7502 fn test_document_flow_generation() {
7503 let config = create_test_config();
7504 let phase_config = PhaseConfig {
7505 generate_master_data: true,
7506 generate_document_flows: true,
7507 generate_journal_entries: false,
7508 inject_anomalies: false,
7509 inject_data_quality: false,
7510 validate_balances: false,
7511 generate_ocpm_events: false,
7512 show_progress: false,
7513 vendors_per_company: 5,
7514 customers_per_company: 5,
7515 materials_per_company: 10,
7516 assets_per_company: 5,
7517 employees_per_company: 10,
7518 p2p_chains: 5,
7519 o2c_chains: 5,
7520 ..Default::default()
7521 };
7522
7523 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7524 let result = orchestrator.generate().unwrap();
7525
7526 assert!(!result.document_flows.p2p_chains.is_empty());
7528 assert!(!result.document_flows.o2c_chains.is_empty());
7529
7530 assert!(!result.document_flows.purchase_orders.is_empty());
7532 assert!(!result.document_flows.sales_orders.is_empty());
7533 }
7534
7535 #[test]
7536 fn test_anomaly_injection() {
7537 let config = create_test_config();
7538 let phase_config = PhaseConfig {
7539 generate_master_data: false,
7540 generate_document_flows: false,
7541 generate_journal_entries: true,
7542 inject_anomalies: true,
7543 show_progress: false,
7544 ..Default::default()
7545 };
7546
7547 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7548 let result = orchestrator.generate().unwrap();
7549
7550 assert!(!result.journal_entries.is_empty());
7552
7553 assert!(result.anomaly_labels.summary.is_some());
7556 }
7557
7558 #[test]
7559 fn test_full_generation_pipeline() {
7560 let config = create_test_config();
7561 let phase_config = PhaseConfig {
7562 generate_master_data: true,
7563 generate_document_flows: true,
7564 generate_journal_entries: true,
7565 inject_anomalies: false,
7566 inject_data_quality: false,
7567 validate_balances: true,
7568 generate_ocpm_events: false,
7569 show_progress: false,
7570 vendors_per_company: 3,
7571 customers_per_company: 3,
7572 materials_per_company: 5,
7573 assets_per_company: 3,
7574 employees_per_company: 5,
7575 p2p_chains: 3,
7576 o2c_chains: 3,
7577 ..Default::default()
7578 };
7579
7580 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7581 let result = orchestrator.generate().unwrap();
7582
7583 assert!(!result.master_data.vendors.is_empty());
7585 assert!(!result.master_data.customers.is_empty());
7586 assert!(!result.document_flows.p2p_chains.is_empty());
7587 assert!(!result.document_flows.o2c_chains.is_empty());
7588 assert!(!result.journal_entries.is_empty());
7589 assert!(result.statistics.accounts_count > 0);
7590
7591 assert!(!result.subledger.ap_invoices.is_empty());
7593 assert!(!result.subledger.ar_invoices.is_empty());
7594
7595 assert!(result.balance_validation.validated);
7597 assert!(result.balance_validation.entries_processed > 0);
7598 }
7599
7600 #[test]
7601 fn test_subledger_linking() {
7602 let config = create_test_config();
7603 let phase_config = PhaseConfig {
7604 generate_master_data: true,
7605 generate_document_flows: true,
7606 generate_journal_entries: false,
7607 inject_anomalies: false,
7608 inject_data_quality: false,
7609 validate_balances: false,
7610 generate_ocpm_events: false,
7611 show_progress: false,
7612 vendors_per_company: 5,
7613 customers_per_company: 5,
7614 materials_per_company: 10,
7615 assets_per_company: 3,
7616 employees_per_company: 5,
7617 p2p_chains: 5,
7618 o2c_chains: 5,
7619 ..Default::default()
7620 };
7621
7622 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7623 let result = orchestrator.generate().unwrap();
7624
7625 assert!(!result.document_flows.vendor_invoices.is_empty());
7627 assert!(!result.document_flows.customer_invoices.is_empty());
7628
7629 assert!(!result.subledger.ap_invoices.is_empty());
7631 assert!(!result.subledger.ar_invoices.is_empty());
7632
7633 assert_eq!(
7635 result.subledger.ap_invoices.len(),
7636 result.document_flows.vendor_invoices.len()
7637 );
7638
7639 assert_eq!(
7641 result.subledger.ar_invoices.len(),
7642 result.document_flows.customer_invoices.len()
7643 );
7644
7645 assert_eq!(
7647 result.statistics.ap_invoice_count,
7648 result.subledger.ap_invoices.len()
7649 );
7650 assert_eq!(
7651 result.statistics.ar_invoice_count,
7652 result.subledger.ar_invoices.len()
7653 );
7654 }
7655
7656 #[test]
7657 fn test_balance_validation() {
7658 let config = create_test_config();
7659 let phase_config = PhaseConfig {
7660 generate_master_data: false,
7661 generate_document_flows: false,
7662 generate_journal_entries: true,
7663 inject_anomalies: false,
7664 validate_balances: true,
7665 show_progress: false,
7666 ..Default::default()
7667 };
7668
7669 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7670 let result = orchestrator.generate().unwrap();
7671
7672 assert!(result.balance_validation.validated);
7674 assert!(result.balance_validation.entries_processed > 0);
7675
7676 assert!(!result.balance_validation.has_unbalanced_entries);
7678
7679 assert_eq!(
7681 result.balance_validation.total_debits,
7682 result.balance_validation.total_credits
7683 );
7684 }
7685
7686 #[test]
7687 fn test_statistics_accuracy() {
7688 let config = create_test_config();
7689 let phase_config = PhaseConfig {
7690 generate_master_data: true,
7691 generate_document_flows: false,
7692 generate_journal_entries: true,
7693 inject_anomalies: false,
7694 show_progress: false,
7695 vendors_per_company: 10,
7696 customers_per_company: 20,
7697 materials_per_company: 15,
7698 assets_per_company: 5,
7699 employees_per_company: 8,
7700 ..Default::default()
7701 };
7702
7703 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7704 let result = orchestrator.generate().unwrap();
7705
7706 assert_eq!(
7708 result.statistics.vendor_count,
7709 result.master_data.vendors.len()
7710 );
7711 assert_eq!(
7712 result.statistics.customer_count,
7713 result.master_data.customers.len()
7714 );
7715 assert_eq!(
7716 result.statistics.material_count,
7717 result.master_data.materials.len()
7718 );
7719 assert_eq!(
7720 result.statistics.total_entries as usize,
7721 result.journal_entries.len()
7722 );
7723 }
7724
7725 #[test]
7726 fn test_phase_config_defaults() {
7727 let config = PhaseConfig::default();
7728 assert!(config.generate_master_data);
7729 assert!(config.generate_document_flows);
7730 assert!(config.generate_journal_entries);
7731 assert!(!config.inject_anomalies);
7732 assert!(config.validate_balances);
7733 assert!(config.show_progress);
7734 assert!(config.vendors_per_company > 0);
7735 assert!(config.customers_per_company > 0);
7736 }
7737
7738 #[test]
7739 fn test_get_coa_before_generation() {
7740 let config = create_test_config();
7741 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
7742
7743 assert!(orchestrator.get_coa().is_none());
7745 }
7746
7747 #[test]
7748 fn test_get_coa_after_generation() {
7749 let config = create_test_config();
7750 let phase_config = PhaseConfig {
7751 generate_master_data: false,
7752 generate_document_flows: false,
7753 generate_journal_entries: true,
7754 inject_anomalies: false,
7755 show_progress: false,
7756 ..Default::default()
7757 };
7758
7759 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7760 let _ = orchestrator.generate().unwrap();
7761
7762 assert!(orchestrator.get_coa().is_some());
7764 }
7765
7766 #[test]
7767 fn test_get_master_data() {
7768 let config = create_test_config();
7769 let phase_config = PhaseConfig {
7770 generate_master_data: true,
7771 generate_document_flows: false,
7772 generate_journal_entries: false,
7773 inject_anomalies: false,
7774 show_progress: false,
7775 vendors_per_company: 5,
7776 customers_per_company: 5,
7777 materials_per_company: 5,
7778 assets_per_company: 5,
7779 employees_per_company: 5,
7780 ..Default::default()
7781 };
7782
7783 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7784 let _ = orchestrator.generate().unwrap();
7785
7786 let master_data = orchestrator.get_master_data();
7787 assert!(!master_data.vendors.is_empty());
7788 }
7789
7790 #[test]
7791 fn test_with_progress_builder() {
7792 let config = create_test_config();
7793 let orchestrator = EnhancedOrchestrator::with_defaults(config)
7794 .unwrap()
7795 .with_progress(false);
7796
7797 assert!(!orchestrator.phase_config.show_progress);
7799 }
7800
7801 #[test]
7802 fn test_multi_company_generation() {
7803 let mut config = create_test_config();
7804 config.companies.push(CompanyConfig {
7805 code: "2000".to_string(),
7806 name: "Subsidiary".to_string(),
7807 currency: "EUR".to_string(),
7808 country: "DE".to_string(),
7809 annual_transaction_volume: TransactionVolume::TenK,
7810 volume_weight: 0.5,
7811 fiscal_year_variant: "K4".to_string(),
7812 });
7813
7814 let phase_config = PhaseConfig {
7815 generate_master_data: true,
7816 generate_document_flows: false,
7817 generate_journal_entries: true,
7818 inject_anomalies: false,
7819 show_progress: false,
7820 vendors_per_company: 5,
7821 customers_per_company: 5,
7822 materials_per_company: 5,
7823 assets_per_company: 5,
7824 employees_per_company: 5,
7825 ..Default::default()
7826 };
7827
7828 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7829 let result = orchestrator.generate().unwrap();
7830
7831 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
7834 assert!(result.statistics.companies_count == 2);
7835 }
7836
7837 #[test]
7838 fn test_empty_master_data_skips_document_flows() {
7839 let config = create_test_config();
7840 let phase_config = PhaseConfig {
7841 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
7844 inject_anomalies: false,
7845 show_progress: false,
7846 ..Default::default()
7847 };
7848
7849 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7850 let result = orchestrator.generate().unwrap();
7851
7852 assert!(result.document_flows.p2p_chains.is_empty());
7854 assert!(result.document_flows.o2c_chains.is_empty());
7855 }
7856
7857 #[test]
7858 fn test_journal_entry_line_item_count() {
7859 let config = create_test_config();
7860 let phase_config = PhaseConfig {
7861 generate_master_data: false,
7862 generate_document_flows: false,
7863 generate_journal_entries: true,
7864 inject_anomalies: false,
7865 show_progress: false,
7866 ..Default::default()
7867 };
7868
7869 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7870 let result = orchestrator.generate().unwrap();
7871
7872 let calculated_line_items: u64 = result
7874 .journal_entries
7875 .iter()
7876 .map(|e| e.line_count() as u64)
7877 .sum();
7878 assert_eq!(result.statistics.total_line_items, calculated_line_items);
7879 }
7880
/// Runs generation with journal entries and the audit phase switched on, then
/// verifies that every audit artifact collection is populated and that the
/// audit counters in the statistics equal the lengths of those collections.
#[test]
fn test_audit_generation() {
    let cfg = create_test_config();

    // Only journal entries and the audit phase are enabled for this run.
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let (audit, stats) = (&output.audit, &output.statistics);

    // The number of engagements must match `audit_engagements` above.
    assert_eq!(audit.engagements.len(), 2);

    // Every other artifact kind must have produced at least one record.
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // The statistics counters must mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
7937
/// Checks that the test configuration leaves the LLM, diffusion and causal
/// phases disabled, and that a run with them disabled reports zero (or `None`)
/// for all of their statistics.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // Precondition: none of the three optional phases is switched on.
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment counters.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Diffusion enhancement counters.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Causal generation counters and validation outcome.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
7967
/// Enables LLM enrichment with a cap of three vendors and verifies that at
/// least one, and no more than three, vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.max_vendor_enrichments = 3;
    cfg.llm.enabled = true;

    // Only master data is generated; five vendors per company are requested,
    // which is more than the enrichment cap set above.
    let phases = PhaseConfig {
        generate_master_data: true,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
7995
/// Enables diffusion enhancement with a sample size of 20 and verifies that
/// the statistics report exactly that many generated samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.sample_size = 20;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.enabled = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The reported sample count must equal the configured `sample_size`.
    assert_eq!(stats.diffusion_samples_generated, 20);
}
8018
/// Enables the causal overlay with the `fraud_detection` template and
/// validation turned on, then verifies the sample count and that a validation
/// outcome was recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.template = "fraud_detection".to_string();
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;
    cfg.causal.enabled = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The reported sample count must equal the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 100);
    // With `validate = true` a pass/fail result must be present.
    assert!(stats.causal_validation_passed.is_some());
}
8044
/// Enables the causal overlay with the `revenue_cycle` template and validation
/// turned off, then verifies the sample count and that no validation outcome
/// was recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.template = "revenue_cycle".to_string();
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;
    cfg.causal.enabled = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The reported sample count must equal the configured `sample_size`.
    assert_eq!(stats.causal_samples_generated, 50);
    // With `validate = false` no pass/fail result is expected.
    assert!(stats.causal_validation_passed.is_none());
}
8070
/// Enables the LLM, diffusion and causal phases in a single run and verifies
/// that each one reports the expected statistics.
///
/// The LLM cap (`max_vendor_enrichments = 2`) is checked as an upper bound,
/// matching the bound asserted by `test_llm_enrichment_enabled`.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut config = create_test_config();

    // LLM enrichment: at most two vendors.
    config.llm.enabled = true;
    config.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement: ten samples.
    config.diffusion.enabled = true;
    config.diffusion.n_steps = 20;
    config.diffusion.sample_size = 10;

    // Causal overlay: fifty samples, with validation. The template is left at
    // whatever `create_test_config` provides.
    config.causal.enabled = true;
    config.causal.sample_size = 50;
    config.causal.validate = true;

    // Master data and journal entries are both generated; five vendors per
    // company are requested, which is more than the enrichment cap.
    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let stats = &result.statistics;

    // LLM: something was enriched, and the configured cap was respected.
    assert!(stats.llm_vendors_enriched > 0);
    assert!(
        stats.llm_vendors_enriched <= 2,
        "LLM enrichment exceeded max_vendor_enrichments"
    );

    // Diffusion and causal sample counts must equal their configured sizes.
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);

    // With `validate = true` a pass/fail result must be present.
    assert!(stats.causal_validation_passed.is_some());
}
8106
/// Serializes statistics that carry LLM, diffusion and causal values to JSON,
/// parses them back, and verifies those values are unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    // Round trip: struct -> JSON text -> struct.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    // LLM fields.
    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);

    // Diffusion fields.
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);

    // Causal fields.
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
8133
/// Parses a statistics payload that contains none of the LLM, diffusion or
/// causal keys and verifies that:
///
/// * the values that are present in the payload are still loaded, and
/// * the missing fields come back as zero (or `None`).
///
/// Checking the loaded values as well as the defaults ensures the test cannot
/// pass when the payload's contents are silently dropped.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload without any `llm_*`, `diffusion_*` or `causal_*` keys.
    let old_json = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();

    // Values that exist in the payload must survive deserialization.
    assert_eq!(stats.total_entries, 100);
    assert_eq!(stats.total_line_items, 500);

    // Fields absent from the payload must fall back to zero / `None`.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
8183}