1use std::collections::HashMap;
22use std::path::PathBuf;
23use std::sync::Arc;
24
25use chrono::{Datelike, NaiveDate};
26use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
27use rand::SeedableRng;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, info, warn};
30
31use datasynth_banking::{
32 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
33 BankingOrchestratorBuilder,
34};
35use datasynth_config::schema::GeneratorConfig;
36use datasynth_core::error::{SynthError, SynthResult};
37use datasynth_core::models::audit::{
38 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
39};
40use datasynth_core::models::sourcing::{
41 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
42 SupplierBid, SupplierQualification, SupplierScorecard,
43};
44use datasynth_core::models::subledger::ap::APInvoice;
45use datasynth_core::models::subledger::ar::ARInvoice;
46use datasynth_core::models::*;
47use datasynth_core::traits::Generator;
48use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
49use datasynth_fingerprint::{
50 io::FingerprintReader,
51 models::Fingerprint,
52 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
53};
54use datasynth_generators::{
55 AnomalyInjector,
57 AnomalyInjectorConfig,
58 AssetGenerator,
59 AuditEngagementGenerator,
61 BalanceTrackerConfig,
62 BankReconciliationGenerator,
64 BidEvaluationGenerator,
66 BidGenerator,
67 CatalogGenerator,
68 ChartOfAccountsGenerator,
70 ContractGenerator,
71 CustomerGenerator,
72 DataQualityConfig,
73 DataQualityInjector,
75 DataQualityStats,
76 DocumentFlowJeConfig,
78 DocumentFlowJeGenerator,
79 DocumentFlowLinker,
81 EmployeeGenerator,
82 EsgAnomalyLabel,
84 EvidenceGenerator,
85 FinancialStatementGenerator,
87 FindingGenerator,
88 JournalEntryGenerator,
89 JudgmentGenerator,
90 LatePaymentDistribution,
91 MaterialGenerator,
92 O2CDocumentChain,
93 O2CGenerator,
94 O2CGeneratorConfig,
95 O2CPaymentBehavior,
96 P2PDocumentChain,
97 P2PGenerator,
99 P2PGeneratorConfig,
100 P2PPaymentBehavior,
101 PaymentReference,
102 QualificationGenerator,
103 RfxGenerator,
104 RiskAssessmentGenerator,
105 RunningBalanceTracker,
107 ScorecardGenerator,
108 SourcingProjectGenerator,
109 SpendAnalysisGenerator,
110 ValidationError,
111 VendorGenerator,
113 WorkpaperGenerator,
114};
115use datasynth_graph::{
116 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
117 PyGExportConfig, PyGExporter, TransactionGraphBuilder, TransactionGraphConfig,
118};
119use datasynth_ocpm::{
120 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
121 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
122 OcpmUuidFactory, P2pDocuments, S2cDocuments,
123};
124
125use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
126use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
127use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
128use datasynth_core::llm::MockLlmProvider;
129use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
130use datasynth_core::models::documents::PaymentMethod;
131use datasynth_core::models::IndustrySector;
132use datasynth_generators::coa_generator::CoAFramework;
133use datasynth_generators::llm_enrichment::VendorLlmEnricher;
134use rayon::prelude::*;
135
136fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
142 let payment_behavior = &schema_config.payment_behavior;
143 let late_dist = &payment_behavior.late_payment_days_distribution;
144
145 P2PGeneratorConfig {
146 three_way_match_rate: schema_config.three_way_match_rate,
147 partial_delivery_rate: schema_config.partial_delivery_rate,
148 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
149 price_variance_rate: schema_config.price_variance_rate,
150 max_price_variance_percent: schema_config.max_price_variance_percent,
151 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
152 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
153 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
154 payment_method_distribution: vec![
155 (PaymentMethod::BankTransfer, 0.60),
156 (PaymentMethod::Check, 0.25),
157 (PaymentMethod::Wire, 0.10),
158 (PaymentMethod::CreditCard, 0.05),
159 ],
160 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
161 payment_behavior: P2PPaymentBehavior {
162 late_payment_rate: payment_behavior.late_payment_rate,
163 late_payment_distribution: LatePaymentDistribution {
164 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
165 late_8_to_14: late_dist.late_8_to_14,
166 very_late_15_to_30: late_dist.very_late_15_to_30,
167 severely_late_31_to_60: late_dist.severely_late_31_to_60,
168 extremely_late_over_60: late_dist.extremely_late_over_60,
169 },
170 partial_payment_rate: payment_behavior.partial_payment_rate,
171 payment_correction_rate: payment_behavior.payment_correction_rate,
172 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
173 },
174 }
175}
176
177fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
179 let payment_behavior = &schema_config.payment_behavior;
180
181 O2CGeneratorConfig {
182 credit_check_failure_rate: schema_config.credit_check_failure_rate,
183 partial_shipment_rate: schema_config.partial_shipment_rate,
184 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
185 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
186 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
187 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
188 bad_debt_rate: schema_config.bad_debt_rate,
189 returns_rate: schema_config.return_rate,
190 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
191 payment_method_distribution: vec![
192 (PaymentMethod::BankTransfer, 0.50),
193 (PaymentMethod::Check, 0.30),
194 (PaymentMethod::Wire, 0.15),
195 (PaymentMethod::CreditCard, 0.05),
196 ],
197 payment_behavior: O2CPaymentBehavior {
198 partial_payment_rate: payment_behavior.partial_payments.rate,
199 short_payment_rate: payment_behavior.short_payments.rate,
200 max_short_percent: payment_behavior.short_payments.max_short_percent,
201 on_account_rate: payment_behavior.on_account_payments.rate,
202 payment_correction_rate: payment_behavior.payment_corrections.rate,
203 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
204 },
205 }
206}
207
/// Switches and volume settings controlling which generation phases run and how much
/// data they are asked to produce.
///
/// Field descriptions follow the field names; the phases that consume these settings
/// are defined elsewhere in the orchestrator.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Toggle for master data generation.
    pub generate_master_data: bool,
    /// Toggle for document flow generation.
    pub generate_document_flows: bool,
    /// Toggle for OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Toggle for journal entry generation.
    pub generate_journal_entries: bool,
    /// Toggle for anomaly injection.
    pub inject_anomalies: bool,
    /// Toggle for data quality issue injection.
    pub inject_data_quality: bool,
    /// Toggle for balance validation.
    pub validate_balances: bool,
    /// Toggle for progress display.
    pub show_progress: bool,
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Toggle for audit data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Number of workpapers per audit engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per audit engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per audit engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per audit engagement.
    pub judgments_per_engagement: usize,
    /// Toggle for banking data generation.
    pub generate_banking: bool,
    /// Toggle for graph export.
    pub generate_graph_export: bool,
    /// Toggle for sourcing data generation.
    pub generate_sourcing: bool,
    /// Toggle for bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Toggle for financial statement generation.
    pub generate_financial_statements: bool,
    /// Toggle for accounting standards data generation.
    pub generate_accounting_standards: bool,
    /// Toggle for manufacturing data generation.
    pub generate_manufacturing: bool,
    /// Toggle for sales quote, KPI and budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Toggle for tax data generation.
    pub generate_tax: bool,
    /// Toggle for ESG data generation.
    pub generate_esg: bool,
    /// Toggle for intercompany data generation.
    pub generate_intercompany: bool,
}
278
279impl Default for PhaseConfig {
280 fn default() -> Self {
281 Self {
282 generate_master_data: true,
283 generate_document_flows: true,
284 generate_ocpm_events: false, generate_journal_entries: true,
286 inject_anomalies: false,
287 inject_data_quality: false, validate_balances: true,
289 show_progress: true,
290 vendors_per_company: 50,
291 customers_per_company: 100,
292 materials_per_company: 200,
293 assets_per_company: 50,
294 employees_per_company: 100,
295 p2p_chains: 100,
296 o2c_chains: 100,
297 generate_audit: false, audit_engagements: 5,
299 workpapers_per_engagement: 20,
300 evidence_per_workpaper: 5,
301 risks_per_engagement: 15,
302 findings_per_engagement: 8,
303 judgments_per_engagement: 10,
304 generate_banking: false, generate_graph_export: false, generate_sourcing: false, generate_bank_reconciliation: false, generate_financial_statements: false, generate_accounting_standards: false, generate_manufacturing: false, generate_sales_kpi_budgets: false, generate_tax: false, generate_esg: false, generate_intercompany: false, }
316 }
317}
318
/// Master data records held together as one snapshot; `Default` yields empty lists.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
}
333
/// Summary of a hypergraph export: element counts plus the path associated with it.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path of the export output (presumably where it was written — confirm at the producer).
    pub output_path: PathBuf,
}
346
/// P2P and O2C document chains together with per-document-type lists.
///
/// NOTE(review): the flat lists presumably repeat documents that also appear inside the
/// chains — confirm where this snapshot is populated.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// Procure-to-pay document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// Order-to-cash document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
}
369
/// Subledger records: AP, AR, fixed assets and inventory.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// Accounts payable invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// Accounts receivable invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
}
384
/// OCPM event log (if any) and its summary counts.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log; `None` when no log is attached (this is also the `Default`).
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
397
/// Audit-related records grouped by type.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
}
414
/// Banking records, their label sets and summary counts.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Labels attached to transactions.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Labels attached to customers.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Labels attached to accounts.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Labels attached to relationships.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Count of suspicious items (presumably transactions — confirm at the producer).
    pub suspicious_count: usize,
    /// Count of scenarios.
    pub scenario_count: usize,
}
439
/// Serializable summary of graph exports.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether an export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details keyed by string (presumably the export name — confirm at the producer).
    pub exports: HashMap<String, GraphExportInfo>,
}
450
/// Serializable description of a single graph export.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Export name.
    pub name: String,
    /// Export format identifier.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
465
/// Sourcing records grouped by type.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
488
/// Trial balance entries for one fiscal period; serializable in both directions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period (presumably inclusive — confirm at the producer).
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
503
/// Financial reporting records: statements, bank reconciliations and trial balances.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
}
514
/// HR records plus summary counts.
///
/// NOTE(review): the `*_count` fields presumably mirror the lengths of the matching
/// lists — confirm where this snapshot is populated.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Count of payroll runs.
    pub payroll_run_count: usize,
    /// Count of payroll line items.
    pub payroll_line_item_count: usize,
    /// Count of time entries.
    pub time_entry_count: usize,
    /// Count of expense reports.
    pub expense_report_count: usize,
}
535
/// Accounting-standards records (customer contracts and impairment tests) plus counts.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts from the revenue module of `datasynth_standards`.
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Count of revenue contracts (presumably `contracts.len()` — confirm at the producer).
    pub revenue_contract_count: usize,
    /// Count of impairment tests.
    pub impairment_test_count: usize,
}
548
/// Manufacturing records plus summary counts.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// Count of production orders.
    pub production_order_count: usize,
    /// Count of quality inspections.
    pub quality_inspection_count: usize,
    /// Count of cycle counts.
    pub cycle_count_count: usize,
}
565
/// Sales quotes, management KPIs and budgets, plus summary counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Count of sales quotes.
    pub sales_quote_count: usize,
    /// Count of KPIs.
    pub kpi_count: usize,
    /// Count of budget lines (note: lines, not `budgets` entries — confirm at the producer).
    pub budget_line_count: usize,
}
582
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Optional summary; `None` by default.
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by string (presumably the anomaly type name — confirm at the producer).
    pub by_type: HashMap<String, usize>,
}
593
/// Outcome of balance validation.
///
/// `Default` yields `validated == false` with zeroed totals.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was carried out.
    pub validated: bool,
    /// Whether the result is balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
616
/// Tax records, tax anomaly labels and summary counts.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Labels for tax anomalies.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Count of jurisdictions.
    pub jurisdiction_count: usize,
    /// Count of tax codes.
    pub code_count: usize,
}
639
/// Intercompany records: matched pairs, both sides' journal entries and eliminations.
/// Serializable in both directions.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Journal entries on the seller side.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Journal entries on the buyer side.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// Count of matched pairs.
    pub matched_pair_count: usize,
    /// Count of elimination entries.
    pub elimination_entry_count: usize,
    /// Match rate (presumably a fraction in 0.0..=1.0 — confirm at the producer).
    pub match_rate: f64,
}
658
/// ESG records grouped by type, ESG anomaly labels and summary counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// Labels for ESG anomalies.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Count of emission records.
    pub emission_count: usize,
    /// Count of disclosures.
    pub disclosure_count: usize,
}
695
/// Treasury records grouped by type, plus treasury anomaly labels.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Labels for treasury anomalies.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
}
716
/// Project accounting records grouped by type.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
733
/// Everything returned by `EnhancedOrchestrator::generate`: the chart of accounts, one
/// snapshot per data domain, the journal entries, and validation/statistics results.
///
/// Only `Debug` is derived; the type is neither `Clone` nor `Default`.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export summary.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data quality injection statistics.
    pub data_quality_stats: DataQualityStats,
    /// Aggregate generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Optional lineage graph.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Optional quality gate result.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
}
796
/// Aggregate counters and timings for one generation run; serializable in both directions.
///
/// Fields marked `#[serde(default)]` may be absent from serialized input and then take
/// their type's default value. The leading block of fields carries no such attribute and
/// is therefore required when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries and overall scope ---
    pub total_entries: u64,
    pub total_line_items: u64,
    pub accounts_count: usize,
    pub companies_count: usize,
    pub period_months: u32,
    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,
    // --- Document flows ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    // --- AP/AR subledgers ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,
    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,
    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    // --- Injection ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,
    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,
    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,
    // --- LLM enrichment (the `_ms` suffix suggests milliseconds) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// `None` when no validation verdict is recorded.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,
    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    // --- Sales quotes, KPIs, budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,
    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,
    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,
    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,
    // --- Fixed asset and inventory subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,
    // --- Treasury ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,
    // --- Additional counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,
    #[serde(default)]
    pub cash_position_count: usize,
}
966
/// Orchestrator that holds the generation configuration and the state shared across
/// phases. Construct it with `new`, `with_defaults` or one of the fingerprint constructors.
pub struct EnhancedOrchestrator {
    /// Generator configuration (validated in `new`).
    config: GeneratorConfig,
    /// Phase switches and volumes.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` until one is stored.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data kept on the orchestrator; starts empty.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or drawn randomly when that is unset.
    seed: u64,
    /// Progress-bar container; set by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built from the config and, once known, the output path.
    resource_guard: ResourceGuard,
    /// Output path; set by `with_output_path`.
    output_path: Option<PathBuf>,
    /// Copula generator specs; populated by the fingerprint constructors, otherwise empty.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry built in `new`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
}
984
985impl EnhancedOrchestrator {
986 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
988 datasynth_config::validate_config(&config)?;
989
990 let seed = config.global.seed.unwrap_or_else(rand::random);
991
992 let resource_guard = Self::build_resource_guard(&config, None);
994
995 let country_pack_registry = match &config.country_packs {
997 Some(cp) => {
998 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
999 .map_err(|e| SynthError::config(e.to_string()))?
1000 }
1001 None => datasynth_core::CountryPackRegistry::builtin_only()
1002 .map_err(|e| SynthError::config(e.to_string()))?,
1003 };
1004
1005 Ok(Self {
1006 config,
1007 phase_config,
1008 coa: None,
1009 master_data: MasterDataSnapshot::default(),
1010 seed,
1011 multi_progress: None,
1012 resource_guard,
1013 output_path: None,
1014 copula_generators: Vec::new(),
1015 country_pack_registry,
1016 })
1017 }
1018
/// Creates an orchestrator using `PhaseConfig::default()`.
///
/// # Errors
/// Propagates any error from `Self::new`.
pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
    Self::new(config, PhaseConfig::default())
}
1023
1024 pub fn with_progress(mut self, show: bool) -> Self {
1026 self.phase_config.show_progress = show;
1027 if show {
1028 self.multi_progress = Some(MultiProgress::new());
1029 }
1030 self
1031 }
1032
1033 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1035 let path = path.into();
1036 self.output_path = Some(path.clone());
1037 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1039 self
1040 }
1041
/// Returns a shared reference to the country pack registry built in `new`.
pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
    &self.country_pack_registry
}
1046
/// Looks up the country pack for `country` via the registry's `get_by_str`.
///
/// NOTE(review): the return type is a plain reference, so the registry presumably falls
/// back to some default pack for unknown codes — confirm in `CountryPackRegistry`.
pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
    self.country_pack_registry.get_by_str(country)
}
1051
1052 fn primary_country_code(&self) -> &str {
1055 self.config
1056 .companies
1057 .first()
1058 .map(|c| c.country.as_str())
1059 .unwrap_or("US")
1060 }
1061
/// Returns the country pack for the primary country code (see `primary_country_code`).
fn primary_pack(&self) -> &datasynth_core::CountryPack {
    self.country_pack_for(self.primary_country_code())
}
1066
1067 fn resolve_coa_framework(&self) -> CoAFramework {
1069 if self.config.accounting_standards.enabled {
1070 match self.config.accounting_standards.framework {
1071 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1072 return CoAFramework::FrenchPcg;
1073 }
1074 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1075 return CoAFramework::GermanSkr04;
1076 }
1077 _ => {}
1078 }
1079 }
1080 let pack = self.primary_pack();
1082 match pack.accounting.framework.as_str() {
1083 "french_gaap" => CoAFramework::FrenchPcg,
1084 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1085 _ => CoAFramework::UsGaap,
1086 }
1087 }
1088
/// Returns `true` when at least one copula generator spec is stored.
pub fn has_copulas(&self) -> bool {
    !self.copula_generators.is_empty()
}
1096
/// Returns the stored copula generator specs as a read-only slice (possibly empty).
pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
    &self.copula_generators
}
1105
/// Returns the stored copula generator specs as a mutable slice.
///
/// Individual specs can be mutated through the slice, but specs cannot be added or
/// removed this way.
pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
    &mut self.copula_generators
}
1112
1113 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1117 self.copula_generators
1118 .iter_mut()
1119 .find(|c| c.name == copula_name)
1120 .map(|c| c.generator.sample())
1121 }
1122
1123 pub fn from_fingerprint(
1146 fingerprint_path: &std::path::Path,
1147 phase_config: PhaseConfig,
1148 scale: f64,
1149 ) -> SynthResult<Self> {
1150 info!("Loading fingerprint from: {}", fingerprint_path.display());
1151
1152 let reader = FingerprintReader::new();
1154 let fingerprint = reader
1155 .read_from_file(fingerprint_path)
1156 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {}", e)))?;
1157
1158 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1159 }
1160
1161 pub fn from_fingerprint_data(
1168 fingerprint: Fingerprint,
1169 phase_config: PhaseConfig,
1170 scale: f64,
1171 ) -> SynthResult<Self> {
1172 info!(
1173 "Synthesizing config from fingerprint (version: {}, tables: {})",
1174 fingerprint.manifest.version,
1175 fingerprint.schema.tables.len()
1176 );
1177
1178 let seed: u64 = rand::random();
1180
1181 let options = SynthesisOptions {
1183 scale,
1184 seed: Some(seed),
1185 preserve_correlations: true,
1186 inject_anomalies: true,
1187 };
1188 let synthesizer = ConfigSynthesizer::with_options(options);
1189
1190 let synthesis_result = synthesizer
1192 .synthesize_full(&fingerprint, seed)
1193 .map_err(|e| {
1194 SynthError::config(format!(
1195 "Failed to synthesize config from fingerprint: {}",
1196 e
1197 ))
1198 })?;
1199
1200 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1202 Self::base_config_for_industry(industry)
1203 } else {
1204 Self::base_config_for_industry("manufacturing")
1205 };
1206
1207 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1209
1210 info!(
1212 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1213 fingerprint.schema.tables.len(),
1214 scale,
1215 synthesis_result.copula_generators.len()
1216 );
1217
1218 if !synthesis_result.copula_generators.is_empty() {
1219 for spec in &synthesis_result.copula_generators {
1220 info!(
1221 " Copula '{}' for table '{}': {} columns",
1222 spec.name,
1223 spec.table,
1224 spec.columns.len()
1225 );
1226 }
1227 }
1228
1229 let mut orchestrator = Self::new(config, phase_config)?;
1231
1232 orchestrator.copula_generators = synthesis_result.copula_generators;
1234
1235 Ok(orchestrator)
1236 }
1237
1238 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1240 use datasynth_config::presets::create_preset;
1241 use datasynth_config::TransactionVolume;
1242 use datasynth_core::models::{CoAComplexity, IndustrySector};
1243
1244 let sector = match industry.to_lowercase().as_str() {
1245 "manufacturing" => IndustrySector::Manufacturing,
1246 "retail" => IndustrySector::Retail,
1247 "financial" | "financial_services" => IndustrySector::FinancialServices,
1248 "healthcare" => IndustrySector::Healthcare,
1249 "technology" | "tech" => IndustrySector::Technology,
1250 _ => IndustrySector::Manufacturing,
1251 };
1252
1253 create_preset(
1255 sector,
1256 1, 12, CoAComplexity::Medium,
1259 TransactionVolume::TenK,
1260 )
1261 }
1262
1263 fn apply_config_patch(
1265 mut config: GeneratorConfig,
1266 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1267 ) -> GeneratorConfig {
1268 use datasynth_fingerprint::synthesis::ConfigValue;
1269
1270 for (key, value) in patch.values() {
1271 match (key.as_str(), value) {
1272 ("transactions.count", ConfigValue::Integer(n)) => {
1275 info!(
1276 "Fingerprint suggests {} transactions (apply via company volumes)",
1277 n
1278 );
1279 }
1280 ("global.period_months", ConfigValue::Integer(n)) => {
1281 config.global.period_months = *n as u32;
1282 }
1283 ("global.start_date", ConfigValue::String(s)) => {
1284 config.global.start_date = s.clone();
1285 }
1286 ("global.seed", ConfigValue::Integer(n)) => {
1287 config.global.seed = Some(*n as u64);
1288 }
1289 ("fraud.enabled", ConfigValue::Bool(b)) => {
1290 config.fraud.enabled = *b;
1291 }
1292 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1293 config.fraud.fraud_rate = *f;
1294 }
1295 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1296 config.data_quality.enabled = *b;
1297 }
1298 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1300 config.fraud.enabled = *b;
1301 }
1302 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1303 config.fraud.fraud_rate = *f;
1304 }
1305 _ => {
1306 debug!("Ignoring unknown config patch key: {}", key);
1307 }
1308 }
1309 }
1310
1311 config
1312 }
1313
1314 fn build_resource_guard(
1316 config: &GeneratorConfig,
1317 output_path: Option<PathBuf>,
1318 ) -> ResourceGuard {
1319 let mut builder = ResourceGuardBuilder::new();
1320
1321 if config.global.memory_limit_mb > 0 {
1323 builder = builder.memory_limit(config.global.memory_limit_mb);
1324 }
1325
1326 if let Some(path) = output_path {
1328 builder = builder.output_path(path).min_free_disk(100); }
1330
1331 builder = builder.conservative();
1333
1334 builder.build()
1335 }
1336
/// Runs the resource guard's check and returns the degradation level it reports.
///
/// # Errors
/// Propagates any error from the resource guard.
fn check_resources(&self) -> SynthResult<DegradationLevel> {
    self.resource_guard.check()
}
1344
1345 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1347 let level = self.resource_guard.check()?;
1348
1349 if level != DegradationLevel::Normal {
1350 warn!(
1351 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1352 phase,
1353 level,
1354 self.resource_guard.current_memory_mb(),
1355 self.resource_guard.available_disk_mb()
1356 );
1357 }
1358
1359 Ok(level)
1360 }
1361
/// Returns the degradation actions currently reported by the resource guard.
fn get_degradation_actions(&self) -> DegradationActions {
    self.resource_guard.get_actions()
}
1366
1367 fn check_memory_limit(&self) -> SynthResult<()> {
1369 self.check_resources()?;
1370 Ok(())
1371 }
1372
/// Runs the complete generation workflow and returns every produced artifact.
///
/// The phases run strictly in the order written below. Journal entries are
/// accumulated in a single `entries` vector that later phases extend
/// (fixed-asset, intercompany, payroll and manufacturing entries) before
/// anomaly injection, balance validation, subledger reconciliation and
/// data-quality injection operate on it. Statistics are collected in a single
/// `EnhancedGenerationStatistics` value threaded through the phases.
///
/// # Errors
/// * Returns a resource error when the initial resource check reports
///   `DegradationLevel::Emergency`.
/// * Propagates the first error returned by any phase invoked with `?`.
///   The LLM enrichment, diffusion enhancement and causal overlay phases do
///   not return a `Result` and therefore cannot abort the workflow here.
pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
    info!("Starting enhanced generation workflow");
    info!(
        "Config: industry={:?}, period_months={}, companies={}",
        self.config.global.industry,
        self.config.global.period_months,
        self.config.companies.len()
    );

    // Refuse to start at all when resources are already at emergency level.
    let initial_level = self.check_resources_with_log("initial")?;
    if initial_level == DegradationLevel::Emergency {
        return Err(SynthError::resource(
            "Insufficient resources to start generation",
        ));
    }

    let mut stats = EnhancedGenerationStatistics {
        companies_count: self.config.companies.len(),
        period_months: self.config.global.period_months,
        ..Default::default()
    };

    // Chart of accounts.
    let coa = self.phase_chart_of_accounts(&mut stats)?;

    // Master data (stored on `self.master_data`).
    self.phase_master_data(&mut stats)?;

    // Document flows, subledger records and fixed-asset acquisition JEs.
    let (mut document_flows, subledger, fa_journal_entries) =
        self.phase_document_flows(&mut stats)?;

    // Opening balances.
    let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;

    // Journal entries (document-flow derived and standalone).
    let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;

    // Merge the fixed-asset acquisition JEs into the main entry list.
    if !fa_journal_entries.is_empty() {
        debug!(
            "Appending {} FA acquisition JEs to main entries",
            fa_journal_entries.len()
        );
        entries.extend(fa_journal_entries);
    }

    // Captured once here; the anomaly and data-quality phases below consult it.
    let actions = self.get_degradation_actions();

    // Sourcing data.
    let sourcing = self.phase_sourcing_data(&mut stats)?;

    // Back-fill contract references: a purchase order without a contract is
    // linked to the first sourcing contract that has the same vendor id.
    if !sourcing.contracts.is_empty() {
        let mut linked_count = 0usize;
        for chain in &mut document_flows.p2p_chains {
            if chain.purchase_order.contract_id.is_none() {
                if let Some(contract) = sourcing
                    .contracts
                    .iter()
                    .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
                {
                    chain.purchase_order.contract_id = Some(contract.contract_id.clone());
                    linked_count += 1;
                }
            }
        }
        if linked_count > 0 {
            debug!(
                "Linked {} purchase orders to S2C contracts by vendor match",
                linked_count
            );
        }
    }

    // Intercompany data.
    let intercompany = self.phase_intercompany(&mut stats)?;

    // Intercompany JEs are cloned into the main list; the snapshot keeps its
    // own copies because it is returned in the result as well.
    if !intercompany.seller_journal_entries.is_empty()
        || !intercompany.buyer_journal_entries.is_empty()
    {
        let ic_je_count = intercompany.seller_journal_entries.len()
            + intercompany.buyer_journal_entries.len();
        entries.extend(intercompany.seller_journal_entries.iter().cloned());
        entries.extend(intercompany.buyer_journal_entries.iter().cloned());
        debug!(
            "Appended {} IC journal entries to main entries",
            ic_je_count
        );
    }

    // HR data; payroll runs are turned into additional JEs.
    let hr = self.phase_hr_data(&mut stats)?;

    if !hr.payroll_runs.is_empty() {
        let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
        debug!("Generated {} JEs from payroll runs", payroll_jes.len());
        entries.extend(payroll_jes);
    }

    // Manufacturing data; production orders are turned into additional JEs.
    let manufacturing_snap = self.phase_manufacturing(&mut stats)?;

    if !manufacturing_snap.production_orders.is_empty() {
        let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
        debug!("Generated {} JEs from production orders", mfg_jes.len());
        entries.extend(mfg_jes);
    }

    // Totals are (re)computed here because several phases above appended
    // entries. They are left untouched when there are no entries at all.
    if !entries.is_empty() {
        stats.total_entries = entries.len() as u64;
        stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
        debug!(
            "Final entry count: {}, line items: {} (after all JE-generating phases)",
            stats.total_entries, stats.total_line_items
        );
    }

    // Anomaly injection (mutates `entries`).
    let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;

    // Balance validation.
    let balance_validation = self.phase_balance_validation(&entries)?;

    // Subledger-to-GL reconciliation.
    let subledger_reconciliation =
        self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;

    // Data-quality injection (mutates `entries`); note that it runs after
    // balance validation and reconciliation have already been computed.
    let data_quality_stats =
        self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;

    // Audit data.
    let audit = self.phase_audit_data(&entries, &mut stats)?;

    // Banking data.
    let banking = self.phase_banking_data(&mut stats)?;

    // Graph export.
    let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;

    // The next three phases return nothing and are not propagated with `?`.
    self.phase_llm_enrichment(&mut stats);

    self.phase_diffusion_enhancement(&mut stats);

    self.phase_causal_overlay(&mut stats);

    // Financial reporting.
    let financial_reporting =
        self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;

    // Accounting standards.
    let accounting_standards = self.phase_accounting_standards(&mut stats)?;

    // OCPM events, built from the snapshots produced above.
    let ocpm = self.phase_ocpm_events(
        &document_flows,
        &sourcing,
        &hr,
        &manufacturing_snap,
        &banking,
        &audit,
        &financial_reporting,
        &mut stats,
    )?;

    // Sales, KPI and budget data.
    let sales_kpi_budgets =
        self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;

    // Tax data.
    let tax = self.phase_tax_generation(&document_flows, &mut stats)?;

    // ESG data.
    let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;

    // Treasury data.
    let treasury = self.phase_treasury_data(&document_flows, &mut stats)?;

    // Project accounting data.
    let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;

    // Hypergraph export; runs last among the data phases so that every
    // snapshot it receives has already been produced.
    self.phase_hypergraph_export(
        &coa,
        &entries,
        &document_flows,
        &sourcing,
        &hr,
        &manufacturing_snap,
        &banking,
        &audit,
        &financial_reporting,
        &ocpm,
        &mut stats,
    )?;

    // Additional graphs are built under the same switches as the graph export.
    if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
        self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
    }

    // These settings only produce log output in this workflow.
    if self.config.streaming.enabled {
        info!("Note: streaming config is enabled but batch mode does not use it");
    }
    if self.config.vendor_network.enabled {
        debug!("Vendor network config available; relationship graph generation is partial");
    }
    if self.config.customer_segmentation.enabled {
        debug!("Customer segmentation config available; segment-aware generation is partial");
    }

    let resource_stats = self.resource_guard.stats();
    info!(
        "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
        resource_stats.memory.peak_resident_bytes / (1024 * 1024),
        resource_stats.disk.estimated_bytes_written,
        resource_stats.degradation_level
    );

    let lineage = self.build_lineage_graph();

    // Quality gates: evaluated only when enabled and the named profile exists.
    let gate_result = if self.config.quality_gates.enabled {
        let profile_name = &self.config.quality_gates.profile;
        match datasynth_eval::gates::get_profile(profile_name) {
            Some(profile) => {
                let mut eval = datasynth_eval::ComprehensiveEvaluation::new();

                // The balance evaluation is filled in only when validation
                // actually ran; it is reported as a single evaluated period.
                if balance_validation.validated {
                    eval.coherence.balance =
                        Some(datasynth_eval::coherence::BalanceSheetEvaluation {
                            equation_balanced: balance_validation.is_balanced,
                            max_imbalance: (balance_validation.total_debits
                                - balance_validation.total_credits)
                                .abs(),
                            periods_evaluated: 1,
                            periods_imbalanced: if balance_validation.is_balanced {
                                0
                            } else {
                                1
                            },
                            period_results: Vec::new(),
                            companies_evaluated: self.config.companies.len(),
                        });
                }

                // NOTE(review): the pass flag follows `is_balanced` even when
                // `validated` is false — confirm the default of `is_balanced`
                // gives the intended result in that case.
                eval.coherence.passes = balance_validation.is_balanced;
                if !balance_validation.is_balanced {
                    eval.coherence
                        .failures
                        .push("Balance sheet equation not satisfied".to_string());
                }

                // Statistical and quality scores are fixed values chosen here,
                // not measurements taken from the generated data.
                eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
                eval.statistical.passes = !entries.is_empty();

                eval.quality.overall_score = 0.9;
                eval.quality.passes = true;

                let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
                info!(
                    "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
                    profile_name, result.gates_passed, result.gates_total, result.summary
                );
                Some(result)
            }
            None => {
                warn!(
                    "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
                    profile_name
                );
                None
            }
        }
    } else {
        None
    };

    // Internal controls are the standard set, or empty when disabled.
    let internal_controls = if self.config.internal_controls.enabled {
        InternalControl::standard_controls()
    } else {
        Vec::new()
    };

    Ok(EnhancedGenerationResult {
        chart_of_accounts: (*coa).clone(),
        master_data: self.master_data.clone(),
        document_flows,
        subledger,
        ocpm,
        audit,
        banking,
        graph_export,
        sourcing,
        financial_reporting,
        hr,
        accounting_standards,
        manufacturing: manufacturing_snap,
        sales_kpi_budgets,
        tax,
        esg: esg_snap,
        treasury,
        project_accounting,
        intercompany,
        journal_entries: entries,
        anomaly_labels,
        balance_validation,
        data_quality_stats,
        statistics: stats,
        lineage: Some(lineage),
        gate_result,
        internal_controls,
        opening_balances,
        subledger_reconciliation,
    })
}
1717
1718 fn phase_chart_of_accounts(
1724 &mut self,
1725 stats: &mut EnhancedGenerationStatistics,
1726 ) -> SynthResult<Arc<ChartOfAccounts>> {
1727 info!("Phase 1: Generating Chart of Accounts");
1728 let coa = self.generate_coa()?;
1729 stats.accounts_count = coa.account_count();
1730 info!(
1731 "Chart of Accounts generated: {} accounts",
1732 stats.accounts_count
1733 );
1734 self.check_resources_with_log("post-coa")?;
1735 Ok(coa)
1736 }
1737
1738 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1740 if self.phase_config.generate_master_data {
1741 info!("Phase 2: Generating Master Data");
1742 self.generate_master_data()?;
1743 stats.vendor_count = self.master_data.vendors.len();
1744 stats.customer_count = self.master_data.customers.len();
1745 stats.material_count = self.master_data.materials.len();
1746 stats.asset_count = self.master_data.assets.len();
1747 stats.employee_count = self.master_data.employees.len();
1748 info!(
1749 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1750 stats.vendor_count, stats.customer_count, stats.material_count,
1751 stats.asset_count, stats.employee_count
1752 );
1753 self.check_resources_with_log("post-master-data")?;
1754 } else {
1755 debug!("Phase 2: Skipped (master data generation disabled)");
1756 }
1757 Ok(())
1758 }
1759
1760 fn phase_document_flows(
1762 &mut self,
1763 stats: &mut EnhancedGenerationStatistics,
1764 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1765 let mut document_flows = DocumentFlowSnapshot::default();
1766 let mut subledger = SubledgerSnapshot::default();
1767
1768 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
1769 info!("Phase 3: Generating Document Flows");
1770 self.generate_document_flows(&mut document_flows)?;
1771 stats.p2p_chain_count = document_flows.p2p_chains.len();
1772 stats.o2c_chain_count = document_flows.o2c_chains.len();
1773 info!(
1774 "Document flows generated: {} P2P chains, {} O2C chains",
1775 stats.p2p_chain_count, stats.o2c_chain_count
1776 );
1777
1778 debug!("Phase 3b: Linking document flows to subledgers");
1780 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
1781 stats.ap_invoice_count = subledger.ap_invoices.len();
1782 stats.ar_invoice_count = subledger.ar_invoices.len();
1783 debug!(
1784 "Subledgers linked: {} AP invoices, {} AR invoices",
1785 stats.ap_invoice_count, stats.ar_invoice_count
1786 );
1787
1788 self.check_resources_with_log("post-document-flows")?;
1789 } else {
1790 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
1791 }
1792
1793 let mut fa_journal_entries = Vec::new();
1795 if !self.master_data.assets.is_empty() {
1796 debug!("Generating FA subledger records");
1797 let company_code = self
1798 .config
1799 .companies
1800 .first()
1801 .map(|c| c.code.as_str())
1802 .unwrap_or("1000");
1803 let currency = self
1804 .config
1805 .companies
1806 .first()
1807 .map(|c| c.currency.as_str())
1808 .unwrap_or("USD");
1809
1810 let mut fa_gen = datasynth_generators::FAGenerator::new(
1811 datasynth_generators::FAGeneratorConfig::default(),
1812 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
1813 );
1814
1815 for asset in &self.master_data.assets {
1816 let (record, je) = fa_gen.generate_asset_acquisition(
1817 company_code,
1818 &format!("{:?}", asset.asset_class),
1819 &asset.description,
1820 asset.acquisition_date,
1821 currency,
1822 asset.cost_center.as_deref(),
1823 );
1824 subledger.fa_records.push(record);
1825 fa_journal_entries.push(je);
1826 }
1827
1828 stats.fa_subledger_count = subledger.fa_records.len();
1829 debug!(
1830 "FA subledger records generated: {} (with {} acquisition JEs)",
1831 stats.fa_subledger_count,
1832 fa_journal_entries.len()
1833 );
1834 }
1835
1836 if !self.master_data.materials.is_empty() {
1838 debug!("Generating Inventory subledger records");
1839 let first_company = self.config.companies.first();
1840 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
1841 let inv_currency = first_company
1842 .map(|c| c.currency.clone())
1843 .unwrap_or_else(|| "USD".to_string());
1844
1845 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
1846 datasynth_generators::InventoryGeneratorConfig::default(),
1847 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
1848 inv_currency.clone(),
1849 );
1850
1851 for (i, material) in self.master_data.materials.iter().enumerate() {
1852 let plant = format!("PLANT{:02}", (i % 3) + 1);
1853 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
1854 let initial_qty = rust_decimal::Decimal::from(
1855 material
1856 .safety_stock
1857 .to_string()
1858 .parse::<i64>()
1859 .unwrap_or(100),
1860 );
1861
1862 let position = inv_gen.generate_position(
1863 company_code,
1864 &plant,
1865 &storage_loc,
1866 &material.material_id,
1867 &material.description,
1868 initial_qty,
1869 Some(material.standard_cost),
1870 &inv_currency,
1871 );
1872 subledger.inventory_positions.push(position);
1873 }
1874
1875 stats.inventory_subledger_count = subledger.inventory_positions.len();
1876 debug!(
1877 "Inventory subledger records generated: {}",
1878 stats.inventory_subledger_count
1879 );
1880 }
1881
1882 Ok((document_flows, subledger, fa_journal_entries))
1883 }
1884
1885 #[allow(clippy::too_many_arguments)]
1887 fn phase_ocpm_events(
1888 &mut self,
1889 document_flows: &DocumentFlowSnapshot,
1890 sourcing: &SourcingSnapshot,
1891 hr: &HrSnapshot,
1892 manufacturing: &ManufacturingSnapshot,
1893 banking: &BankingSnapshot,
1894 audit: &AuditSnapshot,
1895 financial_reporting: &FinancialReportingSnapshot,
1896 stats: &mut EnhancedGenerationStatistics,
1897 ) -> SynthResult<OcpmSnapshot> {
1898 if self.phase_config.generate_ocpm_events {
1899 info!("Phase 3c: Generating OCPM Events");
1900 let ocpm_snapshot = self.generate_ocpm_events(
1901 document_flows,
1902 sourcing,
1903 hr,
1904 manufacturing,
1905 banking,
1906 audit,
1907 financial_reporting,
1908 )?;
1909 stats.ocpm_event_count = ocpm_snapshot.event_count;
1910 stats.ocpm_object_count = ocpm_snapshot.object_count;
1911 stats.ocpm_case_count = ocpm_snapshot.case_count;
1912 info!(
1913 "OCPM events generated: {} events, {} objects, {} cases",
1914 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
1915 );
1916 self.check_resources_with_log("post-ocpm")?;
1917 Ok(ocpm_snapshot)
1918 } else {
1919 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
1920 Ok(OcpmSnapshot::default())
1921 }
1922 }
1923
1924 fn phase_journal_entries(
1926 &mut self,
1927 coa: &Arc<ChartOfAccounts>,
1928 document_flows: &DocumentFlowSnapshot,
1929 _stats: &mut EnhancedGenerationStatistics,
1930 ) -> SynthResult<Vec<JournalEntry>> {
1931 let mut entries = Vec::new();
1932
1933 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
1935 debug!("Phase 4a: Generating JEs from document flows");
1936 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
1937 debug!("Generated {} JEs from document flows", flow_entries.len());
1938 entries.extend(flow_entries);
1939 }
1940
1941 if self.phase_config.generate_journal_entries {
1943 info!("Phase 4: Generating Journal Entries");
1944 let je_entries = self.generate_journal_entries(coa)?;
1945 info!("Generated {} standalone journal entries", je_entries.len());
1946 entries.extend(je_entries);
1947 } else {
1948 debug!("Phase 4: Skipped (journal entry generation disabled)");
1949 }
1950
1951 if !entries.is_empty() {
1952 self.check_resources_with_log("post-journal-entries")?;
1955 }
1956
1957 Ok(entries)
1958 }
1959
1960 fn phase_anomaly_injection(
1962 &mut self,
1963 entries: &mut [JournalEntry],
1964 actions: &DegradationActions,
1965 stats: &mut EnhancedGenerationStatistics,
1966 ) -> SynthResult<AnomalyLabels> {
1967 if self.phase_config.inject_anomalies
1968 && !entries.is_empty()
1969 && !actions.skip_anomaly_injection
1970 {
1971 info!("Phase 5: Injecting Anomalies");
1972 let result = self.inject_anomalies(entries)?;
1973 stats.anomalies_injected = result.labels.len();
1974 info!("Injected {} anomalies", stats.anomalies_injected);
1975 self.check_resources_with_log("post-anomaly-injection")?;
1976 Ok(result)
1977 } else if actions.skip_anomaly_injection {
1978 warn!("Phase 5: Skipped due to resource degradation");
1979 Ok(AnomalyLabels::default())
1980 } else {
1981 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
1982 Ok(AnomalyLabels::default())
1983 }
1984 }
1985
1986 fn phase_balance_validation(
1988 &mut self,
1989 entries: &[JournalEntry],
1990 ) -> SynthResult<BalanceValidationResult> {
1991 if self.phase_config.validate_balances && !entries.is_empty() {
1992 debug!("Phase 6: Validating Balances");
1993 let balance_validation = self.validate_journal_entries(entries)?;
1994 if balance_validation.is_balanced {
1995 debug!("Balance validation passed");
1996 } else {
1997 warn!(
1998 "Balance validation found {} errors",
1999 balance_validation.validation_errors.len()
2000 );
2001 }
2002 Ok(balance_validation)
2003 } else {
2004 Ok(BalanceValidationResult::default())
2005 }
2006 }
2007
2008 fn phase_data_quality_injection(
2010 &mut self,
2011 entries: &mut [JournalEntry],
2012 actions: &DegradationActions,
2013 stats: &mut EnhancedGenerationStatistics,
2014 ) -> SynthResult<DataQualityStats> {
2015 if self.phase_config.inject_data_quality
2016 && !entries.is_empty()
2017 && !actions.skip_data_quality
2018 {
2019 info!("Phase 7: Injecting Data Quality Variations");
2020 let dq_stats = self.inject_data_quality(entries)?;
2021 stats.data_quality_issues = dq_stats.records_with_issues;
2022 info!("Injected {} data quality issues", stats.data_quality_issues);
2023 self.check_resources_with_log("post-data-quality")?;
2024 Ok(dq_stats)
2025 } else if actions.skip_data_quality {
2026 warn!("Phase 7: Skipped due to resource degradation");
2027 Ok(DataQualityStats::default())
2028 } else {
2029 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2030 Ok(DataQualityStats::default())
2031 }
2032 }
2033
2034 fn phase_audit_data(
2036 &mut self,
2037 entries: &[JournalEntry],
2038 stats: &mut EnhancedGenerationStatistics,
2039 ) -> SynthResult<AuditSnapshot> {
2040 if self.phase_config.generate_audit {
2041 info!("Phase 8: Generating Audit Data");
2042 let audit_snapshot = self.generate_audit_data(entries)?;
2043 stats.audit_engagement_count = audit_snapshot.engagements.len();
2044 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2045 stats.audit_evidence_count = audit_snapshot.evidence.len();
2046 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2047 stats.audit_finding_count = audit_snapshot.findings.len();
2048 stats.audit_judgment_count = audit_snapshot.judgments.len();
2049 info!(
2050 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2051 stats.audit_engagement_count, stats.audit_workpaper_count,
2052 stats.audit_evidence_count, stats.audit_risk_count,
2053 stats.audit_finding_count, stats.audit_judgment_count
2054 );
2055 self.check_resources_with_log("post-audit")?;
2056 Ok(audit_snapshot)
2057 } else {
2058 debug!("Phase 8: Skipped (audit generation disabled)");
2059 Ok(AuditSnapshot::default())
2060 }
2061 }
2062
2063 fn phase_banking_data(
2065 &mut self,
2066 stats: &mut EnhancedGenerationStatistics,
2067 ) -> SynthResult<BankingSnapshot> {
2068 if self.phase_config.generate_banking && self.config.banking.enabled {
2069 info!("Phase 9: Generating Banking KYC/AML Data");
2070 let banking_snapshot = self.generate_banking_data()?;
2071 stats.banking_customer_count = banking_snapshot.customers.len();
2072 stats.banking_account_count = banking_snapshot.accounts.len();
2073 stats.banking_transaction_count = banking_snapshot.transactions.len();
2074 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2075 info!(
2076 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2077 stats.banking_customer_count, stats.banking_account_count,
2078 stats.banking_transaction_count, stats.banking_suspicious_count
2079 );
2080 self.check_resources_with_log("post-banking")?;
2081 Ok(banking_snapshot)
2082 } else {
2083 debug!("Phase 9: Skipped (banking generation disabled)");
2084 Ok(BankingSnapshot::default())
2085 }
2086 }
2087
2088 fn phase_graph_export(
2090 &mut self,
2091 entries: &[JournalEntry],
2092 coa: &Arc<ChartOfAccounts>,
2093 stats: &mut EnhancedGenerationStatistics,
2094 ) -> SynthResult<GraphExportSnapshot> {
2095 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2096 && !entries.is_empty()
2097 {
2098 info!("Phase 10: Exporting Accounting Network Graphs");
2099 match self.export_graphs(entries, coa, stats) {
2100 Ok(snapshot) => {
2101 info!(
2102 "Graph export complete: {} graphs ({} nodes, {} edges)",
2103 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2104 );
2105 Ok(snapshot)
2106 }
2107 Err(e) => {
2108 warn!("Phase 10: Graph export failed: {}", e);
2109 Ok(GraphExportSnapshot::default())
2110 }
2111 }
2112 } else {
2113 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2114 Ok(GraphExportSnapshot::default())
2115 }
2116 }
2117
2118 #[allow(clippy::too_many_arguments)]
2120 fn phase_hypergraph_export(
2121 &self,
2122 coa: &Arc<ChartOfAccounts>,
2123 entries: &[JournalEntry],
2124 document_flows: &DocumentFlowSnapshot,
2125 sourcing: &SourcingSnapshot,
2126 hr: &HrSnapshot,
2127 manufacturing: &ManufacturingSnapshot,
2128 banking: &BankingSnapshot,
2129 audit: &AuditSnapshot,
2130 financial_reporting: &FinancialReportingSnapshot,
2131 ocpm: &OcpmSnapshot,
2132 stats: &mut EnhancedGenerationStatistics,
2133 ) -> SynthResult<()> {
2134 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2135 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2136 match self.export_hypergraph(
2137 coa,
2138 entries,
2139 document_flows,
2140 sourcing,
2141 hr,
2142 manufacturing,
2143 banking,
2144 audit,
2145 financial_reporting,
2146 ocpm,
2147 stats,
2148 ) {
2149 Ok(info) => {
2150 info!(
2151 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2152 info.node_count, info.edge_count, info.hyperedge_count
2153 );
2154 }
2155 Err(e) => {
2156 warn!("Phase 10b: Hypergraph export failed: {}", e);
2157 }
2158 }
2159 } else {
2160 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2161 }
2162 Ok(())
2163 }
2164
2165 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2171 if !self.config.llm.enabled {
2172 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2173 return;
2174 }
2175
2176 info!("Phase 11: Starting LLM Enrichment");
2177 let start = std::time::Instant::now();
2178
2179 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2180 let provider = Arc::new(MockLlmProvider::new(self.seed));
2181 let enricher = VendorLlmEnricher::new(provider);
2182
2183 let industry = format!("{:?}", self.config.global.industry);
2184 let max_enrichments = self
2185 .config
2186 .llm
2187 .max_vendor_enrichments
2188 .min(self.master_data.vendors.len());
2189
2190 let mut enriched_count = 0usize;
2191 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2192 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2193 Ok(name) => {
2194 vendor.name = name;
2195 enriched_count += 1;
2196 }
2197 Err(e) => {
2198 warn!(
2199 "LLM vendor enrichment failed for {}: {}",
2200 vendor.vendor_id, e
2201 );
2202 }
2203 }
2204 }
2205
2206 enriched_count
2207 }));
2208
2209 match result {
2210 Ok(enriched_count) => {
2211 stats.llm_vendors_enriched = enriched_count;
2212 let elapsed = start.elapsed();
2213 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2214 info!(
2215 "Phase 11 complete: {} vendors enriched in {}ms",
2216 enriched_count, stats.llm_enrichment_ms
2217 );
2218 }
2219 Err(_) => {
2220 let elapsed = start.elapsed();
2221 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2222 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2223 }
2224 }
2225 }
2226
2227 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2233 if !self.config.diffusion.enabled {
2234 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2235 return;
2236 }
2237
2238 info!("Phase 12: Starting Diffusion Enhancement");
2239 let start = std::time::Instant::now();
2240
2241 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2242 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2245
2246 let diffusion_config = DiffusionConfig {
2247 n_steps: self.config.diffusion.n_steps,
2248 seed: self.seed,
2249 ..Default::default()
2250 };
2251
2252 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2253
2254 let n_samples = self.config.diffusion.sample_size;
2255 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2257
2258 samples.len()
2259 }));
2260
2261 match result {
2262 Ok(sample_count) => {
2263 stats.diffusion_samples_generated = sample_count;
2264 let elapsed = start.elapsed();
2265 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2266 info!(
2267 "Phase 12 complete: {} diffusion samples generated in {}ms",
2268 sample_count, stats.diffusion_enhancement_ms
2269 );
2270 }
2271 Err(_) => {
2272 let elapsed = start.elapsed();
2273 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2274 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2275 }
2276 }
2277 }
2278
2279 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2286 if !self.config.causal.enabled {
2287 debug!("Phase 13: Skipped (causal generation disabled)");
2288 return;
2289 }
2290
2291 info!("Phase 13: Starting Causal Overlay");
2292 let start = std::time::Instant::now();
2293
2294 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2295 let graph = match self.config.causal.template.as_str() {
2297 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2298 _ => CausalGraph::fraud_detection_template(),
2299 };
2300
2301 let scm = StructuralCausalModel::new(graph.clone())
2302 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {}", e)))?;
2303
2304 let n_samples = self.config.causal.sample_size;
2305 let samples = scm
2306 .generate(n_samples, self.seed)
2307 .map_err(|e| SynthError::generation(format!("SCM generation failed: {}", e)))?;
2308
2309 let validation_passed = if self.config.causal.validate {
2311 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2312 if report.valid {
2313 info!(
2314 "Causal validation passed: all {} checks OK",
2315 report.checks.len()
2316 );
2317 } else {
2318 warn!(
2319 "Causal validation: {} violations detected: {:?}",
2320 report.violations.len(),
2321 report.violations
2322 );
2323 }
2324 Some(report.valid)
2325 } else {
2326 None
2327 };
2328
2329 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2330 }));
2331
2332 match result {
2333 Ok(Ok((sample_count, validation_passed))) => {
2334 stats.causal_samples_generated = sample_count;
2335 stats.causal_validation_passed = validation_passed;
2336 let elapsed = start.elapsed();
2337 stats.causal_generation_ms = elapsed.as_millis() as u64;
2338 info!(
2339 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2340 sample_count, stats.causal_generation_ms, validation_passed,
2341 );
2342 }
2343 Ok(Err(e)) => {
2344 let elapsed = start.elapsed();
2345 stats.causal_generation_ms = elapsed.as_millis() as u64;
2346 warn!("Phase 13: Causal generation failed: {}", e);
2347 }
2348 Err(_) => {
2349 let elapsed = start.elapsed();
2350 stats.causal_generation_ms = elapsed.as_millis() as u64;
2351 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2352 }
2353 }
2354 }
2355
2356 fn phase_sourcing_data(
2358 &mut self,
2359 stats: &mut EnhancedGenerationStatistics,
2360 ) -> SynthResult<SourcingSnapshot> {
2361 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2362 debug!("Phase 14: Skipped (sourcing generation disabled)");
2363 return Ok(SourcingSnapshot::default());
2364 }
2365
2366 info!("Phase 14: Generating S2C Sourcing Data");
2367 let seed = self.seed;
2368
2369 let vendor_ids: Vec<String> = self
2371 .master_data
2372 .vendors
2373 .iter()
2374 .map(|v| v.vendor_id.clone())
2375 .collect();
2376 if vendor_ids.is_empty() {
2377 debug!("Phase 14: Skipped (no vendors available)");
2378 return Ok(SourcingSnapshot::default());
2379 }
2380
2381 let categories: Vec<(String, String)> = vec![
2382 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2383 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2384 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2385 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2386 ("CAT-LOG".to_string(), "Logistics".to_string()),
2387 ];
2388 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2389 .iter()
2390 .map(|(id, name)| {
2391 (
2392 id.clone(),
2393 name.clone(),
2394 rust_decimal::Decimal::from(100_000),
2395 )
2396 })
2397 .collect();
2398
2399 let company_code = self
2400 .config
2401 .companies
2402 .first()
2403 .map(|c| c.code.as_str())
2404 .unwrap_or("1000");
2405 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2406 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2407 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2408 let fiscal_year = start_date.year() as u16;
2409 let owner_ids: Vec<String> = self
2410 .master_data
2411 .employees
2412 .iter()
2413 .take(5)
2414 .map(|e| e.employee_id.clone())
2415 .collect();
2416 let owner_id = owner_ids.first().map(|s| s.as_str()).unwrap_or("BUYER-001");
2417
2418 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2420 let spend_analyses =
2421 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2422
2423 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2425 let sourcing_projects = if owner_ids.is_empty() {
2426 Vec::new()
2427 } else {
2428 project_gen.generate(
2429 company_code,
2430 &categories_with_spend,
2431 &owner_ids,
2432 start_date,
2433 self.config.global.period_months,
2434 )
2435 };
2436 stats.sourcing_project_count = sourcing_projects.len();
2437
2438 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2440 let mut qual_gen = QualificationGenerator::new(seed + 2);
2441 let qualifications = qual_gen.generate(
2442 company_code,
2443 &qual_vendor_ids,
2444 sourcing_projects.first().map(|p| p.project_id.as_str()),
2445 owner_id,
2446 start_date,
2447 );
2448
2449 let mut rfx_gen = RfxGenerator::new(seed + 3);
2451 let rfx_events: Vec<RfxEvent> = sourcing_projects
2452 .iter()
2453 .map(|proj| {
2454 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2455 rfx_gen.generate(
2456 company_code,
2457 &proj.project_id,
2458 &proj.category_id,
2459 &qualified_vids,
2460 owner_id,
2461 start_date,
2462 50000.0,
2463 )
2464 })
2465 .collect();
2466 stats.rfx_event_count = rfx_events.len();
2467
2468 let mut bid_gen = BidGenerator::new(seed + 4);
2470 let mut all_bids = Vec::new();
2471 for rfx in &rfx_events {
2472 let bidder_count = vendor_ids.len().clamp(2, 5);
2473 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2474 let bids = bid_gen.generate(rfx, &responding, start_date);
2475 all_bids.extend(bids);
2476 }
2477 stats.bid_count = all_bids.len();
2478
2479 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2481 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2482 .iter()
2483 .map(|rfx| {
2484 let rfx_bids: Vec<SupplierBid> = all_bids
2485 .iter()
2486 .filter(|b| b.rfx_id == rfx.rfx_id)
2487 .cloned()
2488 .collect();
2489 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2490 })
2491 .collect();
2492
2493 let mut contract_gen = ContractGenerator::new(seed + 6);
2495 let contracts: Vec<ProcurementContract> = bid_evaluations
2496 .iter()
2497 .zip(rfx_events.iter())
2498 .filter_map(|(eval, rfx)| {
2499 eval.ranked_bids.first().and_then(|winner| {
2500 all_bids
2501 .iter()
2502 .find(|b| b.bid_id == winner.bid_id)
2503 .map(|winning_bid| {
2504 contract_gen.generate_from_bid(
2505 winning_bid,
2506 Some(&rfx.sourcing_project_id),
2507 &rfx.category_id,
2508 owner_id,
2509 start_date,
2510 )
2511 })
2512 })
2513 })
2514 .collect();
2515 stats.contract_count = contracts.len();
2516
2517 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2519 let catalog_items = catalog_gen.generate(&contracts);
2520 stats.catalog_item_count = catalog_items.len();
2521
2522 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2524 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2525 .iter()
2526 .fold(
2527 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2528 |mut acc, c| {
2529 acc.entry(c.vendor_id.clone()).or_default().push(c);
2530 acc
2531 },
2532 )
2533 .into_iter()
2534 .collect();
2535 let scorecards = scorecard_gen.generate(
2536 company_code,
2537 &vendor_contracts,
2538 start_date,
2539 end_date,
2540 owner_id,
2541 );
2542 stats.scorecard_count = scorecards.len();
2543
2544 let mut sourcing_projects = sourcing_projects;
2547 for project in &mut sourcing_projects {
2548 project.rfx_ids = rfx_events
2550 .iter()
2551 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2552 .map(|rfx| rfx.rfx_id.clone())
2553 .collect();
2554
2555 project.contract_id = contracts
2557 .iter()
2558 .find(|c| {
2559 c.sourcing_project_id
2560 .as_deref()
2561 .is_some_and(|sp| sp == project.project_id)
2562 })
2563 .map(|c| c.contract_id.clone());
2564
2565 project.spend_analysis_id = spend_analyses
2567 .iter()
2568 .find(|sa| sa.category_id == project.category_id)
2569 .map(|sa| sa.category_id.clone());
2570 }
2571
2572 info!(
2573 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2574 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2575 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2576 );
2577 self.check_resources_with_log("post-sourcing")?;
2578
2579 Ok(SourcingSnapshot {
2580 spend_analyses,
2581 sourcing_projects,
2582 qualifications,
2583 rfx_events,
2584 bids: all_bids,
2585 bid_evaluations,
2586 contracts,
2587 catalog_items,
2588 scorecards,
2589 })
2590 }
2591
2592 fn phase_intercompany(
2594 &mut self,
2595 stats: &mut EnhancedGenerationStatistics,
2596 ) -> SynthResult<IntercompanySnapshot> {
2597 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2599 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2600 return Ok(IntercompanySnapshot::default());
2601 }
2602
2603 if self.config.companies.len() < 2 {
2605 debug!(
2606 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2607 self.config.companies.len()
2608 );
2609 return Ok(IntercompanySnapshot::default());
2610 }
2611
2612 info!("Phase 14b: Generating Intercompany Transactions");
2613
2614 let seed = self.seed;
2615 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2616 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2617 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2618
2619 let parent_code = self.config.companies[0].code.clone();
2622 let mut ownership_structure =
2623 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2624
2625 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2626 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2627 format!("REL{:03}", i + 1),
2628 parent_code.clone(),
2629 company.code.clone(),
2630 rust_decimal::Decimal::from(100), start_date,
2632 );
2633 ownership_structure.add_relationship(relationship);
2634 }
2635
2636 let tp_method = match self.config.intercompany.transfer_pricing_method {
2638 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2639 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2640 }
2641 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2642 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2643 }
2644 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2645 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2646 }
2647 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2648 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2649 }
2650 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2651 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2652 }
2653 };
2654
2655 let ic_currency = self
2657 .config
2658 .companies
2659 .first()
2660 .map(|c| c.currency.clone())
2661 .unwrap_or_else(|| "USD".to_string());
2662 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2663 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2664 transfer_pricing_method: tp_method,
2665 markup_percent: rust_decimal::Decimal::from_f64_retain(
2666 self.config.intercompany.markup_percent,
2667 )
2668 .unwrap_or(rust_decimal::Decimal::from(5)),
2669 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2670 default_currency: ic_currency,
2671 ..Default::default()
2672 };
2673
2674 let mut ic_generator = datasynth_generators::ICGenerator::new(
2676 ic_gen_config,
2677 ownership_structure.clone(),
2678 seed + 50,
2679 );
2680
2681 let transactions_per_day = 3;
2684 let matched_pairs = ic_generator.generate_transactions_for_period(
2685 start_date,
2686 end_date,
2687 transactions_per_day,
2688 );
2689
2690 let mut seller_entries = Vec::new();
2692 let mut buyer_entries = Vec::new();
2693 let fiscal_year = start_date.year();
2694
2695 for pair in &matched_pairs {
2696 let fiscal_period = pair.posting_date.month();
2697 let (seller_je, buyer_je) =
2698 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2699 seller_entries.push(seller_je);
2700 buyer_entries.push(buyer_je);
2701 }
2702
2703 let matching_config = datasynth_generators::ICMatchingConfig {
2705 base_currency: self
2706 .config
2707 .companies
2708 .first()
2709 .map(|c| c.currency.clone())
2710 .unwrap_or_else(|| "USD".to_string()),
2711 ..Default::default()
2712 };
2713 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2714 matching_engine.load_matched_pairs(&matched_pairs);
2715 let matching_result = matching_engine.run_matching(end_date);
2716
2717 let mut elimination_entries = Vec::new();
2719 if self.config.intercompany.generate_eliminations {
2720 let elim_config = datasynth_generators::EliminationConfig {
2721 consolidation_entity: "GROUP".to_string(),
2722 base_currency: self
2723 .config
2724 .companies
2725 .first()
2726 .map(|c| c.currency.clone())
2727 .unwrap_or_else(|| "USD".to_string()),
2728 ..Default::default()
2729 };
2730
2731 let mut elim_generator =
2732 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2733
2734 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2735 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2736 matching_result
2737 .matched_balances
2738 .iter()
2739 .chain(matching_result.unmatched_balances.iter())
2740 .cloned()
2741 .collect();
2742
2743 let journal = elim_generator.generate_eliminations(
2744 &fiscal_period,
2745 end_date,
2746 &all_balances,
2747 &matched_pairs,
2748 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
2751
2752 elimination_entries = journal.entries.clone();
2753 }
2754
2755 let matched_pair_count = matched_pairs.len();
2756 let elimination_entry_count = elimination_entries.len();
2757 let match_rate = matching_result.match_rate;
2758
2759 stats.ic_matched_pair_count = matched_pair_count;
2760 stats.ic_elimination_count = elimination_entry_count;
2761 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2762
2763 info!(
2764 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
2765 matched_pair_count,
2766 stats.ic_transaction_count,
2767 seller_entries.len(),
2768 buyer_entries.len(),
2769 elimination_entry_count,
2770 match_rate * 100.0
2771 );
2772 self.check_resources_with_log("post-intercompany")?;
2773
2774 Ok(IntercompanySnapshot {
2775 matched_pairs,
2776 seller_journal_entries: seller_entries,
2777 buyer_journal_entries: buyer_entries,
2778 elimination_entries,
2779 matched_pair_count,
2780 elimination_entry_count,
2781 match_rate,
2782 })
2783 }
2784
2785 fn phase_financial_reporting(
2787 &mut self,
2788 document_flows: &DocumentFlowSnapshot,
2789 journal_entries: &[JournalEntry],
2790 coa: &Arc<ChartOfAccounts>,
2791 stats: &mut EnhancedGenerationStatistics,
2792 ) -> SynthResult<FinancialReportingSnapshot> {
2793 let fs_enabled = self.phase_config.generate_financial_statements
2794 || self.config.financial_reporting.enabled;
2795 let br_enabled = self.phase_config.generate_bank_reconciliation;
2796
2797 if !fs_enabled && !br_enabled {
2798 debug!("Phase 15: Skipped (financial reporting disabled)");
2799 return Ok(FinancialReportingSnapshot::default());
2800 }
2801
2802 info!("Phase 15: Generating Financial Reporting Data");
2803
2804 let seed = self.seed;
2805 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2806 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
2807
2808 let mut financial_statements = Vec::new();
2809 let mut bank_reconciliations = Vec::new();
2810 let mut trial_balances = Vec::new();
2811
2812 if fs_enabled {
2820 let company_code = self
2821 .config
2822 .companies
2823 .first()
2824 .map(|c| c.code.as_str())
2825 .unwrap_or("1000");
2826 let currency = self
2827 .config
2828 .companies
2829 .first()
2830 .map(|c| c.currency.as_str())
2831 .unwrap_or("USD");
2832 let has_journal_entries = !journal_entries.is_empty();
2833
2834 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
2837
2838 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
2840 None;
2841
2842 for period in 0..self.config.global.period_months {
2844 let period_start = start_date + chrono::Months::new(period);
2845 let period_end =
2846 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2847 let fiscal_year = period_end.year() as u16;
2848 let fiscal_period = period_end.month() as u8;
2849
2850 if has_journal_entries {
2851 let tb_entries = Self::build_cumulative_trial_balance(
2854 journal_entries,
2855 coa,
2856 company_code,
2857 start_date,
2858 period_end,
2859 fiscal_year,
2860 fiscal_period,
2861 );
2862
2863 let prior_ref = prior_cumulative_tb.as_deref();
2866 let stmts = fs_gen.generate(
2867 company_code,
2868 currency,
2869 &tb_entries,
2870 period_start,
2871 period_end,
2872 fiscal_year,
2873 fiscal_period,
2874 prior_ref,
2875 "SYS-AUTOCLOSE",
2876 );
2877
2878 for stmt in stmts {
2880 if stmt.statement_type == StatementType::CashFlowStatement {
2881 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
2883 let cf_items = Self::build_cash_flow_from_trial_balances(
2884 &tb_entries,
2885 prior_ref,
2886 net_income,
2887 );
2888 financial_statements.push(FinancialStatement {
2889 cash_flow_items: cf_items,
2890 ..stmt
2891 });
2892 } else {
2893 financial_statements.push(stmt);
2894 }
2895 }
2896
2897 trial_balances.push(PeriodTrialBalance {
2899 fiscal_year,
2900 fiscal_period,
2901 period_start,
2902 period_end,
2903 entries: tb_entries.clone(),
2904 });
2905
2906 prior_cumulative_tb = Some(tb_entries);
2908 } else {
2909 let tb_entries = Self::build_trial_balance_from_entries(
2912 journal_entries,
2913 coa,
2914 company_code,
2915 fiscal_year,
2916 fiscal_period,
2917 );
2918
2919 let stmts = fs_gen.generate(
2920 company_code,
2921 currency,
2922 &tb_entries,
2923 period_start,
2924 period_end,
2925 fiscal_year,
2926 fiscal_period,
2927 None,
2928 "SYS-AUTOCLOSE",
2929 );
2930 financial_statements.extend(stmts);
2931
2932 if !tb_entries.is_empty() {
2934 trial_balances.push(PeriodTrialBalance {
2935 fiscal_year,
2936 fiscal_period,
2937 period_start,
2938 period_end,
2939 entries: tb_entries,
2940 });
2941 }
2942 }
2943 }
2944 stats.financial_statement_count = financial_statements.len();
2945 info!(
2946 "Financial statements generated: {} statements (JE-derived: {})",
2947 stats.financial_statement_count, has_journal_entries
2948 );
2949 }
2950
2951 if br_enabled && !document_flows.payments.is_empty() {
2953 let employee_ids: Vec<String> = self
2954 .master_data
2955 .employees
2956 .iter()
2957 .map(|e| e.employee_id.clone())
2958 .collect();
2959 let mut br_gen =
2960 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
2961
2962 for company in &self.config.companies {
2964 let company_payments: Vec<PaymentReference> = document_flows
2965 .payments
2966 .iter()
2967 .filter(|p| p.header.company_code == company.code)
2968 .map(|p| PaymentReference {
2969 id: p.header.document_id.clone(),
2970 amount: if p.is_vendor { p.amount } else { -p.amount },
2971 date: p.header.document_date,
2972 reference: p
2973 .check_number
2974 .clone()
2975 .or_else(|| p.wire_reference.clone())
2976 .unwrap_or_else(|| p.header.document_id.clone()),
2977 })
2978 .collect();
2979
2980 if company_payments.is_empty() {
2981 continue;
2982 }
2983
2984 let bank_account_id = format!("{}-MAIN", company.code);
2985
2986 for period in 0..self.config.global.period_months {
2988 let period_start = start_date + chrono::Months::new(period);
2989 let period_end =
2990 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
2991
2992 let period_payments: Vec<PaymentReference> = company_payments
2993 .iter()
2994 .filter(|p| p.date >= period_start && p.date <= period_end)
2995 .cloned()
2996 .collect();
2997
2998 let recon = br_gen.generate(
2999 &company.code,
3000 &bank_account_id,
3001 period_start,
3002 period_end,
3003 &company.currency,
3004 &period_payments,
3005 );
3006 bank_reconciliations.push(recon);
3007 }
3008 }
3009 info!(
3010 "Bank reconciliations generated: {} reconciliations",
3011 bank_reconciliations.len()
3012 );
3013 }
3014
3015 stats.bank_reconciliation_count = bank_reconciliations.len();
3016 self.check_resources_with_log("post-financial-reporting")?;
3017
3018 if !trial_balances.is_empty() {
3019 info!(
3020 "Period-close trial balances captured: {} periods",
3021 trial_balances.len()
3022 );
3023 }
3024
3025 Ok(FinancialReportingSnapshot {
3026 financial_statements,
3027 bank_reconciliations,
3028 trial_balances,
3029 })
3030 }
3031
3032 fn build_trial_balance_from_entries(
3038 journal_entries: &[JournalEntry],
3039 coa: &ChartOfAccounts,
3040 company_code: &str,
3041 fiscal_year: u16,
3042 fiscal_period: u8,
3043 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3044 use rust_decimal::Decimal;
3045
3046 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3048 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3049
3050 for je in journal_entries {
3051 if je.header.company_code != company_code
3053 || je.header.fiscal_year != fiscal_year
3054 || je.header.fiscal_period != fiscal_period
3055 {
3056 continue;
3057 }
3058
3059 for line in &je.lines {
3060 let acct = &line.gl_account;
3061 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3062 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3063 }
3064 }
3065
3066 let mut all_accounts: Vec<&String> = account_debits
3068 .keys()
3069 .chain(account_credits.keys())
3070 .collect::<std::collections::HashSet<_>>()
3071 .into_iter()
3072 .collect();
3073 all_accounts.sort();
3074
3075 let mut entries = Vec::new();
3076
3077 for acct_number in all_accounts {
3078 let debit = account_debits
3079 .get(acct_number)
3080 .copied()
3081 .unwrap_or(Decimal::ZERO);
3082 let credit = account_credits
3083 .get(acct_number)
3084 .copied()
3085 .unwrap_or(Decimal::ZERO);
3086
3087 if debit.is_zero() && credit.is_zero() {
3088 continue;
3089 }
3090
3091 let account_name = coa
3093 .get_account(acct_number)
3094 .map(|gl| gl.short_description.clone())
3095 .unwrap_or_else(|| format!("Account {}", acct_number));
3096
3097 let category = Self::category_from_account_code(acct_number);
3102
3103 entries.push(datasynth_generators::TrialBalanceEntry {
3104 account_code: acct_number.clone(),
3105 account_name,
3106 category,
3107 debit_balance: debit,
3108 credit_balance: credit,
3109 });
3110 }
3111
3112 entries
3113 }
3114
3115 fn build_cumulative_trial_balance(
3122 journal_entries: &[JournalEntry],
3123 coa: &ChartOfAccounts,
3124 company_code: &str,
3125 start_date: NaiveDate,
3126 period_end: NaiveDate,
3127 fiscal_year: u16,
3128 fiscal_period: u8,
3129 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3130 use rust_decimal::Decimal;
3131
3132 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3134 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3135
3136 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3138 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3139
3140 for je in journal_entries {
3141 if je.header.company_code != company_code {
3142 continue;
3143 }
3144
3145 for line in &je.lines {
3146 let acct = &line.gl_account;
3147 let category = Self::category_from_account_code(acct);
3148 let is_bs_account = matches!(
3149 category.as_str(),
3150 "Cash"
3151 | "Receivables"
3152 | "Inventory"
3153 | "FixedAssets"
3154 | "Payables"
3155 | "AccruedLiabilities"
3156 | "LongTermDebt"
3157 | "Equity"
3158 );
3159
3160 if is_bs_account {
3161 if je.header.document_date <= period_end
3163 && je.header.document_date >= start_date
3164 {
3165 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3166 line.debit_amount;
3167 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3168 line.credit_amount;
3169 }
3170 } else {
3171 if je.header.fiscal_year == fiscal_year
3173 && je.header.fiscal_period == fiscal_period
3174 {
3175 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3176 line.debit_amount;
3177 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3178 line.credit_amount;
3179 }
3180 }
3181 }
3182 }
3183
3184 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3186 all_accounts.extend(bs_debits.keys().cloned());
3187 all_accounts.extend(bs_credits.keys().cloned());
3188 all_accounts.extend(is_debits.keys().cloned());
3189 all_accounts.extend(is_credits.keys().cloned());
3190
3191 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3192 sorted_accounts.sort();
3193
3194 let mut entries = Vec::new();
3195
3196 for acct_number in &sorted_accounts {
3197 let category = Self::category_from_account_code(acct_number);
3198 let is_bs_account = matches!(
3199 category.as_str(),
3200 "Cash"
3201 | "Receivables"
3202 | "Inventory"
3203 | "FixedAssets"
3204 | "Payables"
3205 | "AccruedLiabilities"
3206 | "LongTermDebt"
3207 | "Equity"
3208 );
3209
3210 let (debit, credit) = if is_bs_account {
3211 (
3212 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3213 bs_credits
3214 .get(acct_number)
3215 .copied()
3216 .unwrap_or(Decimal::ZERO),
3217 )
3218 } else {
3219 (
3220 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3221 is_credits
3222 .get(acct_number)
3223 .copied()
3224 .unwrap_or(Decimal::ZERO),
3225 )
3226 };
3227
3228 if debit.is_zero() && credit.is_zero() {
3229 continue;
3230 }
3231
3232 let account_name = coa
3233 .get_account(acct_number)
3234 .map(|gl| gl.short_description.clone())
3235 .unwrap_or_else(|| format!("Account {}", acct_number));
3236
3237 entries.push(datasynth_generators::TrialBalanceEntry {
3238 account_code: acct_number.clone(),
3239 account_name,
3240 category,
3241 debit_balance: debit,
3242 credit_balance: credit,
3243 });
3244 }
3245
3246 entries
3247 }
3248
3249 fn build_cash_flow_from_trial_balances(
3254 current_tb: &[datasynth_generators::TrialBalanceEntry],
3255 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3256 net_income: rust_decimal::Decimal,
3257 ) -> Vec<CashFlowItem> {
3258 use rust_decimal::Decimal;
3259
3260 let aggregate =
3262 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3263 let mut map: HashMap<String, Decimal> = HashMap::new();
3264 for entry in tb {
3265 let net = entry.debit_balance - entry.credit_balance;
3266 *map.entry(entry.category.clone()).or_default() += net;
3267 }
3268 map
3269 };
3270
3271 let current = aggregate(current_tb);
3272 let prior = prior_tb.map(aggregate);
3273
3274 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3276 *map.get(key).unwrap_or(&Decimal::ZERO)
3277 };
3278
3279 let change = |key: &str| -> Decimal {
3281 let curr = get(¤t, key);
3282 match &prior {
3283 Some(p) => curr - get(p, key),
3284 None => curr,
3285 }
3286 };
3287
3288 let fixed_asset_change = change("FixedAssets");
3291 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3292 -fixed_asset_change
3293 } else {
3294 Decimal::ZERO
3295 };
3296
3297 let ar_change = change("Receivables");
3299 let inventory_change = change("Inventory");
3300 let ap_change = change("Payables");
3302 let accrued_change = change("AccruedLiabilities");
3303
3304 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3305 + (-ap_change)
3306 + (-accrued_change);
3307
3308 let capex = if fixed_asset_change > Decimal::ZERO {
3310 -fixed_asset_change
3311 } else {
3312 Decimal::ZERO
3313 };
3314 let investing_cf = capex;
3315
3316 let debt_change = -change("LongTermDebt");
3318 let equity_change = -change("Equity");
3319 let financing_cf = debt_change + equity_change;
3320
3321 let net_change = operating_cf + investing_cf + financing_cf;
3322
3323 vec![
3324 CashFlowItem {
3325 item_code: "CF-NI".to_string(),
3326 label: "Net Income".to_string(),
3327 category: CashFlowCategory::Operating,
3328 amount: net_income,
3329 amount_prior: None,
3330 sort_order: 1,
3331 is_total: false,
3332 },
3333 CashFlowItem {
3334 item_code: "CF-DEP".to_string(),
3335 label: "Depreciation & Amortization".to_string(),
3336 category: CashFlowCategory::Operating,
3337 amount: depreciation_addback,
3338 amount_prior: None,
3339 sort_order: 2,
3340 is_total: false,
3341 },
3342 CashFlowItem {
3343 item_code: "CF-AR".to_string(),
3344 label: "Change in Accounts Receivable".to_string(),
3345 category: CashFlowCategory::Operating,
3346 amount: -ar_change,
3347 amount_prior: None,
3348 sort_order: 3,
3349 is_total: false,
3350 },
3351 CashFlowItem {
3352 item_code: "CF-AP".to_string(),
3353 label: "Change in Accounts Payable".to_string(),
3354 category: CashFlowCategory::Operating,
3355 amount: -ap_change,
3356 amount_prior: None,
3357 sort_order: 4,
3358 is_total: false,
3359 },
3360 CashFlowItem {
3361 item_code: "CF-INV".to_string(),
3362 label: "Change in Inventory".to_string(),
3363 category: CashFlowCategory::Operating,
3364 amount: -inventory_change,
3365 amount_prior: None,
3366 sort_order: 5,
3367 is_total: false,
3368 },
3369 CashFlowItem {
3370 item_code: "CF-OP".to_string(),
3371 label: "Net Cash from Operating Activities".to_string(),
3372 category: CashFlowCategory::Operating,
3373 amount: operating_cf,
3374 amount_prior: None,
3375 sort_order: 6,
3376 is_total: true,
3377 },
3378 CashFlowItem {
3379 item_code: "CF-CAPEX".to_string(),
3380 label: "Capital Expenditures".to_string(),
3381 category: CashFlowCategory::Investing,
3382 amount: capex,
3383 amount_prior: None,
3384 sort_order: 7,
3385 is_total: false,
3386 },
3387 CashFlowItem {
3388 item_code: "CF-INV-T".to_string(),
3389 label: "Net Cash from Investing Activities".to_string(),
3390 category: CashFlowCategory::Investing,
3391 amount: investing_cf,
3392 amount_prior: None,
3393 sort_order: 8,
3394 is_total: true,
3395 },
3396 CashFlowItem {
3397 item_code: "CF-DEBT".to_string(),
3398 label: "Net Borrowings / (Repayments)".to_string(),
3399 category: CashFlowCategory::Financing,
3400 amount: debt_change,
3401 amount_prior: None,
3402 sort_order: 9,
3403 is_total: false,
3404 },
3405 CashFlowItem {
3406 item_code: "CF-EQ".to_string(),
3407 label: "Equity Changes".to_string(),
3408 category: CashFlowCategory::Financing,
3409 amount: equity_change,
3410 amount_prior: None,
3411 sort_order: 10,
3412 is_total: false,
3413 },
3414 CashFlowItem {
3415 item_code: "CF-FIN-T".to_string(),
3416 label: "Net Cash from Financing Activities".to_string(),
3417 category: CashFlowCategory::Financing,
3418 amount: financing_cf,
3419 amount_prior: None,
3420 sort_order: 11,
3421 is_total: true,
3422 },
3423 CashFlowItem {
3424 item_code: "CF-NET".to_string(),
3425 label: "Net Change in Cash".to_string(),
3426 category: CashFlowCategory::Operating,
3427 amount: net_change,
3428 amount_prior: None,
3429 sort_order: 12,
3430 is_total: true,
3431 },
3432 ]
3433 }
3434
3435 fn calculate_net_income_from_tb(
3439 tb: &[datasynth_generators::TrialBalanceEntry],
3440 ) -> rust_decimal::Decimal {
3441 use rust_decimal::Decimal;
3442
3443 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3444 for entry in tb {
3445 let net = entry.debit_balance - entry.credit_balance;
3446 *aggregated.entry(entry.category.clone()).or_default() += net;
3447 }
3448
3449 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3450 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3451 let opex = *aggregated
3452 .get("OperatingExpenses")
3453 .unwrap_or(&Decimal::ZERO);
3454 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3455 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3456
3457 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3460 let tax_rate = Decimal::from_f64_retain(0.25).unwrap_or(Decimal::ZERO);
3461 let tax = operating_income * tax_rate;
3462 operating_income - tax
3463 }
3464
/// Map a GL account code to its reporting category using the first two
/// characters of the code.
///
/// Codes that are shorter than two characters, or whose prefix is not one of
/// the listed ranges, fall back to `"OperatingExpenses"`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let label = match (leading.next(), leading.next()) {
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        (Some('3'), Some('0'..='9')) => "Equity",
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Unlisted prefixes (e.g. 45-49, 53-59, 75-79, 9x) and short codes.
        _ => "OperatingExpenses",
    };
    label.to_owned()
}
3493
3494 fn phase_hr_data(
3496 &mut self,
3497 stats: &mut EnhancedGenerationStatistics,
3498 ) -> SynthResult<HrSnapshot> {
3499 if !self.config.hr.enabled {
3500 debug!("Phase 16: Skipped (HR generation disabled)");
3501 return Ok(HrSnapshot::default());
3502 }
3503
3504 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3505
3506 let seed = self.seed;
3507 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3508 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3509 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3510 let company_code = self
3511 .config
3512 .companies
3513 .first()
3514 .map(|c| c.code.as_str())
3515 .unwrap_or("1000");
3516 let currency = self
3517 .config
3518 .companies
3519 .first()
3520 .map(|c| c.currency.as_str())
3521 .unwrap_or("USD");
3522
3523 let employee_ids: Vec<String> = self
3524 .master_data
3525 .employees
3526 .iter()
3527 .map(|e| e.employee_id.clone())
3528 .collect();
3529
3530 if employee_ids.is_empty() {
3531 debug!("Phase 16: Skipped (no employees available)");
3532 return Ok(HrSnapshot::default());
3533 }
3534
3535 let cost_center_ids: Vec<String> = self
3538 .master_data
3539 .employees
3540 .iter()
3541 .filter_map(|e| e.cost_center.clone())
3542 .collect::<std::collections::HashSet<_>>()
3543 .into_iter()
3544 .collect();
3545
3546 let mut snapshot = HrSnapshot::default();
3547
3548 if self.config.hr.payroll.enabled {
3550 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3551 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3552
3553 let payroll_pack = self.primary_pack();
3555
3556 payroll_gen.set_country_pack(payroll_pack.clone());
3559
3560 let employees_with_salary: Vec<(
3561 String,
3562 rust_decimal::Decimal,
3563 Option<String>,
3564 Option<String>,
3565 )> = self
3566 .master_data
3567 .employees
3568 .iter()
3569 .map(|e| {
3570 (
3571 e.employee_id.clone(),
3572 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3574 e.department_id.clone(),
3575 )
3576 })
3577 .collect();
3578
3579 for month in 0..self.config.global.period_months {
3580 let period_start = start_date + chrono::Months::new(month);
3581 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3582 let (run, items) = payroll_gen.generate(
3583 company_code,
3584 &employees_with_salary,
3585 period_start,
3586 period_end,
3587 currency,
3588 );
3589 snapshot.payroll_runs.push(run);
3590 snapshot.payroll_run_count += 1;
3591 snapshot.payroll_line_item_count += items.len();
3592 snapshot.payroll_line_items.extend(items);
3593 }
3594 }
3595
3596 if self.config.hr.time_attendance.enabled {
3598 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3599 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3600 let entries = time_gen.generate(
3601 &employee_ids,
3602 start_date,
3603 end_date,
3604 &self.config.hr.time_attendance,
3605 );
3606 snapshot.time_entry_count = entries.len();
3607 snapshot.time_entries = entries;
3608 }
3609
3610 if self.config.hr.expenses.enabled {
3612 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3613 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3614 let company_currency = self
3615 .config
3616 .companies
3617 .first()
3618 .map(|c| c.currency.as_str())
3619 .unwrap_or("USD");
3620 let reports = expense_gen.generate_with_currency(
3621 &employee_ids,
3622 start_date,
3623 end_date,
3624 &self.config.hr.expenses,
3625 company_currency,
3626 );
3627 snapshot.expense_report_count = reports.len();
3628 snapshot.expense_reports = reports;
3629 }
3630
3631 stats.payroll_run_count = snapshot.payroll_run_count;
3632 stats.time_entry_count = snapshot.time_entry_count;
3633 stats.expense_report_count = snapshot.expense_report_count;
3634
3635 info!(
3636 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports",
3637 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3638 snapshot.time_entry_count, snapshot.expense_report_count
3639 );
3640 self.check_resources_with_log("post-hr")?;
3641
3642 Ok(snapshot)
3643 }
3644
3645 fn phase_accounting_standards(
3647 &mut self,
3648 stats: &mut EnhancedGenerationStatistics,
3649 ) -> SynthResult<AccountingStandardsSnapshot> {
3650 if !self.phase_config.generate_accounting_standards
3651 || !self.config.accounting_standards.enabled
3652 {
3653 debug!("Phase 17: Skipped (accounting standards generation disabled)");
3654 return Ok(AccountingStandardsSnapshot::default());
3655 }
3656 info!("Phase 17: Generating Accounting Standards Data");
3657
3658 let seed = self.seed;
3659 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3660 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3661 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3662 let company_code = self
3663 .config
3664 .companies
3665 .first()
3666 .map(|c| c.code.as_str())
3667 .unwrap_or("1000");
3668 let currency = self
3669 .config
3670 .companies
3671 .first()
3672 .map(|c| c.currency.as_str())
3673 .unwrap_or("USD");
3674
3675 let framework = match self.config.accounting_standards.framework {
3680 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3681 datasynth_standards::framework::AccountingFramework::UsGaap
3682 }
3683 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3684 datasynth_standards::framework::AccountingFramework::Ifrs
3685 }
3686 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3687 datasynth_standards::framework::AccountingFramework::DualReporting
3688 }
3689 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3690 datasynth_standards::framework::AccountingFramework::FrenchGaap
3691 }
3692 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3693 datasynth_standards::framework::AccountingFramework::GermanGaap
3694 }
3695 None => {
3696 let pack = self.primary_pack();
3698 let pack_fw = pack.accounting.framework.as_str();
3699 match pack_fw {
3700 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3701 "dual_reporting" => {
3702 datasynth_standards::framework::AccountingFramework::DualReporting
3703 }
3704 "french_gaap" => {
3705 datasynth_standards::framework::AccountingFramework::FrenchGaap
3706 }
3707 "german_gaap" | "hgb" => {
3708 datasynth_standards::framework::AccountingFramework::GermanGaap
3709 }
3710 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3712 }
3713 }
3714 };
3715
3716 let mut snapshot = AccountingStandardsSnapshot::default();
3717
3718 if self.config.accounting_standards.revenue_recognition.enabled {
3720 let customer_ids: Vec<String> = self
3721 .master_data
3722 .customers
3723 .iter()
3724 .map(|c| c.customer_id.clone())
3725 .collect();
3726
3727 if !customer_ids.is_empty() {
3728 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3729 let contracts = rev_gen.generate(
3730 company_code,
3731 &customer_ids,
3732 start_date,
3733 end_date,
3734 currency,
3735 &self.config.accounting_standards.revenue_recognition,
3736 framework,
3737 );
3738 snapshot.revenue_contract_count = contracts.len();
3739 snapshot.contracts = contracts;
3740 }
3741 }
3742
3743 if self.config.accounting_standards.impairment.enabled {
3745 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3746 .master_data
3747 .assets
3748 .iter()
3749 .map(|a| {
3750 (
3751 a.asset_id.clone(),
3752 a.description.clone(),
3753 a.acquisition_cost,
3754 )
3755 })
3756 .collect();
3757
3758 if !asset_data.is_empty() {
3759 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
3760 let tests = imp_gen.generate(
3761 company_code,
3762 &asset_data,
3763 end_date,
3764 &self.config.accounting_standards.impairment,
3765 framework,
3766 );
3767 snapshot.impairment_test_count = tests.len();
3768 snapshot.impairment_tests = tests;
3769 }
3770 }
3771
3772 stats.revenue_contract_count = snapshot.revenue_contract_count;
3773 stats.impairment_test_count = snapshot.impairment_test_count;
3774
3775 info!(
3776 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
3777 snapshot.revenue_contract_count, snapshot.impairment_test_count
3778 );
3779 self.check_resources_with_log("post-accounting-standards")?;
3780
3781 Ok(snapshot)
3782 }
3783
3784 fn phase_manufacturing(
3786 &mut self,
3787 stats: &mut EnhancedGenerationStatistics,
3788 ) -> SynthResult<ManufacturingSnapshot> {
3789 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
3790 debug!("Phase 18: Skipped (manufacturing generation disabled)");
3791 return Ok(ManufacturingSnapshot::default());
3792 }
3793 info!("Phase 18: Generating Manufacturing Data");
3794
3795 let seed = self.seed;
3796 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3797 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3798 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3799 let company_code = self
3800 .config
3801 .companies
3802 .first()
3803 .map(|c| c.code.as_str())
3804 .unwrap_or("1000");
3805
3806 let material_data: Vec<(String, String)> = self
3807 .master_data
3808 .materials
3809 .iter()
3810 .map(|m| (m.material_id.clone(), m.description.clone()))
3811 .collect();
3812
3813 if material_data.is_empty() {
3814 debug!("Phase 18: Skipped (no materials available)");
3815 return Ok(ManufacturingSnapshot::default());
3816 }
3817
3818 let mut snapshot = ManufacturingSnapshot::default();
3819
3820 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
3822 let production_orders = prod_gen.generate(
3823 company_code,
3824 &material_data,
3825 start_date,
3826 end_date,
3827 &self.config.manufacturing.production_orders,
3828 &self.config.manufacturing.costing,
3829 &self.config.manufacturing.routing,
3830 );
3831 snapshot.production_order_count = production_orders.len();
3832
3833 let inspection_data: Vec<(String, String, String)> = production_orders
3835 .iter()
3836 .map(|po| {
3837 (
3838 po.order_id.clone(),
3839 po.material_id.clone(),
3840 po.material_description.clone(),
3841 )
3842 })
3843 .collect();
3844
3845 snapshot.production_orders = production_orders;
3846
3847 if !inspection_data.is_empty() {
3848 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
3849 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
3850 snapshot.quality_inspection_count = inspections.len();
3851 snapshot.quality_inspections = inspections;
3852 }
3853
3854 let storage_locations: Vec<(String, String)> = material_data
3856 .iter()
3857 .enumerate()
3858 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
3859 .collect();
3860
3861 let employee_ids: Vec<String> = self
3862 .master_data
3863 .employees
3864 .iter()
3865 .map(|e| e.employee_id.clone())
3866 .collect();
3867 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
3868 .with_employee_pool(employee_ids);
3869 let mut cycle_count_total = 0usize;
3870 for month in 0..self.config.global.period_months {
3871 let count_date = start_date + chrono::Months::new(month);
3872 let items_per_count = storage_locations.len().clamp(10, 50);
3873 let cc = cc_gen.generate(
3874 company_code,
3875 &storage_locations,
3876 count_date,
3877 items_per_count,
3878 );
3879 snapshot.cycle_counts.push(cc);
3880 cycle_count_total += 1;
3881 }
3882 snapshot.cycle_count_count = cycle_count_total;
3883
3884 stats.production_order_count = snapshot.production_order_count;
3885 stats.quality_inspection_count = snapshot.quality_inspection_count;
3886 stats.cycle_count_count = snapshot.cycle_count_count;
3887
3888 info!(
3889 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts",
3890 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count
3891 );
3892 self.check_resources_with_log("post-manufacturing")?;
3893
3894 Ok(snapshot)
3895 }
3896
3897 fn phase_sales_kpi_budgets(
3899 &mut self,
3900 coa: &Arc<ChartOfAccounts>,
3901 financial_reporting: &FinancialReportingSnapshot,
3902 stats: &mut EnhancedGenerationStatistics,
3903 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
3904 if !self.phase_config.generate_sales_kpi_budgets {
3905 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
3906 return Ok(SalesKpiBudgetsSnapshot::default());
3907 }
3908 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
3909
3910 let seed = self.seed;
3911 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3912 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
3913 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3914 let company_code = self
3915 .config
3916 .companies
3917 .first()
3918 .map(|c| c.code.as_str())
3919 .unwrap_or("1000");
3920
3921 let mut snapshot = SalesKpiBudgetsSnapshot::default();
3922
3923 if self.config.sales_quotes.enabled {
3925 let customer_data: Vec<(String, String)> = self
3926 .master_data
3927 .customers
3928 .iter()
3929 .map(|c| (c.customer_id.clone(), c.name.clone()))
3930 .collect();
3931 let material_data: Vec<(String, String)> = self
3932 .master_data
3933 .materials
3934 .iter()
3935 .map(|m| (m.material_id.clone(), m.description.clone()))
3936 .collect();
3937
3938 if !customer_data.is_empty() && !material_data.is_empty() {
3939 let employee_ids: Vec<String> = self
3940 .master_data
3941 .employees
3942 .iter()
3943 .map(|e| e.employee_id.clone())
3944 .collect();
3945 let customer_ids: Vec<String> = self
3946 .master_data
3947 .customers
3948 .iter()
3949 .map(|c| c.customer_id.clone())
3950 .collect();
3951 let company_currency = self
3952 .config
3953 .companies
3954 .first()
3955 .map(|c| c.currency.as_str())
3956 .unwrap_or("USD");
3957
3958 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
3959 .with_pools(employee_ids, customer_ids);
3960 let quotes = quote_gen.generate_with_currency(
3961 company_code,
3962 &customer_data,
3963 &material_data,
3964 start_date,
3965 end_date,
3966 &self.config.sales_quotes,
3967 company_currency,
3968 );
3969 snapshot.sales_quote_count = quotes.len();
3970 snapshot.sales_quotes = quotes;
3971 }
3972 }
3973
3974 if self.config.financial_reporting.management_kpis.enabled {
3976 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
3977 let mut kpis = kpi_gen.generate(
3978 company_code,
3979 start_date,
3980 end_date,
3981 &self.config.financial_reporting.management_kpis,
3982 );
3983
3984 {
3986 use rust_decimal::Decimal;
3987
3988 if let Some(income_stmt) =
3989 financial_reporting.financial_statements.iter().find(|fs| {
3990 fs.statement_type == StatementType::IncomeStatement
3991 && fs.company_code == company_code
3992 })
3993 {
3994 let total_revenue: Decimal = income_stmt
3996 .line_items
3997 .iter()
3998 .filter(|li| li.section.contains("Revenue") && !li.is_total)
3999 .map(|li| li.amount)
4000 .sum();
4001 let total_cogs: Decimal = income_stmt
4002 .line_items
4003 .iter()
4004 .filter(|li| {
4005 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4006 && !li.is_total
4007 })
4008 .map(|li| li.amount.abs())
4009 .sum();
4010 let total_opex: Decimal = income_stmt
4011 .line_items
4012 .iter()
4013 .filter(|li| {
4014 li.section.contains("Expense")
4015 && !li.is_total
4016 && !li.section.contains("Cost")
4017 })
4018 .map(|li| li.amount.abs())
4019 .sum();
4020
4021 if total_revenue > Decimal::ZERO {
4022 let hundred = Decimal::from(100);
4023 let gross_margin_pct =
4024 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4025 let operating_income = total_revenue - total_cogs - total_opex;
4026 let op_margin_pct =
4027 (operating_income * hundred / total_revenue).round_dp(2);
4028
4029 for kpi in &mut kpis {
4031 if kpi.name == "Gross Margin" {
4032 kpi.value = gross_margin_pct;
4033 } else if kpi.name == "Operating Margin" {
4034 kpi.value = op_margin_pct;
4035 }
4036 }
4037 }
4038 }
4039
4040 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4042 fs.statement_type == StatementType::BalanceSheet
4043 && fs.company_code == company_code
4044 }) {
4045 let current_assets: Decimal = bs
4046 .line_items
4047 .iter()
4048 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4049 .map(|li| li.amount)
4050 .sum();
4051 let current_liabilities: Decimal = bs
4052 .line_items
4053 .iter()
4054 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4055 .map(|li| li.amount.abs())
4056 .sum();
4057
4058 if current_liabilities > Decimal::ZERO {
4059 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4060 for kpi in &mut kpis {
4061 if kpi.name == "Current Ratio" {
4062 kpi.value = current_ratio;
4063 }
4064 }
4065 }
4066 }
4067 }
4068
4069 snapshot.kpi_count = kpis.len();
4070 snapshot.kpis = kpis;
4071 }
4072
4073 if self.config.financial_reporting.budgets.enabled {
4075 let account_data: Vec<(String, String)> = coa
4076 .accounts
4077 .iter()
4078 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4079 .collect();
4080
4081 if !account_data.is_empty() {
4082 let fiscal_year = start_date.year() as u32;
4083 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4084 let budget = budget_gen.generate(
4085 company_code,
4086 fiscal_year,
4087 &account_data,
4088 &self.config.financial_reporting.budgets,
4089 );
4090 snapshot.budget_line_count = budget.line_items.len();
4091 snapshot.budgets.push(budget);
4092 }
4093 }
4094
4095 stats.sales_quote_count = snapshot.sales_quote_count;
4096 stats.kpi_count = snapshot.kpi_count;
4097 stats.budget_line_count = snapshot.budget_line_count;
4098
4099 info!(
4100 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4101 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4102 );
4103 self.check_resources_with_log("post-sales-kpi-budgets")?;
4104
4105 Ok(snapshot)
4106 }
4107
4108 fn phase_tax_generation(
4110 &mut self,
4111 document_flows: &DocumentFlowSnapshot,
4112 stats: &mut EnhancedGenerationStatistics,
4113 ) -> SynthResult<TaxSnapshot> {
4114 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4115 debug!("Phase 20: Skipped (tax generation disabled)");
4116 return Ok(TaxSnapshot::default());
4117 }
4118 info!("Phase 20: Generating Tax Data");
4119
4120 let seed = self.seed;
4121 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4122 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4123 let fiscal_year = start_date.year();
4124 let company_code = self
4125 .config
4126 .companies
4127 .first()
4128 .map(|c| c.code.as_str())
4129 .unwrap_or("1000");
4130
4131 let mut gen =
4132 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4133
4134 let pack = self.primary_pack().clone();
4135 let (jurisdictions, codes) =
4136 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4137
4138 let mut provisions = Vec::new();
4140 if self.config.tax.provisions.enabled {
4141 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4142 for company in &self.config.companies {
4143 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4144 let statutory_rate = rust_decimal::Decimal::new(
4145 (self.config.tax.provisions.statutory_rate * 100.0) as i64,
4146 2,
4147 );
4148 let provision = provision_gen.generate(
4149 &company.code,
4150 start_date,
4151 pre_tax_income,
4152 statutory_rate,
4153 );
4154 provisions.push(provision);
4155 }
4156 }
4157
4158 let mut tax_lines = Vec::new();
4160 if !codes.is_empty() {
4161 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4162 datasynth_generators::TaxLineGeneratorConfig::default(),
4163 codes.clone(),
4164 seed + 72,
4165 );
4166
4167 let buyer_country = self
4170 .config
4171 .companies
4172 .first()
4173 .map(|c| c.country.as_str())
4174 .unwrap_or("US");
4175 for vi in &document_flows.vendor_invoices {
4176 let lines = tax_line_gen.generate_for_document(
4177 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4178 &vi.header.document_id,
4179 buyer_country, buyer_country,
4181 vi.payable_amount,
4182 vi.header.document_date,
4183 None,
4184 );
4185 tax_lines.extend(lines);
4186 }
4187
4188 for ci in &document_flows.customer_invoices {
4190 let lines = tax_line_gen.generate_for_document(
4191 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4192 &ci.header.document_id,
4193 buyer_country, buyer_country,
4195 ci.total_gross_amount,
4196 ci.header.document_date,
4197 None,
4198 );
4199 tax_lines.extend(lines);
4200 }
4201 }
4202
4203 let snapshot = TaxSnapshot {
4204 jurisdiction_count: jurisdictions.len(),
4205 code_count: codes.len(),
4206 jurisdictions,
4207 codes,
4208 tax_provisions: provisions,
4209 tax_lines,
4210 tax_returns: Vec::new(),
4211 withholding_records: Vec::new(),
4212 tax_anomaly_labels: Vec::new(),
4213 };
4214
4215 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4216 stats.tax_code_count = snapshot.code_count;
4217 stats.tax_provision_count = snapshot.tax_provisions.len();
4218 stats.tax_line_count = snapshot.tax_lines.len();
4219
4220 info!(
4221 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4222 snapshot.jurisdiction_count,
4223 snapshot.code_count,
4224 snapshot.tax_provisions.len()
4225 );
4226 self.check_resources_with_log("post-tax")?;
4227
4228 Ok(snapshot)
4229 }
4230
4231 fn phase_esg_generation(
4233 &mut self,
4234 document_flows: &DocumentFlowSnapshot,
4235 stats: &mut EnhancedGenerationStatistics,
4236 ) -> SynthResult<EsgSnapshot> {
4237 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4238 debug!("Phase 21: Skipped (ESG generation disabled)");
4239 return Ok(EsgSnapshot::default());
4240 }
4241 info!("Phase 21: Generating ESG Data");
4242
4243 let seed = self.seed;
4244 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4245 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4246 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4247 let entity_id = self
4248 .config
4249 .companies
4250 .first()
4251 .map(|c| c.code.as_str())
4252 .unwrap_or("1000");
4253
4254 let esg_cfg = &self.config.esg;
4255 let mut snapshot = EsgSnapshot::default();
4256
4257 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4259 esg_cfg.environmental.energy.clone(),
4260 seed + 80,
4261 );
4262 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4263
4264 let facility_count = esg_cfg.environmental.energy.facility_count;
4266 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4267 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4268
4269 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4271 seed + 82,
4272 esg_cfg.environmental.waste.diversion_target,
4273 facility_count,
4274 );
4275 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4276
4277 let mut emission_gen =
4279 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4280
4281 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4283 .iter()
4284 .map(|e| datasynth_generators::EnergyInput {
4285 facility_id: e.facility_id.clone(),
4286 energy_type: match e.energy_source {
4287 EnergySourceType::NaturalGas => {
4288 datasynth_generators::EnergyInputType::NaturalGas
4289 }
4290 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4291 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4292 _ => datasynth_generators::EnergyInputType::Electricity,
4293 },
4294 consumption_kwh: e.consumption_kwh,
4295 period: e.period,
4296 })
4297 .collect();
4298
4299 let mut emissions = Vec::new();
4300 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4301 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4302
4303 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4305 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4306 for payment in &document_flows.payments {
4307 if payment.is_vendor {
4308 *totals
4309 .entry(payment.business_partner_id.clone())
4310 .or_default() += payment.amount;
4311 }
4312 }
4313 totals
4314 };
4315 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4316 .master_data
4317 .vendors
4318 .iter()
4319 .map(|v| {
4320 let spend = vendor_payment_totals
4321 .get(&v.vendor_id)
4322 .copied()
4323 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4324 datasynth_generators::VendorSpendInput {
4325 vendor_id: v.vendor_id.clone(),
4326 category: format!("{:?}", v.vendor_type).to_lowercase(),
4327 spend,
4328 country: v.country.clone(),
4329 }
4330 })
4331 .collect();
4332 if !vendor_spend.is_empty() {
4333 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4334 entity_id,
4335 &vendor_spend,
4336 start_date,
4337 end_date,
4338 ));
4339 }
4340
4341 let headcount = self.master_data.employees.len() as u32;
4343 if headcount > 0 {
4344 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4345 emissions.extend(emission_gen.generate_scope3_business_travel(
4346 entity_id,
4347 travel_spend,
4348 start_date,
4349 ));
4350 emissions
4351 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4352 }
4353
4354 snapshot.emission_count = emissions.len();
4355 snapshot.emissions = emissions;
4356 snapshot.energy = energy_records;
4357
4358 let mut workforce_gen =
4360 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4361 let total_headcount = headcount.max(100);
4362 snapshot.diversity =
4363 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4364 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4365 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4366 entity_id,
4367 facility_count,
4368 start_date,
4369 end_date,
4370 );
4371
4372 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4375 entity_id,
4376 &snapshot.safety_incidents,
4377 total_hours,
4378 start_date,
4379 );
4380 snapshot.safety_metrics = vec![safety_metric];
4381
4382 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4384 seed + 85,
4385 esg_cfg.governance.board_size,
4386 esg_cfg.governance.independence_target,
4387 );
4388 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4389
4390 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4392 esg_cfg.supply_chain_esg.clone(),
4393 seed + 86,
4394 );
4395 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4396 .master_data
4397 .vendors
4398 .iter()
4399 .map(|v| datasynth_generators::VendorInput {
4400 vendor_id: v.vendor_id.clone(),
4401 country: v.country.clone(),
4402 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4403 quality_score: None,
4404 })
4405 .collect();
4406 snapshot.supplier_assessments =
4407 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4408
4409 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4411 seed + 87,
4412 esg_cfg.reporting.clone(),
4413 esg_cfg.climate_scenarios.clone(),
4414 );
4415 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4416 snapshot.disclosures = disclosure_gen.generate_disclosures(
4417 entity_id,
4418 &snapshot.materiality,
4419 start_date,
4420 end_date,
4421 );
4422 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4423 snapshot.disclosure_count = snapshot.disclosures.len();
4424
4425 if esg_cfg.anomaly_rate > 0.0 {
4427 let mut anomaly_injector =
4428 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4429 let mut labels = Vec::new();
4430 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4431 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4432 labels.extend(
4433 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4434 );
4435 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4436 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4437 snapshot.anomaly_labels = labels;
4438 }
4439
4440 stats.esg_emission_count = snapshot.emission_count;
4441 stats.esg_disclosure_count = snapshot.disclosure_count;
4442
4443 info!(
4444 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4445 snapshot.emission_count,
4446 snapshot.disclosure_count,
4447 snapshot.supplier_assessments.len()
4448 );
4449 self.check_resources_with_log("post-esg")?;
4450
4451 Ok(snapshot)
4452 }
4453
4454 fn phase_treasury_data(
4456 &mut self,
4457 document_flows: &DocumentFlowSnapshot,
4458 stats: &mut EnhancedGenerationStatistics,
4459 ) -> SynthResult<TreasurySnapshot> {
4460 if !self.config.treasury.enabled {
4461 debug!("Phase 22: Skipped (treasury generation disabled)");
4462 return Ok(TreasurySnapshot::default());
4463 }
4464 info!("Phase 22: Generating Treasury Data");
4465
4466 let seed = self.seed;
4467 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4468 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4469 let currency = self
4470 .config
4471 .companies
4472 .first()
4473 .map(|c| c.currency.as_str())
4474 .unwrap_or("USD");
4475 let entity_id = self
4476 .config
4477 .companies
4478 .first()
4479 .map(|c| c.code.as_str())
4480 .unwrap_or("1000");
4481
4482 let mut snapshot = TreasurySnapshot::default();
4483
4484 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4486 self.config.treasury.debt.clone(),
4487 seed + 90,
4488 );
4489 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4490
4491 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4493 self.config.treasury.hedging.clone(),
4494 seed + 91,
4495 );
4496 for debt in &snapshot.debt_instruments {
4497 if debt.rate_type == InterestRateType::Variable {
4498 let swap = hedge_gen.generate_ir_swap(
4499 currency,
4500 debt.principal,
4501 debt.origination_date,
4502 debt.maturity_date,
4503 );
4504 snapshot.hedging_instruments.push(swap);
4505 }
4506 }
4507
4508 {
4511 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4512 for payment in &document_flows.payments {
4513 if payment.currency != currency {
4514 let entry = fx_map
4515 .entry(payment.currency.clone())
4516 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4517 entry.0 += payment.amount;
4518 if payment.header.document_date > entry.1 {
4520 entry.1 = payment.header.document_date;
4521 }
4522 }
4523 }
4524 if !fx_map.is_empty() {
4525 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4526 .into_iter()
4527 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4528 datasynth_generators::treasury::FxExposure {
4529 currency_pair: format!("{}/{}", foreign_ccy, currency),
4530 foreign_currency: foreign_ccy,
4531 net_amount,
4532 settlement_date,
4533 description: "AP payment FX exposure".to_string(),
4534 }
4535 })
4536 .collect();
4537 let (fx_instruments, fx_relationships) =
4538 hedge_gen.generate(start_date, &fx_exposures);
4539 snapshot.hedging_instruments.extend(fx_instruments);
4540 snapshot.hedge_relationships.extend(fx_relationships);
4541 }
4542 }
4543
4544 if self.config.treasury.anomaly_rate > 0.0 {
4546 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4547 seed + 92,
4548 self.config.treasury.anomaly_rate,
4549 );
4550 let mut labels = Vec::new();
4551 labels.extend(
4552 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4553 );
4554 snapshot.treasury_anomaly_labels = labels;
4555 }
4556
4557 if self.config.treasury.cash_positioning.enabled {
4559 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4560
4561 for payment in &document_flows.payments {
4563 cash_flows.push(datasynth_generators::treasury::CashFlow {
4564 date: payment.header.document_date,
4565 account_id: format!("{}-MAIN", entity_id),
4566 amount: payment.amount,
4567 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4568 });
4569 }
4570
4571 for chain in &document_flows.o2c_chains {
4573 if let Some(ref receipt) = chain.customer_receipt {
4574 cash_flows.push(datasynth_generators::treasury::CashFlow {
4575 date: receipt.header.document_date,
4576 account_id: format!("{}-MAIN", entity_id),
4577 amount: receipt.amount,
4578 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4579 });
4580 }
4581 for receipt in &chain.remainder_receipts {
4583 cash_flows.push(datasynth_generators::treasury::CashFlow {
4584 date: receipt.header.document_date,
4585 account_id: format!("{}-MAIN", entity_id),
4586 amount: receipt.amount,
4587 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4588 });
4589 }
4590 }
4591
4592 if !cash_flows.is_empty() {
4593 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4594 self.config.treasury.cash_positioning.clone(),
4595 seed + 93,
4596 );
4597 let account_id = format!("{}-MAIN", entity_id);
4598 snapshot.cash_positions = cash_gen.generate(
4599 entity_id,
4600 &account_id,
4601 currency,
4602 &cash_flows,
4603 start_date,
4604 start_date + chrono::Months::new(self.config.global.period_months),
4605 rust_decimal::Decimal::new(1_000_000, 0), );
4607 }
4608 }
4609
4610 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
4611 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
4612 stats.cash_position_count = snapshot.cash_positions.len();
4613
4614 info!(
4615 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions",
4616 snapshot.debt_instruments.len(),
4617 snapshot.hedging_instruments.len(),
4618 snapshot.cash_positions.len()
4619 );
4620 self.check_resources_with_log("post-treasury")?;
4621
4622 Ok(snapshot)
4623 }
4624
4625 fn phase_project_accounting(
4627 &mut self,
4628 document_flows: &DocumentFlowSnapshot,
4629 hr: &HrSnapshot,
4630 stats: &mut EnhancedGenerationStatistics,
4631 ) -> SynthResult<ProjectAccountingSnapshot> {
4632 if !self.config.project_accounting.enabled {
4633 debug!("Phase 23: Skipped (project accounting disabled)");
4634 return Ok(ProjectAccountingSnapshot::default());
4635 }
4636 info!("Phase 23: Generating Project Accounting Data");
4637
4638 let seed = self.seed;
4639 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4640 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4641 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4642 let company_code = self
4643 .config
4644 .companies
4645 .first()
4646 .map(|c| c.code.as_str())
4647 .unwrap_or("1000");
4648
4649 let mut snapshot = ProjectAccountingSnapshot::default();
4650
4651 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
4653 self.config.project_accounting.clone(),
4654 seed + 95,
4655 );
4656 let pool = project_gen.generate(company_code, start_date, end_date);
4657 snapshot.projects = pool.projects.clone();
4658
4659 {
4661 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
4662 Vec::new();
4663
4664 for te in &hr.time_entries {
4666 let total_hours = te.hours_regular + te.hours_overtime;
4667 if total_hours > 0.0 {
4668 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4669 id: te.entry_id.clone(),
4670 entity_id: company_code.to_string(),
4671 date: te.date,
4672 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
4673 .unwrap_or(rust_decimal::Decimal::ZERO),
4674 source_type: CostSourceType::TimeEntry,
4675 hours: Some(
4676 rust_decimal::Decimal::from_f64_retain(total_hours)
4677 .unwrap_or(rust_decimal::Decimal::ZERO),
4678 ),
4679 });
4680 }
4681 }
4682
4683 for er in &hr.expense_reports {
4685 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4686 id: er.report_id.clone(),
4687 entity_id: company_code.to_string(),
4688 date: er.submission_date,
4689 amount: er.total_amount,
4690 source_type: CostSourceType::ExpenseReport,
4691 hours: None,
4692 });
4693 }
4694
4695 for po in &document_flows.purchase_orders {
4697 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4698 id: po.header.document_id.clone(),
4699 entity_id: company_code.to_string(),
4700 date: po.header.document_date,
4701 amount: po.total_net_amount,
4702 source_type: CostSourceType::PurchaseOrder,
4703 hours: None,
4704 });
4705 }
4706
4707 for vi in &document_flows.vendor_invoices {
4709 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
4710 id: vi.header.document_id.clone(),
4711 entity_id: company_code.to_string(),
4712 date: vi.header.document_date,
4713 amount: vi.payable_amount,
4714 source_type: CostSourceType::VendorInvoice,
4715 hours: None,
4716 });
4717 }
4718
4719 if !source_docs.is_empty() && !pool.projects.is_empty() {
4720 let mut cost_gen =
4721 datasynth_generators::project_accounting::ProjectCostGenerator::new(
4722 self.config.project_accounting.cost_allocation.clone(),
4723 seed + 99,
4724 );
4725 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
4726 }
4727 }
4728
4729 if self.config.project_accounting.change_orders.enabled {
4731 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
4732 self.config.project_accounting.change_orders.clone(),
4733 seed + 96,
4734 );
4735 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
4736 }
4737
4738 if self.config.project_accounting.milestones.enabled {
4740 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
4741 self.config.project_accounting.milestones.clone(),
4742 seed + 97,
4743 );
4744 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
4745 }
4746
4747 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
4749 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
4750 self.config.project_accounting.earned_value.clone(),
4751 seed + 98,
4752 );
4753 snapshot.earned_value_metrics =
4754 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
4755 }
4756
4757 stats.project_count = snapshot.projects.len();
4758 stats.project_change_order_count = snapshot.change_orders.len();
4759 stats.project_cost_line_count = snapshot.cost_lines.len();
4760
4761 info!(
4762 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
4763 snapshot.projects.len(),
4764 snapshot.change_orders.len(),
4765 snapshot.milestones.len(),
4766 snapshot.earned_value_metrics.len()
4767 );
4768 self.check_resources_with_log("post-project-accounting")?;
4769
4770 Ok(snapshot)
4771 }
4772
4773 fn phase_opening_balances(
4775 &mut self,
4776 coa: &Arc<ChartOfAccounts>,
4777 stats: &mut EnhancedGenerationStatistics,
4778 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
4779 if !self.config.balance.generate_opening_balances {
4780 debug!("Phase 3b: Skipped (opening balance generation disabled)");
4781 return Ok(Vec::new());
4782 }
4783 info!("Phase 3b: Generating Opening Balances");
4784
4785 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4786 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4787 let fiscal_year = start_date.year();
4788
4789 let industry = match self.config.global.industry {
4790 IndustrySector::Manufacturing => IndustryType::Manufacturing,
4791 IndustrySector::Retail => IndustryType::Retail,
4792 IndustrySector::FinancialServices => IndustryType::Financial,
4793 IndustrySector::Healthcare => IndustryType::Healthcare,
4794 IndustrySector::Technology => IndustryType::Technology,
4795 _ => IndustryType::Manufacturing,
4796 };
4797
4798 let config = datasynth_generators::OpeningBalanceConfig {
4799 industry,
4800 ..Default::default()
4801 };
4802 let mut gen =
4803 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
4804
4805 let mut results = Vec::new();
4806 for company in &self.config.companies {
4807 let spec = OpeningBalanceSpec::new(
4808 company.code.clone(),
4809 start_date,
4810 fiscal_year,
4811 company.currency.clone(),
4812 rust_decimal::Decimal::new(10_000_000, 0),
4813 industry,
4814 );
4815 let ob = gen.generate(&spec, coa, start_date, &company.code);
4816 results.push(ob);
4817 }
4818
4819 stats.opening_balance_count = results.len();
4820 info!("Opening balances generated: {} companies", results.len());
4821 self.check_resources_with_log("post-opening-balances")?;
4822
4823 Ok(results)
4824 }
4825
4826 fn phase_subledger_reconciliation(
4828 &mut self,
4829 subledger: &SubledgerSnapshot,
4830 entries: &[JournalEntry],
4831 stats: &mut EnhancedGenerationStatistics,
4832 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
4833 if !self.config.balance.reconcile_subledgers {
4834 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
4835 return Ok(Vec::new());
4836 }
4837 info!("Phase 9b: Reconciling GL to subledger balances");
4838
4839 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4840 .map(|d| d + chrono::Months::new(self.config.global.period_months))
4841 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4842
4843 let tracker_config = BalanceTrackerConfig {
4845 validate_on_each_entry: false,
4846 track_history: false,
4847 fail_on_validation_error: false,
4848 ..Default::default()
4849 };
4850 let recon_currency = self
4851 .config
4852 .companies
4853 .first()
4854 .map(|c| c.currency.clone())
4855 .unwrap_or_else(|| "USD".to_string());
4856 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
4857 let _ = tracker.apply_entries(entries);
4858
4859 let mut engine = datasynth_generators::ReconciliationEngine::new(
4860 datasynth_generators::ReconciliationConfig::default(),
4861 );
4862
4863 let mut results = Vec::new();
4864 let company_code = self
4865 .config
4866 .companies
4867 .first()
4868 .map(|c| c.code.as_str())
4869 .unwrap_or("1000");
4870
4871 if !subledger.ar_invoices.is_empty() {
4873 let gl_balance = tracker
4874 .get_account_balance(
4875 company_code,
4876 datasynth_core::accounts::control_accounts::AR_CONTROL,
4877 )
4878 .map(|b| b.closing_balance)
4879 .unwrap_or_default();
4880 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
4881 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
4882 }
4883
4884 if !subledger.ap_invoices.is_empty() {
4886 let gl_balance = tracker
4887 .get_account_balance(
4888 company_code,
4889 datasynth_core::accounts::control_accounts::AP_CONTROL,
4890 )
4891 .map(|b| b.closing_balance)
4892 .unwrap_or_default();
4893 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
4894 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
4895 }
4896
4897 if !subledger.fa_records.is_empty() {
4899 let gl_asset_balance = tracker
4900 .get_account_balance(
4901 company_code,
4902 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
4903 )
4904 .map(|b| b.closing_balance)
4905 .unwrap_or_default();
4906 let gl_accum_depr_balance = tracker
4907 .get_account_balance(
4908 company_code,
4909 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
4910 )
4911 .map(|b| b.closing_balance)
4912 .unwrap_or_default();
4913 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
4914 subledger.fa_records.iter().collect();
4915 let (asset_recon, depr_recon) = engine.reconcile_fa(
4916 company_code,
4917 end_date,
4918 gl_asset_balance,
4919 gl_accum_depr_balance,
4920 &fa_refs,
4921 );
4922 results.push(asset_recon);
4923 results.push(depr_recon);
4924 }
4925
4926 if !subledger.inventory_positions.is_empty() {
4928 let gl_balance = tracker
4929 .get_account_balance(
4930 company_code,
4931 datasynth_core::accounts::control_accounts::INVENTORY,
4932 )
4933 .map(|b| b.closing_balance)
4934 .unwrap_or_default();
4935 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
4936 subledger.inventory_positions.iter().collect();
4937 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
4938 }
4939
4940 stats.subledger_reconciliation_count = results.len();
4941 info!(
4942 "Subledger reconciliation complete: {} reconciliations",
4943 results.len()
4944 );
4945 self.check_resources_with_log("post-subledger-reconciliation")?;
4946
4947 Ok(results)
4948 }
4949
4950 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
4952 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
4953
4954 let coa_framework = self.resolve_coa_framework();
4955
4956 let mut gen = ChartOfAccountsGenerator::new(
4957 self.config.chart_of_accounts.complexity,
4958 self.config.global.industry,
4959 self.seed,
4960 )
4961 .with_coa_framework(coa_framework);
4962
4963 let coa = Arc::new(gen.generate());
4964 self.coa = Some(Arc::clone(&coa));
4965
4966 if let Some(pb) = pb {
4967 pb.finish_with_message("Chart of Accounts complete");
4968 }
4969
4970 Ok(coa)
4971 }
4972
4973 fn generate_master_data(&mut self) -> SynthResult<()> {
4975 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4976 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
4977 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4978
4979 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
4981
4982 let pack = self.primary_pack().clone();
4984
4985 let vendors_per_company = self.phase_config.vendors_per_company;
4987 let customers_per_company = self.phase_config.customers_per_company;
4988 let materials_per_company = self.phase_config.materials_per_company;
4989 let assets_per_company = self.phase_config.assets_per_company;
4990 let coa_framework = self.resolve_coa_framework();
4991
4992 let per_company_results: Vec<_> = self
4995 .config
4996 .companies
4997 .par_iter()
4998 .enumerate()
4999 .map(|(i, company)| {
5000 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
5001 let pack = pack.clone();
5002
5003 let mut vendor_gen = VendorGenerator::new(company_seed);
5005 vendor_gen.set_country_pack(pack.clone());
5006 vendor_gen.set_coa_framework(coa_framework);
5007 let vendor_pool =
5008 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
5009
5010 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
5012 customer_gen.set_country_pack(pack.clone());
5013 customer_gen.set_coa_framework(coa_framework);
5014 let customer_pool = customer_gen.generate_customer_pool(
5015 customers_per_company,
5016 &company.code,
5017 start_date,
5018 );
5019
5020 let mut material_gen = MaterialGenerator::new(company_seed + 200);
5022 material_gen.set_country_pack(pack);
5023 let material_pool = material_gen.generate_material_pool(
5024 materials_per_company,
5025 &company.code,
5026 start_date,
5027 );
5028
5029 let mut asset_gen = AssetGenerator::new(company_seed + 300);
5031 let asset_pool = asset_gen.generate_asset_pool(
5032 assets_per_company,
5033 &company.code,
5034 (start_date, end_date),
5035 );
5036
5037 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
5039 let employee_pool =
5040 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
5041
5042 (
5043 vendor_pool.vendors,
5044 customer_pool.customers,
5045 material_pool.materials,
5046 asset_pool.assets,
5047 employee_pool.employees,
5048 )
5049 })
5050 .collect();
5051
5052 for (vendors, customers, materials, assets, employees) in per_company_results {
5054 self.master_data.vendors.extend(vendors);
5055 self.master_data.customers.extend(customers);
5056 self.master_data.materials.extend(materials);
5057 self.master_data.assets.extend(assets);
5058 self.master_data.employees.extend(employees);
5059 }
5060
5061 if let Some(pb) = &pb {
5062 pb.inc(total);
5063 }
5064 if let Some(pb) = pb {
5065 pb.finish_with_message("Master data generation complete");
5066 }
5067
5068 Ok(())
5069 }
5070
5071 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
5073 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5074 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5075
5076 let months = (self.config.global.period_months as usize).max(1);
5079 let p2p_count = self
5080 .phase_config
5081 .p2p_chains
5082 .min(self.master_data.vendors.len() * 2 * months);
5083 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
5084
5085 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
5087 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
5088 p2p_gen.set_country_pack(self.primary_pack().clone());
5089
5090 for i in 0..p2p_count {
5091 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
5092 let materials: Vec<&Material> = self
5093 .master_data
5094 .materials
5095 .iter()
5096 .skip(i % self.master_data.materials.len().max(1))
5097 .take(2.min(self.master_data.materials.len()))
5098 .collect();
5099
5100 if materials.is_empty() {
5101 continue;
5102 }
5103
5104 let company = &self.config.companies[i % self.config.companies.len()];
5105 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
5106 let fiscal_period = po_date.month() as u8;
5107 let created_by = if self.master_data.employees.is_empty() {
5108 "SYSTEM"
5109 } else {
5110 self.master_data.employees[i % self.master_data.employees.len()]
5111 .user_id
5112 .as_str()
5113 };
5114
5115 let chain = p2p_gen.generate_chain(
5116 &company.code,
5117 vendor,
5118 &materials,
5119 po_date,
5120 start_date.year() as u16,
5121 fiscal_period,
5122 created_by,
5123 );
5124
5125 flows.purchase_orders.push(chain.purchase_order.clone());
5127 flows.goods_receipts.extend(chain.goods_receipts.clone());
5128 if let Some(vi) = &chain.vendor_invoice {
5129 flows.vendor_invoices.push(vi.clone());
5130 }
5131 if let Some(payment) = &chain.payment {
5132 flows.payments.push(payment.clone());
5133 }
5134 for remainder in &chain.remainder_payments {
5135 flows.payments.push(remainder.clone());
5136 }
5137 flows.p2p_chains.push(chain);
5138
5139 if let Some(pb) = &pb {
5140 pb.inc(1);
5141 }
5142 }
5143
5144 if let Some(pb) = pb {
5145 pb.finish_with_message("P2P document flows complete");
5146 }
5147
5148 let o2c_count = self
5151 .phase_config
5152 .o2c_chains
5153 .min(self.master_data.customers.len() * 2 * months);
5154 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
5155
5156 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
5158 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
5159 o2c_gen.set_country_pack(self.primary_pack().clone());
5160
5161 for i in 0..o2c_count {
5162 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
5163 let materials: Vec<&Material> = self
5164 .master_data
5165 .materials
5166 .iter()
5167 .skip(i % self.master_data.materials.len().max(1))
5168 .take(2.min(self.master_data.materials.len()))
5169 .collect();
5170
5171 if materials.is_empty() {
5172 continue;
5173 }
5174
5175 let company = &self.config.companies[i % self.config.companies.len()];
5176 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
5177 let fiscal_period = so_date.month() as u8;
5178 let created_by = if self.master_data.employees.is_empty() {
5179 "SYSTEM"
5180 } else {
5181 self.master_data.employees[i % self.master_data.employees.len()]
5182 .user_id
5183 .as_str()
5184 };
5185
5186 let chain = o2c_gen.generate_chain(
5187 &company.code,
5188 customer,
5189 &materials,
5190 so_date,
5191 start_date.year() as u16,
5192 fiscal_period,
5193 created_by,
5194 );
5195
5196 flows.sales_orders.push(chain.sales_order.clone());
5198 flows.deliveries.extend(chain.deliveries.clone());
5199 if let Some(ci) = &chain.customer_invoice {
5200 flows.customer_invoices.push(ci.clone());
5201 }
5202 if let Some(receipt) = &chain.customer_receipt {
5203 flows.payments.push(receipt.clone());
5204 }
5205 for receipt in &chain.remainder_receipts {
5207 flows.payments.push(receipt.clone());
5208 }
5209 flows.o2c_chains.push(chain);
5210
5211 if let Some(pb) = &pb {
5212 pb.inc(1);
5213 }
5214 }
5215
5216 if let Some(pb) = pb {
5217 pb.finish_with_message("O2C document flows complete");
5218 }
5219
5220 Ok(())
5221 }
5222
5223 fn generate_journal_entries(
5225 &mut self,
5226 coa: &Arc<ChartOfAccounts>,
5227 ) -> SynthResult<Vec<JournalEntry>> {
5228 use datasynth_core::traits::ParallelGenerator;
5229
5230 let total = self.calculate_total_transactions();
5231 let pb = self.create_progress_bar(total, "Generating Journal Entries");
5232
5233 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5234 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
5235 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5236
5237 let company_codes: Vec<String> = self
5238 .config
5239 .companies
5240 .iter()
5241 .map(|c| c.code.clone())
5242 .collect();
5243
5244 let generator = JournalEntryGenerator::new_with_params(
5245 self.config.transactions.clone(),
5246 Arc::clone(coa),
5247 company_codes,
5248 start_date,
5249 end_date,
5250 self.seed,
5251 );
5252
5253 let je_pack = self.primary_pack();
5257
5258 let mut generator = generator
5259 .with_master_data(
5260 &self.master_data.vendors,
5261 &self.master_data.customers,
5262 &self.master_data.materials,
5263 )
5264 .with_country_pack_names(je_pack)
5265 .with_country_pack_temporal(
5266 self.config.temporal_patterns.clone(),
5267 self.seed + 200,
5268 je_pack,
5269 )
5270 .with_persona_errors(true)
5271 .with_fraud_config(self.config.fraud.clone());
5272
5273 if self.config.temporal.enabled {
5275 let drift_config = self.config.temporal.to_core_config();
5276 generator = generator.with_drift_config(drift_config, self.seed + 100);
5277 }
5278
5279 self.check_memory_limit()?;
5281
5282 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
5284
5285 let entries = if total >= 10_000 && num_threads > 1 {
5289 let sub_generators = generator.split(num_threads);
5292 let entries_per_thread = total as usize / num_threads;
5293 let remainder = total as usize % num_threads;
5294
5295 let batches: Vec<Vec<JournalEntry>> = sub_generators
5296 .into_par_iter()
5297 .enumerate()
5298 .map(|(i, mut gen)| {
5299 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
5300 gen.generate_batch(count)
5301 })
5302 .collect();
5303
5304 let entries = JournalEntryGenerator::merge_results(batches);
5306
5307 if let Some(pb) = &pb {
5308 pb.inc(total);
5309 }
5310 entries
5311 } else {
5312 let mut entries = Vec::with_capacity(total as usize);
5314 for _ in 0..total {
5315 let entry = generator.generate();
5316 entries.push(entry);
5317 if let Some(pb) = &pb {
5318 pb.inc(1);
5319 }
5320 }
5321 entries
5322 };
5323
5324 if let Some(pb) = pb {
5325 pb.finish_with_message("Journal entries complete");
5326 }
5327
5328 Ok(entries)
5329 }
5330
5331 fn generate_jes_from_document_flows(
5336 &mut self,
5337 flows: &DocumentFlowSnapshot,
5338 ) -> SynthResult<Vec<JournalEntry>> {
5339 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
5340 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
5341
5342 let je_config = match self.resolve_coa_framework() {
5343 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
5344 CoAFramework::GermanSkr04 => {
5345 let fa = datasynth_core::FrameworkAccounts::german_gaap();
5346 DocumentFlowJeConfig::from(&fa)
5347 }
5348 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
5349 };
5350
5351 let populate_fec = je_config.populate_fec_fields;
5352 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
5353
5354 if populate_fec {
5358 let mut aux_lookup = std::collections::HashMap::new();
5359 for vendor in &self.master_data.vendors {
5360 if let Some(ref aux) = vendor.auxiliary_gl_account {
5361 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
5362 }
5363 }
5364 for customer in &self.master_data.customers {
5365 if let Some(ref aux) = customer.auxiliary_gl_account {
5366 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
5367 }
5368 }
5369 if !aux_lookup.is_empty() {
5370 generator.set_auxiliary_account_lookup(aux_lookup);
5371 }
5372 }
5373
5374 let mut entries = Vec::new();
5375
5376 for chain in &flows.p2p_chains {
5378 let chain_entries = generator.generate_from_p2p_chain(chain);
5379 entries.extend(chain_entries);
5380 if let Some(pb) = &pb {
5381 pb.inc(1);
5382 }
5383 }
5384
5385 for chain in &flows.o2c_chains {
5387 let chain_entries = generator.generate_from_o2c_chain(chain);
5388 entries.extend(chain_entries);
5389 if let Some(pb) = &pb {
5390 pb.inc(1);
5391 }
5392 }
5393
5394 if let Some(pb) = pb {
5395 pb.finish_with_message(format!(
5396 "Generated {} JEs from document flows",
5397 entries.len()
5398 ));
5399 }
5400
5401 Ok(entries)
5402 }
5403
5404 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
5410 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
5411
5412 let mut jes = Vec::with_capacity(payroll_runs.len());
5413
5414 for run in payroll_runs {
5415 let mut je = JournalEntry::new_simple(
5416 format!("JE-PAYROLL-{}", run.payroll_id),
5417 run.company_code.clone(),
5418 run.run_date,
5419 format!("Payroll {}", run.payroll_id),
5420 );
5421
5422 je.add_line(JournalEntryLine {
5424 line_number: 1,
5425 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
5426 debit_amount: run.total_gross,
5427 reference: Some(run.payroll_id.clone()),
5428 text: Some(format!(
5429 "Payroll {} ({} employees)",
5430 run.payroll_id, run.employee_count
5431 )),
5432 ..Default::default()
5433 });
5434
5435 je.add_line(JournalEntryLine {
5437 line_number: 2,
5438 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
5439 credit_amount: run.total_gross,
5440 reference: Some(run.payroll_id.clone()),
5441 ..Default::default()
5442 });
5443
5444 jes.push(je);
5445 }
5446
5447 jes
5448 }
5449
5450 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
5456 use datasynth_core::accounts::{control_accounts, expense_accounts};
5457 use datasynth_core::models::ProductionOrderStatus;
5458
5459 let mut jes = Vec::new();
5460
5461 for order in production_orders {
5462 if !matches!(
5464 order.status,
5465 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
5466 ) {
5467 continue;
5468 }
5469
5470 let mut je = JournalEntry::new_simple(
5471 format!("JE-MFG-{}", order.order_id),
5472 order.company_code.clone(),
5473 order.actual_end.unwrap_or(order.planned_end),
5474 format!(
5475 "Production Order {} - {}",
5476 order.order_id, order.material_description
5477 ),
5478 );
5479
5480 je.add_line(JournalEntryLine {
5482 line_number: 1,
5483 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
5484 debit_amount: order.actual_cost,
5485 reference: Some(order.order_id.clone()),
5486 text: Some(format!(
5487 "Material consumption for {}",
5488 order.material_description
5489 )),
5490 quantity: Some(order.actual_quantity),
5491 unit: Some("EA".to_string()),
5492 ..Default::default()
5493 });
5494
5495 je.add_line(JournalEntryLine {
5497 line_number: 2,
5498 gl_account: control_accounts::INVENTORY.to_string(),
5499 credit_amount: order.actual_cost,
5500 reference: Some(order.order_id.clone()),
5501 ..Default::default()
5502 });
5503
5504 jes.push(je);
5505 }
5506
5507 jes
5508 }
5509
5510 fn link_document_flows_to_subledgers(
5515 &mut self,
5516 flows: &DocumentFlowSnapshot,
5517 ) -> SynthResult<SubledgerSnapshot> {
5518 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
5519 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
5520
5521 let vendor_names: std::collections::HashMap<String, String> = self
5523 .master_data
5524 .vendors
5525 .iter()
5526 .map(|v| (v.vendor_id.clone(), v.name.clone()))
5527 .collect();
5528 let customer_names: std::collections::HashMap<String, String> = self
5529 .master_data
5530 .customers
5531 .iter()
5532 .map(|c| (c.customer_id.clone(), c.name.clone()))
5533 .collect();
5534
5535 let mut linker = DocumentFlowLinker::new()
5536 .with_vendor_names(vendor_names)
5537 .with_customer_names(customer_names);
5538
5539 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
5541 if let Some(pb) = &pb {
5542 pb.inc(flows.vendor_invoices.len() as u64);
5543 }
5544
5545 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
5547 if let Some(pb) = &pb {
5548 pb.inc(flows.customer_invoices.len() as u64);
5549 }
5550
5551 if let Some(pb) = pb {
5552 pb.finish_with_message(format!(
5553 "Linked {} AP and {} AR invoices",
5554 ap_invoices.len(),
5555 ar_invoices.len()
5556 ));
5557 }
5558
5559 Ok(SubledgerSnapshot {
5560 ap_invoices,
5561 ar_invoices,
5562 fa_records: Vec::new(),
5563 inventory_positions: Vec::new(),
5564 inventory_movements: Vec::new(),
5565 })
5566 }
5567
5568 #[allow(clippy::too_many_arguments)]
5573 fn generate_ocpm_events(
5574 &mut self,
5575 flows: &DocumentFlowSnapshot,
5576 sourcing: &SourcingSnapshot,
5577 hr: &HrSnapshot,
5578 manufacturing: &ManufacturingSnapshot,
5579 banking: &BankingSnapshot,
5580 audit: &AuditSnapshot,
5581 financial_reporting: &FinancialReportingSnapshot,
5582 ) -> SynthResult<OcpmSnapshot> {
5583 let total_chains = flows.p2p_chains.len()
5584 + flows.o2c_chains.len()
5585 + sourcing.sourcing_projects.len()
5586 + hr.payroll_runs.len()
5587 + manufacturing.production_orders.len()
5588 + banking.customers.len()
5589 + audit.engagements.len()
5590 + financial_reporting.bank_reconciliations.len();
5591 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
5592
5593 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
5595 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
5596
5597 let ocpm_config = OcpmGeneratorConfig {
5599 generate_p2p: true,
5600 generate_o2c: true,
5601 generate_s2c: !sourcing.sourcing_projects.is_empty(),
5602 generate_h2r: !hr.payroll_runs.is_empty(),
5603 generate_mfg: !manufacturing.production_orders.is_empty(),
5604 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
5605 generate_bank: !banking.customers.is_empty(),
5606 generate_audit: !audit.engagements.is_empty(),
5607 happy_path_rate: 0.75,
5608 exception_path_rate: 0.20,
5609 error_path_rate: 0.05,
5610 add_duration_variability: true,
5611 duration_std_dev_factor: 0.3,
5612 };
5613 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
5614 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
5615
5616 let available_users: Vec<String> = self
5618 .master_data
5619 .employees
5620 .iter()
5621 .take(20)
5622 .map(|e| e.user_id.clone())
5623 .collect();
5624
5625 let fallback_date =
5627 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
5628 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5629 .unwrap_or(fallback_date);
5630 let base_midnight = base_date
5631 .and_hms_opt(0, 0, 0)
5632 .expect("midnight is always valid");
5633 let base_datetime =
5634 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
5635
5636 let add_result = |event_log: &mut OcpmEventLog,
5638 result: datasynth_ocpm::CaseGenerationResult| {
5639 for event in result.events {
5640 event_log.add_event(event);
5641 }
5642 for object in result.objects {
5643 event_log.add_object(object);
5644 }
5645 for relationship in result.relationships {
5646 event_log.add_relationship(relationship);
5647 }
5648 event_log.add_case(result.case_trace);
5649 };
5650
5651 for chain in &flows.p2p_chains {
5653 let po = &chain.purchase_order;
5654 let documents = P2pDocuments::new(
5655 &po.header.document_id,
5656 &po.vendor_id,
5657 &po.header.company_code,
5658 po.total_net_amount,
5659 &po.header.currency,
5660 &ocpm_uuid_factory,
5661 )
5662 .with_goods_receipt(
5663 chain
5664 .goods_receipts
5665 .first()
5666 .map(|gr| gr.header.document_id.as_str())
5667 .unwrap_or(""),
5668 &ocpm_uuid_factory,
5669 )
5670 .with_invoice(
5671 chain
5672 .vendor_invoice
5673 .as_ref()
5674 .map(|vi| vi.header.document_id.as_str())
5675 .unwrap_or(""),
5676 &ocpm_uuid_factory,
5677 )
5678 .with_payment(
5679 chain
5680 .payment
5681 .as_ref()
5682 .map(|p| p.header.document_id.as_str())
5683 .unwrap_or(""),
5684 &ocpm_uuid_factory,
5685 );
5686
5687 let start_time =
5688 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
5689 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
5690 add_result(&mut event_log, result);
5691
5692 if let Some(pb) = &pb {
5693 pb.inc(1);
5694 }
5695 }
5696
5697 for chain in &flows.o2c_chains {
5699 let so = &chain.sales_order;
5700 let documents = O2cDocuments::new(
5701 &so.header.document_id,
5702 &so.customer_id,
5703 &so.header.company_code,
5704 so.total_net_amount,
5705 &so.header.currency,
5706 &ocpm_uuid_factory,
5707 )
5708 .with_delivery(
5709 chain
5710 .deliveries
5711 .first()
5712 .map(|d| d.header.document_id.as_str())
5713 .unwrap_or(""),
5714 &ocpm_uuid_factory,
5715 )
5716 .with_invoice(
5717 chain
5718 .customer_invoice
5719 .as_ref()
5720 .map(|ci| ci.header.document_id.as_str())
5721 .unwrap_or(""),
5722 &ocpm_uuid_factory,
5723 )
5724 .with_receipt(
5725 chain
5726 .customer_receipt
5727 .as_ref()
5728 .map(|r| r.header.document_id.as_str())
5729 .unwrap_or(""),
5730 &ocpm_uuid_factory,
5731 );
5732
5733 let start_time =
5734 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
5735 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
5736 add_result(&mut event_log, result);
5737
5738 if let Some(pb) = &pb {
5739 pb.inc(1);
5740 }
5741 }
5742
5743 for project in &sourcing.sourcing_projects {
5745 let vendor_id = sourcing
5747 .contracts
5748 .iter()
5749 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5750 .map(|c| c.vendor_id.clone())
5751 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
5752 .or_else(|| {
5753 self.master_data
5754 .vendors
5755 .first()
5756 .map(|v| v.vendor_id.clone())
5757 })
5758 .unwrap_or_else(|| "V000".to_string());
5759 let mut docs = S2cDocuments::new(
5760 &project.project_id,
5761 &vendor_id,
5762 &project.company_code,
5763 project.estimated_annual_spend,
5764 &ocpm_uuid_factory,
5765 );
5766 if let Some(rfx) = sourcing
5768 .rfx_events
5769 .iter()
5770 .find(|r| r.sourcing_project_id == project.project_id)
5771 {
5772 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
5773 if let Some(bid) = sourcing.bids.iter().find(|b| {
5775 b.rfx_id == rfx.rfx_id
5776 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
5777 }) {
5778 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
5779 }
5780 }
5781 if let Some(contract) = sourcing
5783 .contracts
5784 .iter()
5785 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
5786 {
5787 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
5788 }
5789 let start_time = base_datetime - chrono::Duration::days(90);
5790 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
5791 add_result(&mut event_log, result);
5792
5793 if let Some(pb) = &pb {
5794 pb.inc(1);
5795 }
5796 }
5797
5798 for run in &hr.payroll_runs {
5800 let employee_id = hr
5802 .payroll_line_items
5803 .iter()
5804 .find(|li| li.payroll_id == run.payroll_id)
5805 .map(|li| li.employee_id.as_str())
5806 .unwrap_or("EMP000");
5807 let docs = H2rDocuments::new(
5808 &run.payroll_id,
5809 employee_id,
5810 &run.company_code,
5811 run.total_gross,
5812 &ocpm_uuid_factory,
5813 )
5814 .with_time_entries(
5815 hr.time_entries
5816 .iter()
5817 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
5818 .take(5)
5819 .map(|t| t.entry_id.as_str())
5820 .collect(),
5821 );
5822 let start_time = base_datetime - chrono::Duration::days(30);
5823 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
5824 add_result(&mut event_log, result);
5825
5826 if let Some(pb) = &pb {
5827 pb.inc(1);
5828 }
5829 }
5830
5831 for order in &manufacturing.production_orders {
5833 let mut docs = MfgDocuments::new(
5834 &order.order_id,
5835 &order.material_id,
5836 &order.company_code,
5837 order.planned_quantity,
5838 &ocpm_uuid_factory,
5839 )
5840 .with_operations(
5841 order
5842 .operations
5843 .iter()
5844 .map(|o| format!("OP-{:04}", o.operation_number))
5845 .collect::<Vec<_>>()
5846 .iter()
5847 .map(|s| s.as_str())
5848 .collect(),
5849 );
5850 if let Some(insp) = manufacturing
5852 .quality_inspections
5853 .iter()
5854 .find(|i| i.reference_id == order.order_id)
5855 {
5856 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
5857 }
5858 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
5860 cc.items
5861 .iter()
5862 .any(|item| item.material_id == order.material_id)
5863 }) {
5864 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
5865 }
5866 let start_time = base_datetime - chrono::Duration::days(60);
5867 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
5868 add_result(&mut event_log, result);
5869
5870 if let Some(pb) = &pb {
5871 pb.inc(1);
5872 }
5873 }
5874
5875 for customer in &banking.customers {
5877 let customer_id_str = customer.customer_id.to_string();
5878 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
5879 if let Some(account) = banking
5881 .accounts
5882 .iter()
5883 .find(|a| a.primary_owner_id == customer.customer_id)
5884 {
5885 let account_id_str = account.account_id.to_string();
5886 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
5887 let txn_strs: Vec<String> = banking
5889 .transactions
5890 .iter()
5891 .filter(|t| t.account_id == account.account_id)
5892 .take(10)
5893 .map(|t| t.transaction_id.to_string())
5894 .collect();
5895 let txn_ids: Vec<&str> = txn_strs.iter().map(|s| s.as_str()).collect();
5896 let txn_amounts: Vec<rust_decimal::Decimal> = banking
5897 .transactions
5898 .iter()
5899 .filter(|t| t.account_id == account.account_id)
5900 .take(10)
5901 .map(|t| t.amount)
5902 .collect();
5903 if !txn_ids.is_empty() {
5904 docs = docs.with_transactions(txn_ids, txn_amounts);
5905 }
5906 }
5907 let start_time = base_datetime - chrono::Duration::days(180);
5908 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
5909 add_result(&mut event_log, result);
5910
5911 if let Some(pb) = &pb {
5912 pb.inc(1);
5913 }
5914 }
5915
5916 for engagement in &audit.engagements {
5918 let engagement_id_str = engagement.engagement_id.to_string();
5919 let docs = AuditDocuments::new(
5920 &engagement_id_str,
5921 &engagement.client_entity_id,
5922 &ocpm_uuid_factory,
5923 )
5924 .with_workpapers(
5925 audit
5926 .workpapers
5927 .iter()
5928 .filter(|w| w.engagement_id == engagement.engagement_id)
5929 .take(10)
5930 .map(|w| w.workpaper_id.to_string())
5931 .collect::<Vec<_>>()
5932 .iter()
5933 .map(|s| s.as_str())
5934 .collect(),
5935 )
5936 .with_evidence(
5937 audit
5938 .evidence
5939 .iter()
5940 .filter(|e| e.engagement_id == engagement.engagement_id)
5941 .take(10)
5942 .map(|e| e.evidence_id.to_string())
5943 .collect::<Vec<_>>()
5944 .iter()
5945 .map(|s| s.as_str())
5946 .collect(),
5947 )
5948 .with_risks(
5949 audit
5950 .risk_assessments
5951 .iter()
5952 .filter(|r| r.engagement_id == engagement.engagement_id)
5953 .take(5)
5954 .map(|r| r.risk_id.to_string())
5955 .collect::<Vec<_>>()
5956 .iter()
5957 .map(|s| s.as_str())
5958 .collect(),
5959 )
5960 .with_findings(
5961 audit
5962 .findings
5963 .iter()
5964 .filter(|f| f.engagement_id == engagement.engagement_id)
5965 .take(5)
5966 .map(|f| f.finding_id.to_string())
5967 .collect::<Vec<_>>()
5968 .iter()
5969 .map(|s| s.as_str())
5970 .collect(),
5971 )
5972 .with_judgments(
5973 audit
5974 .judgments
5975 .iter()
5976 .filter(|j| j.engagement_id == engagement.engagement_id)
5977 .take(5)
5978 .map(|j| j.judgment_id.to_string())
5979 .collect::<Vec<_>>()
5980 .iter()
5981 .map(|s| s.as_str())
5982 .collect(),
5983 );
5984 let start_time = base_datetime - chrono::Duration::days(120);
5985 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
5986 add_result(&mut event_log, result);
5987
5988 if let Some(pb) = &pb {
5989 pb.inc(1);
5990 }
5991 }
5992
5993 for recon in &financial_reporting.bank_reconciliations {
5995 let docs = BankReconDocuments::new(
5996 &recon.reconciliation_id,
5997 &recon.bank_account_id,
5998 &recon.company_code,
5999 recon.bank_ending_balance,
6000 &ocpm_uuid_factory,
6001 )
6002 .with_statement_lines(
6003 recon
6004 .statement_lines
6005 .iter()
6006 .take(20)
6007 .map(|l| l.line_id.as_str())
6008 .collect(),
6009 )
6010 .with_reconciling_items(
6011 recon
6012 .reconciling_items
6013 .iter()
6014 .take(10)
6015 .map(|i| i.item_id.as_str())
6016 .collect(),
6017 );
6018 let start_time = base_datetime - chrono::Duration::days(30);
6019 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
6020 add_result(&mut event_log, result);
6021
6022 if let Some(pb) = &pb {
6023 pb.inc(1);
6024 }
6025 }
6026
6027 event_log.compute_variants();
6029
6030 let summary = event_log.summary();
6031
6032 if let Some(pb) = pb {
6033 pb.finish_with_message(format!(
6034 "Generated {} OCPM events, {} objects",
6035 summary.event_count, summary.object_count
6036 ));
6037 }
6038
6039 Ok(OcpmSnapshot {
6040 event_count: summary.event_count,
6041 object_count: summary.object_count,
6042 case_count: summary.case_count,
6043 event_log: Some(event_log),
6044 })
6045 }
6046
6047 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
6049 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
6050
6051 let total_rate = if self.config.anomaly_injection.enabled {
6054 self.config.anomaly_injection.rates.total_rate
6055 } else if self.config.fraud.enabled {
6056 self.config.fraud.fraud_rate
6057 } else {
6058 0.02
6059 };
6060
6061 let fraud_rate = if self.config.anomaly_injection.enabled {
6062 self.config.anomaly_injection.rates.fraud_rate
6063 } else {
6064 AnomalyRateConfig::default().fraud_rate
6065 };
6066
6067 let error_rate = if self.config.anomaly_injection.enabled {
6068 self.config.anomaly_injection.rates.error_rate
6069 } else {
6070 AnomalyRateConfig::default().error_rate
6071 };
6072
6073 let process_issue_rate = if self.config.anomaly_injection.enabled {
6074 self.config.anomaly_injection.rates.process_rate
6075 } else {
6076 AnomalyRateConfig::default().process_issue_rate
6077 };
6078
6079 let anomaly_config = AnomalyInjectorConfig {
6080 rates: AnomalyRateConfig {
6081 total_rate,
6082 fraud_rate,
6083 error_rate,
6084 process_issue_rate,
6085 ..Default::default()
6086 },
6087 seed: self.seed + 5000,
6088 ..Default::default()
6089 };
6090
6091 let mut injector = AnomalyInjector::new(anomaly_config);
6092 let result = injector.process_entries(entries);
6093
6094 if let Some(pb) = &pb {
6095 pb.inc(entries.len() as u64);
6096 pb.finish_with_message("Anomaly injection complete");
6097 }
6098
6099 let mut by_type = HashMap::new();
6100 for label in &result.labels {
6101 *by_type
6102 .entry(format!("{:?}", label.anomaly_type))
6103 .or_insert(0) += 1;
6104 }
6105
6106 Ok(AnomalyLabels {
6107 labels: result.labels,
6108 summary: Some(result.summary),
6109 by_type,
6110 })
6111 }
6112
6113 fn validate_journal_entries(
6122 &mut self,
6123 entries: &[JournalEntry],
6124 ) -> SynthResult<BalanceValidationResult> {
6125 let clean_entries: Vec<&JournalEntry> = entries
6127 .iter()
6128 .filter(|e| {
6129 e.header
6130 .header_text
6131 .as_ref()
6132 .map(|t| !t.contains("[HUMAN_ERROR:"))
6133 .unwrap_or(true)
6134 })
6135 .collect();
6136
6137 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
6138
6139 let config = BalanceTrackerConfig {
6141 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
6145 };
6146 let validation_currency = self
6147 .config
6148 .companies
6149 .first()
6150 .map(|c| c.currency.clone())
6151 .unwrap_or_else(|| "USD".to_string());
6152
6153 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
6154
6155 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
6157 let errors = tracker.apply_entries(&clean_refs);
6158
6159 if let Some(pb) = &pb {
6160 pb.inc(entries.len() as u64);
6161 }
6162
6163 let has_unbalanced = tracker
6166 .get_validation_errors()
6167 .iter()
6168 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
6169
6170 let mut all_errors = errors;
6173 all_errors.extend(tracker.get_validation_errors().iter().cloned());
6174 let company_codes: Vec<String> = self
6175 .config
6176 .companies
6177 .iter()
6178 .map(|c| c.code.clone())
6179 .collect();
6180
6181 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6182 .map(|d| d + chrono::Months::new(self.config.global.period_months))
6183 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6184
6185 for company_code in &company_codes {
6186 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
6187 all_errors.push(e);
6188 }
6189 }
6190
6191 let stats = tracker.get_statistics();
6193
6194 let is_balanced = all_errors.is_empty();
6196
6197 if let Some(pb) = pb {
6198 let msg = if is_balanced {
6199 "Balance validation passed"
6200 } else {
6201 "Balance validation completed with errors"
6202 };
6203 pb.finish_with_message(msg);
6204 }
6205
6206 Ok(BalanceValidationResult {
6207 validated: true,
6208 is_balanced,
6209 entries_processed: stats.entries_processed,
6210 total_debits: stats.total_debits,
6211 total_credits: stats.total_credits,
6212 accounts_tracked: stats.accounts_tracked,
6213 companies_tracked: stats.companies_tracked,
6214 validation_errors: all_errors,
6215 has_unbalanced_entries: has_unbalanced,
6216 })
6217 }
6218
6219 fn inject_data_quality(
6224 &mut self,
6225 entries: &mut [JournalEntry],
6226 ) -> SynthResult<DataQualityStats> {
6227 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
6228
6229 let config = if self.config.data_quality.enabled {
6232 let dq = &self.config.data_quality;
6233 DataQualityConfig {
6234 enable_missing_values: dq.missing_values.enabled,
6235 missing_values: datasynth_generators::MissingValueConfig {
6236 global_rate: dq.effective_missing_rate(),
6237 ..Default::default()
6238 },
6239 enable_format_variations: dq.format_variations.enabled,
6240 format_variations: datasynth_generators::FormatVariationConfig {
6241 date_variation_rate: dq.format_variations.dates.rate,
6242 amount_variation_rate: dq.format_variations.amounts.rate,
6243 identifier_variation_rate: dq.format_variations.identifiers.rate,
6244 ..Default::default()
6245 },
6246 enable_duplicates: dq.duplicates.enabled,
6247 duplicates: datasynth_generators::DuplicateConfig {
6248 duplicate_rate: dq.effective_duplicate_rate(),
6249 ..Default::default()
6250 },
6251 enable_typos: dq.typos.enabled,
6252 typos: datasynth_generators::TypoConfig {
6253 char_error_rate: dq.effective_typo_rate(),
6254 ..Default::default()
6255 },
6256 enable_encoding_issues: dq.encoding_issues.enabled,
6257 encoding_issue_rate: dq.encoding_issues.rate,
6258 seed: self.seed.wrapping_add(77), track_statistics: true,
6260 }
6261 } else {
6262 DataQualityConfig::minimal()
6263 };
6264 let mut injector = DataQualityInjector::new(config);
6265
6266 injector.set_country_pack(self.primary_pack().clone());
6268
6269 let context = HashMap::new();
6271
6272 for entry in entries.iter_mut() {
6273 if let Some(text) = &entry.header.header_text {
6275 let processed = injector.process_text_field(
6276 "header_text",
6277 text,
6278 &entry.header.document_id.to_string(),
6279 &context,
6280 );
6281 match processed {
6282 Some(new_text) if new_text != *text => {
6283 entry.header.header_text = Some(new_text);
6284 }
6285 None => {
6286 entry.header.header_text = None; }
6288 _ => {}
6289 }
6290 }
6291
6292 if let Some(ref_text) = &entry.header.reference {
6294 let processed = injector.process_text_field(
6295 "reference",
6296 ref_text,
6297 &entry.header.document_id.to_string(),
6298 &context,
6299 );
6300 match processed {
6301 Some(new_text) if new_text != *ref_text => {
6302 entry.header.reference = Some(new_text);
6303 }
6304 None => {
6305 entry.header.reference = None;
6306 }
6307 _ => {}
6308 }
6309 }
6310
6311 let user_persona = entry.header.user_persona.clone();
6313 if let Some(processed) = injector.process_text_field(
6314 "user_persona",
6315 &user_persona,
6316 &entry.header.document_id.to_string(),
6317 &context,
6318 ) {
6319 if processed != user_persona {
6320 entry.header.user_persona = processed;
6321 }
6322 }
6323
6324 for line in &mut entry.lines {
6326 if let Some(ref text) = line.line_text {
6328 let processed = injector.process_text_field(
6329 "line_text",
6330 text,
6331 &entry.header.document_id.to_string(),
6332 &context,
6333 );
6334 match processed {
6335 Some(new_text) if new_text != *text => {
6336 line.line_text = Some(new_text);
6337 }
6338 None => {
6339 line.line_text = None;
6340 }
6341 _ => {}
6342 }
6343 }
6344
6345 if let Some(cc) = &line.cost_center {
6347 let processed = injector.process_text_field(
6348 "cost_center",
6349 cc,
6350 &entry.header.document_id.to_string(),
6351 &context,
6352 );
6353 match processed {
6354 Some(new_cc) if new_cc != *cc => {
6355 line.cost_center = Some(new_cc);
6356 }
6357 None => {
6358 line.cost_center = None;
6359 }
6360 _ => {}
6361 }
6362 }
6363 }
6364
6365 if let Some(pb) = &pb {
6366 pb.inc(1);
6367 }
6368 }
6369
6370 if let Some(pb) = pb {
6371 pb.finish_with_message("Data quality injection complete");
6372 }
6373
6374 Ok(injector.stats().clone())
6375 }
6376
6377 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
6388 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6389 .map_err(|e| SynthError::config(format!("Invalid start_date: {}", e)))?;
6390 let fiscal_year = start_date.year() as u16;
6391 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
6392
6393 let total_revenue: rust_decimal::Decimal = entries
6395 .iter()
6396 .flat_map(|e| e.lines.iter())
6397 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
6398 .map(|l| l.credit_amount)
6399 .sum();
6400
6401 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
6403
6404 let mut snapshot = AuditSnapshot::default();
6405
6406 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
6408 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
6409 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
6410 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
6411 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
6412 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
6413
6414 let accounts: Vec<String> = self
6416 .coa
6417 .as_ref()
6418 .map(|coa| {
6419 coa.get_postable_accounts()
6420 .iter()
6421 .map(|acc| acc.account_code().to_string())
6422 .collect()
6423 })
6424 .unwrap_or_default();
6425
6426 for (i, company) in self.config.companies.iter().enumerate() {
6428 let company_revenue = total_revenue
6430 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
6431
6432 let engagements_for_company =
6434 self.phase_config.audit_engagements / self.config.companies.len().max(1);
6435 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
6436 1
6437 } else {
6438 0
6439 };
6440
6441 for _eng_idx in 0..(engagements_for_company + extra) {
6442 let mut engagement = engagement_gen.generate_engagement(
6444 &company.code,
6445 &company.name,
6446 fiscal_year,
6447 period_end,
6448 company_revenue,
6449 None, );
6451
6452 if !self.master_data.employees.is_empty() {
6454 let emp_count = self.master_data.employees.len();
6455 let base = (i * 10 + _eng_idx) % emp_count;
6457 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
6458 .employee_id
6459 .clone();
6460 engagement.engagement_manager_id = self.master_data.employees
6461 [(base + 1) % emp_count]
6462 .employee_id
6463 .clone();
6464 let real_team: Vec<String> = engagement
6465 .team_member_ids
6466 .iter()
6467 .enumerate()
6468 .map(|(j, _)| {
6469 self.master_data.employees[(base + 2 + j) % emp_count]
6470 .employee_id
6471 .clone()
6472 })
6473 .collect();
6474 engagement.team_member_ids = real_team;
6475 }
6476
6477 if let Some(pb) = &pb {
6478 pb.inc(1);
6479 }
6480
6481 let team_members: Vec<String> = engagement.team_member_ids.clone();
6483
6484 let workpapers =
6486 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
6487
6488 for wp in &workpapers {
6489 if let Some(pb) = &pb {
6490 pb.inc(1);
6491 }
6492
6493 let evidence = evidence_gen.generate_evidence_for_workpaper(
6495 wp,
6496 &team_members,
6497 wp.preparer_date,
6498 );
6499
6500 for _ in &evidence {
6501 if let Some(pb) = &pb {
6502 pb.inc(1);
6503 }
6504 }
6505
6506 snapshot.evidence.extend(evidence);
6507 }
6508
6509 let risks =
6511 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
6512
6513 for _ in &risks {
6514 if let Some(pb) = &pb {
6515 pb.inc(1);
6516 }
6517 }
6518 snapshot.risk_assessments.extend(risks);
6519
6520 let findings = finding_gen.generate_findings_for_engagement(
6522 &engagement,
6523 &workpapers,
6524 &team_members,
6525 );
6526
6527 for _ in &findings {
6528 if let Some(pb) = &pb {
6529 pb.inc(1);
6530 }
6531 }
6532 snapshot.findings.extend(findings);
6533
6534 let judgments =
6536 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
6537
6538 for _ in &judgments {
6539 if let Some(pb) = &pb {
6540 pb.inc(1);
6541 }
6542 }
6543 snapshot.judgments.extend(judgments);
6544
6545 snapshot.workpapers.extend(workpapers);
6547 snapshot.engagements.push(engagement);
6548 }
6549 }
6550
6551 if let Some(pb) = pb {
6552 pb.finish_with_message(format!(
6553 "Audit data: {} engagements, {} workpapers, {} evidence",
6554 snapshot.engagements.len(),
6555 snapshot.workpapers.len(),
6556 snapshot.evidence.len()
6557 ));
6558 }
6559
6560 Ok(snapshot)
6561 }
6562
6563 fn export_graphs(
6570 &mut self,
6571 entries: &[JournalEntry],
6572 _coa: &Arc<ChartOfAccounts>,
6573 stats: &mut EnhancedGenerationStatistics,
6574 ) -> SynthResult<GraphExportSnapshot> {
6575 let pb = self.create_progress_bar(100, "Exporting Graphs");
6576
6577 let mut snapshot = GraphExportSnapshot::default();
6578
6579 let output_dir = self
6581 .output_path
6582 .clone()
6583 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6584 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6585
6586 for graph_type in &self.config.graph_export.graph_types {
6588 if let Some(pb) = &pb {
6589 pb.inc(10);
6590 }
6591
6592 let graph_config = TransactionGraphConfig {
6594 include_vendors: false,
6595 include_customers: false,
6596 create_debit_credit_edges: true,
6597 include_document_nodes: graph_type.include_document_nodes,
6598 min_edge_weight: graph_type.min_edge_weight,
6599 aggregate_parallel_edges: graph_type.aggregate_edges,
6600 framework: None,
6601 };
6602
6603 let mut builder = TransactionGraphBuilder::new(graph_config);
6604 builder.add_journal_entries(entries);
6605 let graph = builder.build();
6606
6607 stats.graph_node_count += graph.node_count();
6609 stats.graph_edge_count += graph.edge_count();
6610
6611 if let Some(pb) = &pb {
6612 pb.inc(40);
6613 }
6614
6615 for format in &self.config.graph_export.formats {
6617 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
6618
6619 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6621 warn!("Failed to create graph output directory: {}", e);
6622 continue;
6623 }
6624
6625 match format {
6626 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
6627 let pyg_config = PyGExportConfig {
6628 common: datasynth_graph::CommonExportConfig {
6629 export_node_features: true,
6630 export_edge_features: true,
6631 export_node_labels: true,
6632 export_edge_labels: true,
6633 export_masks: true,
6634 train_ratio: self.config.graph_export.train_ratio,
6635 val_ratio: self.config.graph_export.validation_ratio,
6636 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6637 },
6638 one_hot_categoricals: false,
6639 };
6640
6641 let exporter = PyGExporter::new(pyg_config);
6642 match exporter.export(&graph, &format_dir) {
6643 Ok(metadata) => {
6644 snapshot.exports.insert(
6645 format!("{}_{}", graph_type.name, "pytorch_geometric"),
6646 GraphExportInfo {
6647 name: graph_type.name.clone(),
6648 format: "pytorch_geometric".to_string(),
6649 output_path: format_dir.clone(),
6650 node_count: metadata.num_nodes,
6651 edge_count: metadata.num_edges,
6652 },
6653 );
6654 snapshot.graph_count += 1;
6655 }
6656 Err(e) => {
6657 warn!("Failed to export PyTorch Geometric graph: {}", e);
6658 }
6659 }
6660 }
6661 datasynth_config::schema::GraphExportFormat::Neo4j => {
6662 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
6663
6664 let neo4j_config = Neo4jExportConfig {
6665 export_node_properties: true,
6666 export_edge_properties: true,
6667 export_features: true,
6668 generate_cypher: true,
6669 generate_admin_import: true,
6670 database_name: "synth".to_string(),
6671 cypher_batch_size: 1000,
6672 };
6673
6674 let exporter = Neo4jExporter::new(neo4j_config);
6675 match exporter.export(&graph, &format_dir) {
6676 Ok(metadata) => {
6677 snapshot.exports.insert(
6678 format!("{}_{}", graph_type.name, "neo4j"),
6679 GraphExportInfo {
6680 name: graph_type.name.clone(),
6681 format: "neo4j".to_string(),
6682 output_path: format_dir.clone(),
6683 node_count: metadata.num_nodes,
6684 edge_count: metadata.num_edges,
6685 },
6686 );
6687 snapshot.graph_count += 1;
6688 }
6689 Err(e) => {
6690 warn!("Failed to export Neo4j graph: {}", e);
6691 }
6692 }
6693 }
6694 datasynth_config::schema::GraphExportFormat::Dgl => {
6695 use datasynth_graph::{DGLExportConfig, DGLExporter};
6696
6697 let dgl_config = DGLExportConfig {
6698 common: datasynth_graph::CommonExportConfig {
6699 export_node_features: true,
6700 export_edge_features: true,
6701 export_node_labels: true,
6702 export_edge_labels: true,
6703 export_masks: true,
6704 train_ratio: self.config.graph_export.train_ratio,
6705 val_ratio: self.config.graph_export.validation_ratio,
6706 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
6707 },
6708 heterogeneous: false,
6709 include_pickle_script: true, };
6711
6712 let exporter = DGLExporter::new(dgl_config);
6713 match exporter.export(&graph, &format_dir) {
6714 Ok(metadata) => {
6715 snapshot.exports.insert(
6716 format!("{}_{}", graph_type.name, "dgl"),
6717 GraphExportInfo {
6718 name: graph_type.name.clone(),
6719 format: "dgl".to_string(),
6720 output_path: format_dir.clone(),
6721 node_count: metadata.common.num_nodes,
6722 edge_count: metadata.common.num_edges,
6723 },
6724 );
6725 snapshot.graph_count += 1;
6726 }
6727 Err(e) => {
6728 warn!("Failed to export DGL graph: {}", e);
6729 }
6730 }
6731 }
6732 datasynth_config::schema::GraphExportFormat::RustGraph => {
6733 use datasynth_graph::{
6734 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
6735 };
6736
6737 let rustgraph_config = RustGraphExportConfig {
6738 include_features: true,
6739 include_temporal: true,
6740 include_labels: true,
6741 source_name: "datasynth".to_string(),
6742 batch_id: None,
6743 output_format: RustGraphOutputFormat::JsonLines,
6744 export_node_properties: true,
6745 export_edge_properties: true,
6746 pretty_print: false,
6747 };
6748
6749 let exporter = RustGraphExporter::new(rustgraph_config);
6750 match exporter.export(&graph, &format_dir) {
6751 Ok(metadata) => {
6752 snapshot.exports.insert(
6753 format!("{}_{}", graph_type.name, "rustgraph"),
6754 GraphExportInfo {
6755 name: graph_type.name.clone(),
6756 format: "rustgraph".to_string(),
6757 output_path: format_dir.clone(),
6758 node_count: metadata.num_nodes,
6759 edge_count: metadata.num_edges,
6760 },
6761 );
6762 snapshot.graph_count += 1;
6763 }
6764 Err(e) => {
6765 warn!("Failed to export RustGraph: {}", e);
6766 }
6767 }
6768 }
6769 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
6770 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
6772 }
6773 }
6774 }
6775
6776 if let Some(pb) = &pb {
6777 pb.inc(40);
6778 }
6779 }
6780
6781 stats.graph_export_count = snapshot.graph_count;
6782 snapshot.exported = snapshot.graph_count > 0;
6783
6784 if let Some(pb) = pb {
6785 pb.finish_with_message(format!(
6786 "Graphs exported: {} graphs ({} nodes, {} edges)",
6787 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
6788 ));
6789 }
6790
6791 Ok(snapshot)
6792 }
6793
6794 fn build_additional_graphs(
6799 &self,
6800 banking: &BankingSnapshot,
6801 _intercompany: &IntercompanySnapshot,
6802 entries: &[JournalEntry],
6803 stats: &mut EnhancedGenerationStatistics,
6804 ) {
6805 let output_dir = self
6806 .output_path
6807 .clone()
6808 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
6809 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
6810
6811 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
6813 info!("Phase 10c: Building banking network graph");
6814 let config = BankingGraphConfig::default();
6815 let mut builder = BankingGraphBuilder::new(config);
6816 builder.add_customers(&banking.customers);
6817 builder.add_accounts(&banking.accounts, &banking.customers);
6818 builder.add_transactions(&banking.transactions);
6819 let graph = builder.build();
6820
6821 let node_count = graph.node_count();
6822 let edge_count = graph.edge_count();
6823 stats.graph_node_count += node_count;
6824 stats.graph_edge_count += edge_count;
6825
6826 for format in &self.config.graph_export.formats {
6828 if matches!(
6829 format,
6830 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6831 ) {
6832 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
6833 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6834 warn!("Failed to create banking graph output dir: {}", e);
6835 continue;
6836 }
6837 let pyg_config = PyGExportConfig::default();
6838 let exporter = PyGExporter::new(pyg_config);
6839 if let Err(e) = exporter.export(&graph, &format_dir) {
6840 warn!("Failed to export banking graph as PyG: {}", e);
6841 } else {
6842 info!(
6843 "Banking network graph exported: {} nodes, {} edges",
6844 node_count, edge_count
6845 );
6846 }
6847 }
6848 }
6849 }
6850
6851 let approval_entries: Vec<_> = entries
6853 .iter()
6854 .filter(|je| je.header.approval_workflow.is_some())
6855 .collect();
6856
6857 if !approval_entries.is_empty() {
6858 info!(
6859 "Phase 10c: Building approval network graph ({} entries with approvals)",
6860 approval_entries.len()
6861 );
6862 let config = ApprovalGraphConfig::default();
6863 let mut builder = ApprovalGraphBuilder::new(config);
6864
6865 for je in &approval_entries {
6866 if let Some(ref wf) = je.header.approval_workflow {
6867 for action in &wf.actions {
6868 let record = datasynth_core::models::ApprovalRecord {
6869 approval_id: format!(
6870 "APR-{}-{}",
6871 je.header.document_id, action.approval_level
6872 ),
6873 document_number: je.header.document_id.to_string(),
6874 document_type: "JE".to_string(),
6875 company_code: je.company_code().to_string(),
6876 requester_id: wf.preparer_id.clone(),
6877 requester_name: Some(wf.preparer_name.clone()),
6878 approver_id: action.actor_id.clone(),
6879 approver_name: action.actor_name.clone(),
6880 approval_date: je.posting_date(),
6881 action: format!("{:?}", action.action),
6882 amount: wf.amount,
6883 approval_limit: None,
6884 comments: action.comments.clone(),
6885 delegation_from: None,
6886 is_auto_approved: false,
6887 };
6888 builder.add_approval(&record);
6889 }
6890 }
6891 }
6892
6893 let graph = builder.build();
6894 let node_count = graph.node_count();
6895 let edge_count = graph.edge_count();
6896 stats.graph_node_count += node_count;
6897 stats.graph_edge_count += edge_count;
6898
6899 for format in &self.config.graph_export.formats {
6901 if matches!(
6902 format,
6903 datasynth_config::schema::GraphExportFormat::PytorchGeometric
6904 ) {
6905 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
6906 if let Err(e) = std::fs::create_dir_all(&format_dir) {
6907 warn!("Failed to create approval graph output dir: {}", e);
6908 continue;
6909 }
6910 let pyg_config = PyGExportConfig::default();
6911 let exporter = PyGExporter::new(pyg_config);
6912 if let Err(e) = exporter.export(&graph, &format_dir) {
6913 warn!("Failed to export approval graph as PyG: {}", e);
6914 } else {
6915 info!(
6916 "Approval network graph exported: {} nodes, {} edges",
6917 node_count, edge_count
6918 );
6919 }
6920 }
6921 }
6922 }
6923
6924 debug!(
6927 "EntityGraphBuilder: skipped (requires Company→CompanyConfig mapping; \
6928 available when intercompany relationships are modeled as IntercompanyRelationship)"
6929 );
6930 }
6931
6932 #[allow(clippy::too_many_arguments)]
6939 fn export_hypergraph(
6940 &self,
6941 coa: &Arc<ChartOfAccounts>,
6942 entries: &[JournalEntry],
6943 document_flows: &DocumentFlowSnapshot,
6944 sourcing: &SourcingSnapshot,
6945 hr: &HrSnapshot,
6946 manufacturing: &ManufacturingSnapshot,
6947 banking: &BankingSnapshot,
6948 audit: &AuditSnapshot,
6949 financial_reporting: &FinancialReportingSnapshot,
6950 ocpm: &OcpmSnapshot,
6951 stats: &mut EnhancedGenerationStatistics,
6952 ) -> SynthResult<HypergraphExportInfo> {
6953 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
6954 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
6955 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
6956 use datasynth_graph::models::hypergraph::AggregationStrategy;
6957
6958 let hg_settings = &self.config.graph_export.hypergraph;
6959
6960 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
6962 "truncate" => AggregationStrategy::Truncate,
6963 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
6964 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
6965 "importance_sample" => AggregationStrategy::ImportanceSample,
6966 _ => AggregationStrategy::PoolByCounterparty,
6967 };
6968
6969 let builder_config = HypergraphConfig {
6970 max_nodes: hg_settings.max_nodes,
6971 aggregation_strategy,
6972 include_coso: hg_settings.governance_layer.include_coso,
6973 include_controls: hg_settings.governance_layer.include_controls,
6974 include_sox: hg_settings.governance_layer.include_sox,
6975 include_vendors: hg_settings.governance_layer.include_vendors,
6976 include_customers: hg_settings.governance_layer.include_customers,
6977 include_employees: hg_settings.governance_layer.include_employees,
6978 include_p2p: hg_settings.process_layer.include_p2p,
6979 include_o2c: hg_settings.process_layer.include_o2c,
6980 include_s2c: hg_settings.process_layer.include_s2c,
6981 include_h2r: hg_settings.process_layer.include_h2r,
6982 include_mfg: hg_settings.process_layer.include_mfg,
6983 include_bank: hg_settings.process_layer.include_bank,
6984 include_audit: hg_settings.process_layer.include_audit,
6985 include_r2r: hg_settings.process_layer.include_r2r,
6986 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
6987 docs_per_counterparty_threshold: hg_settings
6988 .process_layer
6989 .docs_per_counterparty_threshold,
6990 include_accounts: hg_settings.accounting_layer.include_accounts,
6991 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
6992 include_cross_layer_edges: hg_settings.cross_layer.enabled,
6993 };
6994
6995 let mut builder = HypergraphBuilder::new(builder_config);
6996
6997 builder.add_coso_framework();
6999
7000 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
7003 let controls = InternalControl::standard_controls();
7004 builder.add_controls(&controls);
7005 }
7006
7007 builder.add_vendors(&self.master_data.vendors);
7009 builder.add_customers(&self.master_data.customers);
7010 builder.add_employees(&self.master_data.employees);
7011
7012 builder.add_p2p_documents(
7014 &document_flows.purchase_orders,
7015 &document_flows.goods_receipts,
7016 &document_flows.vendor_invoices,
7017 &document_flows.payments,
7018 );
7019 builder.add_o2c_documents(
7020 &document_flows.sales_orders,
7021 &document_flows.deliveries,
7022 &document_flows.customer_invoices,
7023 );
7024 builder.add_s2c_documents(
7025 &sourcing.sourcing_projects,
7026 &sourcing.qualifications,
7027 &sourcing.rfx_events,
7028 &sourcing.bids,
7029 &sourcing.bid_evaluations,
7030 &sourcing.contracts,
7031 );
7032 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
7033 builder.add_mfg_documents(
7034 &manufacturing.production_orders,
7035 &manufacturing.quality_inspections,
7036 &manufacturing.cycle_counts,
7037 );
7038 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
7039 builder.add_audit_documents(
7040 &audit.engagements,
7041 &audit.workpapers,
7042 &audit.findings,
7043 &audit.evidence,
7044 &audit.risk_assessments,
7045 &audit.judgments,
7046 );
7047 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
7048
7049 if let Some(ref event_log) = ocpm.event_log {
7051 builder.add_ocpm_events(event_log);
7052 }
7053
7054 builder.add_accounts(coa);
7056 builder.add_journal_entries_as_hyperedges(entries);
7057
7058 let hypergraph = builder.build();
7060
7061 let output_dir = self
7063 .output_path
7064 .clone()
7065 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7066 let hg_dir = output_dir
7067 .join(&self.config.graph_export.output_subdirectory)
7068 .join(&hg_settings.output_subdirectory);
7069
7070 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
7072 "unified" => {
7073 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7074 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7075 SynthError::generation(format!("Unified hypergraph export failed: {}", e))
7076 })?;
7077 (
7078 metadata.num_nodes,
7079 metadata.num_edges,
7080 metadata.num_hyperedges,
7081 )
7082 }
7083 _ => {
7084 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
7086 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
7087 SynthError::generation(format!("Hypergraph export failed: {}", e))
7088 })?;
7089 (
7090 metadata.num_nodes,
7091 metadata.num_edges,
7092 metadata.num_hyperedges,
7093 )
7094 }
7095 };
7096
7097 #[cfg(feature = "streaming")]
7099 if let Some(ref target_url) = hg_settings.stream_target {
7100 use crate::stream_client::{StreamClient, StreamConfig};
7101 use std::io::Write as _;
7102
7103 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
7104 let stream_config = StreamConfig {
7105 target_url: target_url.clone(),
7106 batch_size: hg_settings.stream_batch_size,
7107 api_key,
7108 ..StreamConfig::default()
7109 };
7110
7111 match StreamClient::new(stream_config) {
7112 Ok(mut client) => {
7113 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
7114 match exporter.export_to_writer(&hypergraph, &mut client) {
7115 Ok(_) => {
7116 if let Err(e) = client.flush() {
7117 warn!("Failed to flush stream client: {}", e);
7118 } else {
7119 info!("Streamed {} records to {}", client.total_sent(), target_url);
7120 }
7121 }
7122 Err(e) => {
7123 warn!("Streaming export failed: {}", e);
7124 }
7125 }
7126 }
7127 Err(e) => {
7128 warn!("Failed to create stream client: {}", e);
7129 }
7130 }
7131 }
7132
7133 stats.graph_node_count += num_nodes;
7135 stats.graph_edge_count += num_edges;
7136 stats.graph_export_count += 1;
7137
7138 Ok(HypergraphExportInfo {
7139 node_count: num_nodes,
7140 edge_count: num_edges,
7141 hyperedge_count: num_hyperedges,
7142 output_path: hg_dir,
7143 })
7144 }
7145
7146 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
7151 let pb = self.create_progress_bar(100, "Generating Banking Data");
7152
7153 let orchestrator = BankingOrchestratorBuilder::new()
7155 .config(self.config.banking.clone())
7156 .seed(self.seed + 9000)
7157 .country_pack(self.primary_pack().clone())
7158 .build();
7159
7160 if let Some(pb) = &pb {
7161 pb.inc(10);
7162 }
7163
7164 let result = orchestrator.generate();
7166
7167 if let Some(pb) = &pb {
7168 pb.inc(90);
7169 pb.finish_with_message(format!(
7170 "Banking: {} customers, {} transactions",
7171 result.customers.len(),
7172 result.transactions.len()
7173 ));
7174 }
7175
7176 let mut banking_customers = result.customers;
7181 let core_customers = &self.master_data.customers;
7182 if !core_customers.is_empty() {
7183 for (i, bc) in banking_customers.iter_mut().enumerate() {
7184 let core = &core_customers[i % core_customers.len()];
7185 bc.name = CustomerName::business(&core.name);
7186 bc.residence_country = core.country.clone();
7187 bc.enterprise_customer_id = Some(core.customer_id.clone());
7188 }
7189 debug!(
7190 "Cross-referenced {} banking customers with {} core customers",
7191 banking_customers.len(),
7192 core_customers.len()
7193 );
7194 }
7195
7196 Ok(BankingSnapshot {
7197 customers: banking_customers,
7198 accounts: result.accounts,
7199 transactions: result.transactions,
7200 transaction_labels: result.transaction_labels,
7201 customer_labels: result.customer_labels,
7202 account_labels: result.account_labels,
7203 relationship_labels: result.relationship_labels,
7204 narratives: result.narratives,
7205 suspicious_count: result.stats.suspicious_count,
7206 scenario_count: result.scenarios.len(),
7207 })
7208 }
7209
7210 fn calculate_total_transactions(&self) -> u64 {
7212 let months = self.config.global.period_months as f64;
7213 self.config
7214 .companies
7215 .iter()
7216 .map(|c| {
7217 let annual = c.annual_transaction_volume.count() as f64;
7218 let weighted = annual * c.volume_weight;
7219 (weighted * months / 12.0) as u64
7220 })
7221 .sum()
7222 }
7223
7224 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
7226 if !self.phase_config.show_progress {
7227 return None;
7228 }
7229
7230 let pb = if let Some(mp) = &self.multi_progress {
7231 mp.add(ProgressBar::new(total))
7232 } else {
7233 ProgressBar::new(total)
7234 };
7235
7236 pb.set_style(
7237 ProgressStyle::default_bar()
7238 .template(&format!(
7239 "{{spinner:.green}} {} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})",
7240 message
7241 ))
7242 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
7243 .progress_chars("#>-"),
7244 );
7245
7246 Some(pb)
7247 }
7248
7249 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
7251 self.coa.clone()
7252 }
7253
7254 pub fn get_master_data(&self) -> &MasterDataSnapshot {
7256 &self.master_data
7257 }
7258
7259 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
7261 use super::lineage::LineageGraphBuilder;
7262
7263 let mut builder = LineageGraphBuilder::new();
7264
7265 builder.add_config_section("config:global", "Global Config");
7267 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
7268 builder.add_config_section("config:transactions", "Transaction Config");
7269
7270 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
7272 builder.add_generator_phase("phase:je", "Journal Entry Generation");
7273
7274 builder.configured_by("phase:coa", "config:chart_of_accounts");
7276 builder.configured_by("phase:je", "config:transactions");
7277
7278 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
7280 builder.produced_by("output:je", "phase:je");
7281
7282 if self.phase_config.generate_master_data {
7284 builder.add_config_section("config:master_data", "Master Data Config");
7285 builder.add_generator_phase("phase:master_data", "Master Data Generation");
7286 builder.configured_by("phase:master_data", "config:master_data");
7287 builder.input_to("phase:master_data", "phase:je");
7288 }
7289
7290 if self.phase_config.generate_document_flows {
7291 builder.add_config_section("config:document_flows", "Document Flow Config");
7292 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
7293 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
7294 builder.configured_by("phase:p2p", "config:document_flows");
7295 builder.configured_by("phase:o2c", "config:document_flows");
7296
7297 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
7298 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
7299 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
7300 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
7301 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
7302
7303 builder.produced_by("output:po", "phase:p2p");
7304 builder.produced_by("output:gr", "phase:p2p");
7305 builder.produced_by("output:vi", "phase:p2p");
7306 builder.produced_by("output:so", "phase:o2c");
7307 builder.produced_by("output:ci", "phase:o2c");
7308 }
7309
7310 if self.phase_config.inject_anomalies {
7311 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
7312 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
7313 builder.configured_by("phase:anomaly", "config:fraud");
7314 builder.add_output_file(
7315 "output:labels",
7316 "Anomaly Labels",
7317 "labels/anomaly_labels.csv",
7318 );
7319 builder.produced_by("output:labels", "phase:anomaly");
7320 }
7321
7322 if self.phase_config.generate_audit {
7323 builder.add_config_section("config:audit", "Audit Config");
7324 builder.add_generator_phase("phase:audit", "Audit Data Generation");
7325 builder.configured_by("phase:audit", "config:audit");
7326 }
7327
7328 if self.phase_config.generate_banking {
7329 builder.add_config_section("config:banking", "Banking Config");
7330 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
7331 builder.configured_by("phase:banking", "config:banking");
7332 }
7333
7334 if self.config.llm.enabled {
7335 builder.add_config_section("config:llm", "LLM Enrichment Config");
7336 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
7337 builder.configured_by("phase:llm_enrichment", "config:llm");
7338 }
7339
7340 if self.config.diffusion.enabled {
7341 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
7342 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
7343 builder.configured_by("phase:diffusion", "config:diffusion");
7344 }
7345
7346 if self.config.causal.enabled {
7347 builder.add_config_section("config:causal", "Causal Generation Config");
7348 builder.add_generator_phase("phase:causal", "Causal Overlay");
7349 builder.configured_by("phase:causal", "config:causal");
7350 }
7351
7352 builder.build()
7353 }
7354}
7355
7356fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
7358 match format {
7359 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
7360 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
7361 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
7362 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
7363 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
7364 }
7365}
7366
7367#[cfg(test)]
7368#[allow(clippy::unwrap_used)]
7369mod tests {
7370 use super::*;
7371 use datasynth_config::schema::*;
7372
7373 fn create_test_config() -> GeneratorConfig {
7374 GeneratorConfig {
7375 global: GlobalConfig {
7376 industry: IndustrySector::Manufacturing,
7377 start_date: "2024-01-01".to_string(),
7378 period_months: 1,
7379 seed: Some(42),
7380 parallel: false,
7381 group_currency: "USD".to_string(),
7382 worker_threads: 0,
7383 memory_limit_mb: 0,
7384 },
7385 companies: vec![CompanyConfig {
7386 code: "1000".to_string(),
7387 name: "Test Company".to_string(),
7388 currency: "USD".to_string(),
7389 country: "US".to_string(),
7390 annual_transaction_volume: TransactionVolume::TenK,
7391 volume_weight: 1.0,
7392 fiscal_year_variant: "K4".to_string(),
7393 }],
7394 chart_of_accounts: ChartOfAccountsConfig {
7395 complexity: CoAComplexity::Small,
7396 industry_specific: true,
7397 custom_accounts: None,
7398 min_hierarchy_depth: 2,
7399 max_hierarchy_depth: 4,
7400 },
7401 transactions: TransactionConfig::default(),
7402 output: OutputConfig::default(),
7403 fraud: FraudConfig::default(),
7404 internal_controls: InternalControlsConfig::default(),
7405 business_processes: BusinessProcessConfig::default(),
7406 user_personas: UserPersonaConfig::default(),
7407 templates: TemplateConfig::default(),
7408 approval: ApprovalConfig::default(),
7409 departments: DepartmentConfig::default(),
7410 master_data: MasterDataConfig::default(),
7411 document_flows: DocumentFlowConfig::default(),
7412 intercompany: IntercompanyConfig::default(),
7413 balance: BalanceConfig::default(),
7414 ocpm: OcpmConfig::default(),
7415 audit: AuditGenerationConfig::default(),
7416 banking: datasynth_banking::BankingConfig::default(),
7417 data_quality: DataQualitySchemaConfig::default(),
7418 scenario: ScenarioConfig::default(),
7419 temporal: TemporalDriftConfig::default(),
7420 graph_export: GraphExportConfig::default(),
7421 streaming: StreamingSchemaConfig::default(),
7422 rate_limit: RateLimitSchemaConfig::default(),
7423 temporal_attributes: TemporalAttributeSchemaConfig::default(),
7424 relationships: RelationshipSchemaConfig::default(),
7425 accounting_standards: AccountingStandardsConfig::default(),
7426 audit_standards: AuditStandardsConfig::default(),
7427 distributions: Default::default(),
7428 temporal_patterns: Default::default(),
7429 vendor_network: VendorNetworkSchemaConfig::default(),
7430 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
7431 relationship_strength: RelationshipStrengthSchemaConfig::default(),
7432 cross_process_links: CrossProcessLinksSchemaConfig::default(),
7433 organizational_events: OrganizationalEventsSchemaConfig::default(),
7434 behavioral_drift: BehavioralDriftSchemaConfig::default(),
7435 market_drift: MarketDriftSchemaConfig::default(),
7436 drift_labeling: DriftLabelingSchemaConfig::default(),
7437 anomaly_injection: Default::default(),
7438 industry_specific: Default::default(),
7439 fingerprint_privacy: Default::default(),
7440 quality_gates: Default::default(),
7441 compliance: Default::default(),
7442 webhooks: Default::default(),
7443 llm: Default::default(),
7444 diffusion: Default::default(),
7445 causal: Default::default(),
7446 source_to_pay: Default::default(),
7447 financial_reporting: Default::default(),
7448 hr: Default::default(),
7449 manufacturing: Default::default(),
7450 sales_quotes: Default::default(),
7451 tax: Default::default(),
7452 treasury: Default::default(),
7453 project_accounting: Default::default(),
7454 esg: Default::default(),
7455 country_packs: None,
7456 }
7457 }
7458
7459 #[test]
7460 fn test_enhanced_orchestrator_creation() {
7461 let config = create_test_config();
7462 let orchestrator = EnhancedOrchestrator::with_defaults(config);
7463 assert!(orchestrator.is_ok());
7464 }
7465
7466 #[test]
7467 fn test_minimal_generation() {
7468 let config = create_test_config();
7469 let phase_config = PhaseConfig {
7470 generate_master_data: false,
7471 generate_document_flows: false,
7472 generate_journal_entries: true,
7473 inject_anomalies: false,
7474 show_progress: false,
7475 ..Default::default()
7476 };
7477
7478 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7479 let result = orchestrator.generate();
7480
7481 assert!(result.is_ok());
7482 let result = result.unwrap();
7483 assert!(!result.journal_entries.is_empty());
7484 }
7485
7486 #[test]
7487 fn test_master_data_generation() {
7488 let config = create_test_config();
7489 let phase_config = PhaseConfig {
7490 generate_master_data: true,
7491 generate_document_flows: false,
7492 generate_journal_entries: false,
7493 inject_anomalies: false,
7494 show_progress: false,
7495 vendors_per_company: 5,
7496 customers_per_company: 5,
7497 materials_per_company: 10,
7498 assets_per_company: 5,
7499 employees_per_company: 10,
7500 ..Default::default()
7501 };
7502
7503 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7504 let result = orchestrator.generate().unwrap();
7505
7506 assert!(!result.master_data.vendors.is_empty());
7507 assert!(!result.master_data.customers.is_empty());
7508 assert!(!result.master_data.materials.is_empty());
7509 }
7510
7511 #[test]
7512 fn test_document_flow_generation() {
7513 let config = create_test_config();
7514 let phase_config = PhaseConfig {
7515 generate_master_data: true,
7516 generate_document_flows: true,
7517 generate_journal_entries: false,
7518 inject_anomalies: false,
7519 inject_data_quality: false,
7520 validate_balances: false,
7521 generate_ocpm_events: false,
7522 show_progress: false,
7523 vendors_per_company: 5,
7524 customers_per_company: 5,
7525 materials_per_company: 10,
7526 assets_per_company: 5,
7527 employees_per_company: 10,
7528 p2p_chains: 5,
7529 o2c_chains: 5,
7530 ..Default::default()
7531 };
7532
7533 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7534 let result = orchestrator.generate().unwrap();
7535
7536 assert!(!result.document_flows.p2p_chains.is_empty());
7538 assert!(!result.document_flows.o2c_chains.is_empty());
7539
7540 assert!(!result.document_flows.purchase_orders.is_empty());
7542 assert!(!result.document_flows.sales_orders.is_empty());
7543 }
7544
7545 #[test]
7546 fn test_anomaly_injection() {
7547 let config = create_test_config();
7548 let phase_config = PhaseConfig {
7549 generate_master_data: false,
7550 generate_document_flows: false,
7551 generate_journal_entries: true,
7552 inject_anomalies: true,
7553 show_progress: false,
7554 ..Default::default()
7555 };
7556
7557 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7558 let result = orchestrator.generate().unwrap();
7559
7560 assert!(!result.journal_entries.is_empty());
7562
7563 assert!(result.anomaly_labels.summary.is_some());
7566 }
7567
7568 #[test]
7569 fn test_full_generation_pipeline() {
7570 let config = create_test_config();
7571 let phase_config = PhaseConfig {
7572 generate_master_data: true,
7573 generate_document_flows: true,
7574 generate_journal_entries: true,
7575 inject_anomalies: false,
7576 inject_data_quality: false,
7577 validate_balances: true,
7578 generate_ocpm_events: false,
7579 show_progress: false,
7580 vendors_per_company: 3,
7581 customers_per_company: 3,
7582 materials_per_company: 5,
7583 assets_per_company: 3,
7584 employees_per_company: 5,
7585 p2p_chains: 3,
7586 o2c_chains: 3,
7587 ..Default::default()
7588 };
7589
7590 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7591 let result = orchestrator.generate().unwrap();
7592
7593 assert!(!result.master_data.vendors.is_empty());
7595 assert!(!result.master_data.customers.is_empty());
7596 assert!(!result.document_flows.p2p_chains.is_empty());
7597 assert!(!result.document_flows.o2c_chains.is_empty());
7598 assert!(!result.journal_entries.is_empty());
7599 assert!(result.statistics.accounts_count > 0);
7600
7601 assert!(!result.subledger.ap_invoices.is_empty());
7603 assert!(!result.subledger.ar_invoices.is_empty());
7604
7605 assert!(result.balance_validation.validated);
7607 assert!(result.balance_validation.entries_processed > 0);
7608 }
7609
7610 #[test]
7611 fn test_subledger_linking() {
7612 let config = create_test_config();
7613 let phase_config = PhaseConfig {
7614 generate_master_data: true,
7615 generate_document_flows: true,
7616 generate_journal_entries: false,
7617 inject_anomalies: false,
7618 inject_data_quality: false,
7619 validate_balances: false,
7620 generate_ocpm_events: false,
7621 show_progress: false,
7622 vendors_per_company: 5,
7623 customers_per_company: 5,
7624 materials_per_company: 10,
7625 assets_per_company: 3,
7626 employees_per_company: 5,
7627 p2p_chains: 5,
7628 o2c_chains: 5,
7629 ..Default::default()
7630 };
7631
7632 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7633 let result = orchestrator.generate().unwrap();
7634
7635 assert!(!result.document_flows.vendor_invoices.is_empty());
7637 assert!(!result.document_flows.customer_invoices.is_empty());
7638
7639 assert!(!result.subledger.ap_invoices.is_empty());
7641 assert!(!result.subledger.ar_invoices.is_empty());
7642
7643 assert_eq!(
7645 result.subledger.ap_invoices.len(),
7646 result.document_flows.vendor_invoices.len()
7647 );
7648
7649 assert_eq!(
7651 result.subledger.ar_invoices.len(),
7652 result.document_flows.customer_invoices.len()
7653 );
7654
7655 assert_eq!(
7657 result.statistics.ap_invoice_count,
7658 result.subledger.ap_invoices.len()
7659 );
7660 assert_eq!(
7661 result.statistics.ar_invoice_count,
7662 result.subledger.ar_invoices.len()
7663 );
7664 }
7665
7666 #[test]
7667 fn test_balance_validation() {
7668 let config = create_test_config();
7669 let phase_config = PhaseConfig {
7670 generate_master_data: false,
7671 generate_document_flows: false,
7672 generate_journal_entries: true,
7673 inject_anomalies: false,
7674 validate_balances: true,
7675 show_progress: false,
7676 ..Default::default()
7677 };
7678
7679 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7680 let result = orchestrator.generate().unwrap();
7681
7682 assert!(result.balance_validation.validated);
7684 assert!(result.balance_validation.entries_processed > 0);
7685
7686 assert!(!result.balance_validation.has_unbalanced_entries);
7688
7689 assert_eq!(
7691 result.balance_validation.total_debits,
7692 result.balance_validation.total_credits
7693 );
7694 }
7695
7696 #[test]
7697 fn test_statistics_accuracy() {
7698 let config = create_test_config();
7699 let phase_config = PhaseConfig {
7700 generate_master_data: true,
7701 generate_document_flows: false,
7702 generate_journal_entries: true,
7703 inject_anomalies: false,
7704 show_progress: false,
7705 vendors_per_company: 10,
7706 customers_per_company: 20,
7707 materials_per_company: 15,
7708 assets_per_company: 5,
7709 employees_per_company: 8,
7710 ..Default::default()
7711 };
7712
7713 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7714 let result = orchestrator.generate().unwrap();
7715
7716 assert_eq!(
7718 result.statistics.vendor_count,
7719 result.master_data.vendors.len()
7720 );
7721 assert_eq!(
7722 result.statistics.customer_count,
7723 result.master_data.customers.len()
7724 );
7725 assert_eq!(
7726 result.statistics.material_count,
7727 result.master_data.materials.len()
7728 );
7729 assert_eq!(
7730 result.statistics.total_entries as usize,
7731 result.journal_entries.len()
7732 );
7733 }
7734
7735 #[test]
7736 fn test_phase_config_defaults() {
7737 let config = PhaseConfig::default();
7738 assert!(config.generate_master_data);
7739 assert!(config.generate_document_flows);
7740 assert!(config.generate_journal_entries);
7741 assert!(!config.inject_anomalies);
7742 assert!(config.validate_balances);
7743 assert!(config.show_progress);
7744 assert!(config.vendors_per_company > 0);
7745 assert!(config.customers_per_company > 0);
7746 }
7747
7748 #[test]
7749 fn test_get_coa_before_generation() {
7750 let config = create_test_config();
7751 let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
7752
7753 assert!(orchestrator.get_coa().is_none());
7755 }
7756
7757 #[test]
7758 fn test_get_coa_after_generation() {
7759 let config = create_test_config();
7760 let phase_config = PhaseConfig {
7761 generate_master_data: false,
7762 generate_document_flows: false,
7763 generate_journal_entries: true,
7764 inject_anomalies: false,
7765 show_progress: false,
7766 ..Default::default()
7767 };
7768
7769 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7770 let _ = orchestrator.generate().unwrap();
7771
7772 assert!(orchestrator.get_coa().is_some());
7774 }
7775
7776 #[test]
7777 fn test_get_master_data() {
7778 let config = create_test_config();
7779 let phase_config = PhaseConfig {
7780 generate_master_data: true,
7781 generate_document_flows: false,
7782 generate_journal_entries: false,
7783 inject_anomalies: false,
7784 show_progress: false,
7785 vendors_per_company: 5,
7786 customers_per_company: 5,
7787 materials_per_company: 5,
7788 assets_per_company: 5,
7789 employees_per_company: 5,
7790 ..Default::default()
7791 };
7792
7793 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7794 let _ = orchestrator.generate().unwrap();
7795
7796 let master_data = orchestrator.get_master_data();
7797 assert!(!master_data.vendors.is_empty());
7798 }
7799
7800 #[test]
7801 fn test_with_progress_builder() {
7802 let config = create_test_config();
7803 let orchestrator = EnhancedOrchestrator::with_defaults(config)
7804 .unwrap()
7805 .with_progress(false);
7806
7807 assert!(!orchestrator.phase_config.show_progress);
7809 }
7810
7811 #[test]
7812 fn test_multi_company_generation() {
7813 let mut config = create_test_config();
7814 config.companies.push(CompanyConfig {
7815 code: "2000".to_string(),
7816 name: "Subsidiary".to_string(),
7817 currency: "EUR".to_string(),
7818 country: "DE".to_string(),
7819 annual_transaction_volume: TransactionVolume::TenK,
7820 volume_weight: 0.5,
7821 fiscal_year_variant: "K4".to_string(),
7822 });
7823
7824 let phase_config = PhaseConfig {
7825 generate_master_data: true,
7826 generate_document_flows: false,
7827 generate_journal_entries: true,
7828 inject_anomalies: false,
7829 show_progress: false,
7830 vendors_per_company: 5,
7831 customers_per_company: 5,
7832 materials_per_company: 5,
7833 assets_per_company: 5,
7834 employees_per_company: 5,
7835 ..Default::default()
7836 };
7837
7838 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7839 let result = orchestrator.generate().unwrap();
7840
7841 assert!(result.statistics.vendor_count >= 10); assert!(result.statistics.customer_count >= 10);
7844 assert!(result.statistics.companies_count == 2);
7845 }
7846
7847 #[test]
7848 fn test_empty_master_data_skips_document_flows() {
7849 let config = create_test_config();
7850 let phase_config = PhaseConfig {
7851 generate_master_data: false, generate_document_flows: true, generate_journal_entries: false,
7854 inject_anomalies: false,
7855 show_progress: false,
7856 ..Default::default()
7857 };
7858
7859 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7860 let result = orchestrator.generate().unwrap();
7861
7862 assert!(result.document_flows.p2p_chains.is_empty());
7864 assert!(result.document_flows.o2c_chains.is_empty());
7865 }
7866
7867 #[test]
7868 fn test_journal_entry_line_item_count() {
7869 let config = create_test_config();
7870 let phase_config = PhaseConfig {
7871 generate_master_data: false,
7872 generate_document_flows: false,
7873 generate_journal_entries: true,
7874 inject_anomalies: false,
7875 show_progress: false,
7876 ..Default::default()
7877 };
7878
7879 let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
7880 let result = orchestrator.generate().unwrap();
7881
7882 let calculated_line_items: u64 = result
7884 .journal_entries
7885 .iter()
7886 .map(|e| e.line_count() as u64)
7887 .sum();
7888 assert_eq!(result.statistics.total_line_items, calculated_line_items);
7889 }
7890
/// With the audit phase enabled, the result must contain exactly the
/// requested number of engagements, non-empty collections for every other
/// audit artifact, and statistics counters that match the collection sizes.
#[test]
fn test_audit_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Audit phase on, with explicit volumes.
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        // Journal entries on; the other listed phases off.
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let audit = &output.audit;
    let stats = &output.statistics;

    // Engagement count is pinned exactly; the rest only need to be populated.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // Every audit counter in the statistics mirrors its collection length.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
}
7947
/// The test configuration must ship with the LLM, diffusion and causal
/// phases disabled, and a run with that configuration must leave all of
/// their statistics at zero / `None`.
#[test]
fn test_new_phases_disabled_by_default() {
    let base_config = create_test_config();

    // Precondition: none of the three phases is enabled in the test config.
    assert!(!base_config.llm.enabled);
    assert!(!base_config.diffusion.enabled);
    assert!(!base_config.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment left no trace.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    // Diffusion enhancement left no trace.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    // Causal generation left no trace and recorded no validation verdict.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
7977
/// With LLM enrichment enabled and capped at three vendors, at least one
/// vendor must be enriched and the cap must not be exceeded.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    // Only master data is generated; five vendors per company are requested,
    // which is more than the enrichment cap of three.
    let phases = PhaseConfig {
        generate_master_data: true,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
8005
/// With diffusion enhancement enabled, the number of generated samples
/// reported in the statistics must equal the configured `sample_size`.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Matches the `sample_size` of 20 configured above.
    assert_eq!(output.statistics.diffusion_samples_generated, 20);
}
8028
/// With the causal overlay enabled on the `fraud_detection` template and
/// validation requested, the sample count must match the configured size
/// and a validation verdict must be recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Matches the `sample_size` of 100 configured above.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so a verdict (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
8054
/// With the causal overlay enabled on the `revenue_cycle` template and
/// validation turned off, the sample count must match the configured size
/// and no validation verdict may be recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Matches the `sample_size` of 50 configured above.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was disabled, so the verdict must stay unset.
    assert!(stats.causal_validation_passed.is_none());
}
8080
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in
/// one run and checks that each phase reports its expected statistics.
///
/// The causal template is not set here, so it stays at whatever
/// `create_test_config` provides.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;

    // Causal overlay with validation.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    // Master data and journal entries are both generated in this run.
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
8116
/// The LLM, diffusion and causal statistics fields must survive a JSON
/// serialize / deserialize round trip unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        // Fields under test.
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<EnhancedGenerationStatistics>(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
8143
/// A statistics JSON document that omits the LLM, diffusion and causal
/// fields must still deserialize, with those fields coming back as zero /
/// `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload with none of the llm_*, diffusion_* or causal_* keys.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // Every field missing from the payload must come back as zero / `None`.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
8193}