1use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33 models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34 BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39 AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40 AuditSample, ConfirmationResponse, ExternalConfirmation, InternalAuditFunction,
41 InternalAuditReport, ProfessionalJudgment, RelatedParty, RelatedPartyTransaction,
42 RiskAssessment, Workpaper,
43};
44use datasynth_core::models::sourcing::{
45 BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
46 SupplierBid, SupplierQualification, SupplierScorecard,
47};
48use datasynth_core::models::subledger::ap::APInvoice;
49use datasynth_core::models::subledger::ar::ARInvoice;
50use datasynth_core::models::*;
51use datasynth_core::traits::Generator;
52use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
53use datasynth_fingerprint::{
54 io::FingerprintReader,
55 models::Fingerprint,
56 synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
57};
58use datasynth_generators::{
59 AnomalyInjector,
61 AnomalyInjectorConfig,
62 AssetGenerator,
63 AuditEngagementGenerator,
65 BalanceTrackerConfig,
66 BankReconciliationGenerator,
68 BidEvaluationGenerator,
70 BidGenerator,
71 CatalogGenerator,
72 ChartOfAccountsGenerator,
74 ContractGenerator,
75 ControlGenerator,
77 ControlGeneratorConfig,
78 CustomerGenerator,
79 DataQualityConfig,
80 DataQualityInjector,
82 DataQualityStats,
83 DocumentFlowJeConfig,
85 DocumentFlowJeGenerator,
86 DocumentFlowLinker,
88 EmployeeGenerator,
89 EsgAnomalyLabel,
91 EvidenceGenerator,
92 FinancialStatementGenerator,
94 FindingGenerator,
95 JournalEntryGenerator,
96 JudgmentGenerator,
97 LatePaymentDistribution,
98 MaterialGenerator,
99 O2CDocumentChain,
100 O2CGenerator,
101 O2CGeneratorConfig,
102 O2CPaymentBehavior,
103 P2PDocumentChain,
104 P2PGenerator,
106 P2PGeneratorConfig,
107 P2PPaymentBehavior,
108 PaymentReference,
109 QualificationGenerator,
110 RfxGenerator,
111 RiskAssessmentGenerator,
112 RunningBalanceTracker,
114 ScorecardGenerator,
115 SourcingProjectGenerator,
116 SpendAnalysisGenerator,
117 ValidationError,
118 VendorGenerator,
120 WorkpaperGenerator,
121};
122use datasynth_graph::{
123 ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
124 EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
125 TransactionGraphConfig,
126};
127use datasynth_ocpm::{
128 AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
129 MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
130 OcpmUuidFactory, P2pDocuments, S2cDocuments,
131};
132
133use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
134use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
135use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
136use datasynth_core::llm::MockLlmProvider;
137use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
138use datasynth_core::models::documents::PaymentMethod;
139use datasynth_core::models::IndustrySector;
140use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
141use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
142use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
143use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
144use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
145use datasynth_generators::audit::sample_generator::SampleGenerator;
146use datasynth_generators::coa_generator::CoAFramework;
147use datasynth_generators::llm_enrichment::VendorLlmEnricher;
148use rayon::prelude::*;
149
150fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
156 let payment_behavior = &schema_config.payment_behavior;
157 let late_dist = &payment_behavior.late_payment_days_distribution;
158
159 P2PGeneratorConfig {
160 three_way_match_rate: schema_config.three_way_match_rate,
161 partial_delivery_rate: schema_config.partial_delivery_rate,
162 over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
163 price_variance_rate: schema_config.price_variance_rate,
164 max_price_variance_percent: schema_config.max_price_variance_percent,
165 avg_days_po_to_gr: schema_config.average_po_to_gr_days,
166 avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
167 avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
168 payment_method_distribution: vec![
169 (PaymentMethod::BankTransfer, 0.60),
170 (PaymentMethod::Check, 0.25),
171 (PaymentMethod::Wire, 0.10),
172 (PaymentMethod::CreditCard, 0.05),
173 ],
174 early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
175 payment_behavior: P2PPaymentBehavior {
176 late_payment_rate: payment_behavior.late_payment_rate,
177 late_payment_distribution: LatePaymentDistribution {
178 slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
179 late_8_to_14: late_dist.late_8_to_14,
180 very_late_15_to_30: late_dist.very_late_15_to_30,
181 severely_late_31_to_60: late_dist.severely_late_31_to_60,
182 extremely_late_over_60: late_dist.extremely_late_over_60,
183 },
184 partial_payment_rate: payment_behavior.partial_payment_rate,
185 payment_correction_rate: payment_behavior.payment_correction_rate,
186 avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
187 },
188 }
189}
190
191fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
193 let payment_behavior = &schema_config.payment_behavior;
194
195 O2CGeneratorConfig {
196 credit_check_failure_rate: schema_config.credit_check_failure_rate,
197 partial_shipment_rate: schema_config.partial_shipment_rate,
198 avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
199 avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
200 avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
201 late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
202 bad_debt_rate: schema_config.bad_debt_rate,
203 returns_rate: schema_config.return_rate,
204 cash_discount_take_rate: schema_config.cash_discount.taken_rate,
205 payment_method_distribution: vec![
206 (PaymentMethod::BankTransfer, 0.50),
207 (PaymentMethod::Check, 0.30),
208 (PaymentMethod::Wire, 0.15),
209 (PaymentMethod::CreditCard, 0.05),
210 ],
211 payment_behavior: O2CPaymentBehavior {
212 partial_payment_rate: payment_behavior.partial_payments.rate,
213 short_payment_rate: payment_behavior.short_payments.rate,
214 max_short_percent: payment_behavior.short_payments.max_short_percent,
215 on_account_rate: payment_behavior.on_account_payments.rate,
216 payment_correction_rate: payment_behavior.payment_corrections.rate,
217 avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
218 },
219 }
220}
221
/// Switches and volume settings for the individual generation phases.
///
/// Boolean fields turn a phase on or off; `usize` fields give per-company or
/// per-engagement counts. See the [`Default`] impl for the baseline values.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Enables master data generation.
    pub generate_master_data: bool,
    /// Enables document flow generation.
    pub generate_document_flows: bool,
    /// Enables OCPM event generation.
    pub generate_ocpm_events: bool,
    /// Enables journal entry generation.
    pub generate_journal_entries: bool,
    /// Enables anomaly injection.
    pub inject_anomalies: bool,
    /// Enables data quality issue injection.
    pub inject_data_quality: bool,
    /// Enables balance validation.
    pub validate_balances: bool,
    /// Enables progress display.
    pub show_progress: bool,
    /// Number of vendors per company.
    pub vendors_per_company: usize,
    /// Number of customers per company.
    pub customers_per_company: usize,
    /// Number of materials per company.
    pub materials_per_company: usize,
    /// Number of assets per company.
    pub assets_per_company: usize,
    /// Number of employees per company.
    pub employees_per_company: usize,
    /// Number of P2P document chains.
    pub p2p_chains: usize,
    /// Number of O2C document chains.
    pub o2c_chains: usize,
    /// Enables audit data generation.
    pub generate_audit: bool,
    /// Number of audit engagements.
    pub audit_engagements: usize,
    /// Number of workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Number of evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Number of risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Number of findings per engagement.
    pub findings_per_engagement: usize,
    /// Number of professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Enables banking data generation.
    pub generate_banking: bool,
    /// Enables graph export.
    pub generate_graph_export: bool,
    /// Enables sourcing data generation.
    pub generate_sourcing: bool,
    /// Enables bank reconciliation generation.
    pub generate_bank_reconciliation: bool,
    /// Enables financial statement generation.
    pub generate_financial_statements: bool,
    /// Enables accounting standards data generation.
    pub generate_accounting_standards: bool,
    /// Enables manufacturing data generation.
    pub generate_manufacturing: bool,
    /// Enables sales quote, KPI and budget generation.
    pub generate_sales_kpi_budgets: bool,
    /// Enables tax data generation.
    pub generate_tax: bool,
    /// Enables ESG data generation.
    pub generate_esg: bool,
    /// Enables intercompany data generation.
    pub generate_intercompany: bool,
    /// Enables evolution event generation.
    pub generate_evolution_events: bool,
    /// Enables counterfactual generation.
    pub generate_counterfactuals: bool,
    /// Enables compliance regulation data generation.
    pub generate_compliance_regulations: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: the core phases and evolution events are on,
    /// every other optional phase is off, and volumes use fixed defaults.
    fn default() -> Self {
        Self {
            // Core phases.
            generate_master_data: true,
            generate_document_flows: true,
            generate_ocpm_events: false,
            generate_journal_entries: true,
            inject_anomalies: false,
            inject_data_quality: false,
            validate_balances: true,
            show_progress: true,

            // Master data volumes.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document chain volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit phase and its volumes.
            generate_audit: false,
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,

            // Optional phases; only evolution events default to on.
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_evolution_events: true,
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
        }
    }
}
341
/// Master data entities held together as one snapshot.
///
/// Derives `Default`, so a fresh snapshot has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct MasterDataSnapshot {
    /// Vendor records.
    pub vendors: Vec<Vendor>,
    /// Customer records.
    pub customers: Vec<Customer>,
    /// Material records.
    pub materials: Vec<Material>,
    /// Fixed asset records.
    pub assets: Vec<FixedAsset>,
    /// Employee records.
    pub employees: Vec<Employee>,
}
356
/// Summary of a hypergraph export: element counts and the output location.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
    /// Number of hyperedges.
    pub hyperedge_count: usize,
    /// Path associated with the export output.
    pub output_path: PathBuf,
}
369
/// Document flow data: P2P/O2C chains plus per-type document collections.
///
/// NOTE(review): whether the per-type vectors are flattened from the chains
/// or populated independently is not visible in this struct — confirm at the
/// construction site.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowSnapshot {
    /// P2P document chains.
    pub p2p_chains: Vec<P2PDocumentChain>,
    /// O2C document chains.
    pub o2c_chains: Vec<O2CDocumentChain>,
    /// Purchase orders.
    pub purchase_orders: Vec<documents::PurchaseOrder>,
    /// Goods receipts.
    pub goods_receipts: Vec<documents::GoodsReceipt>,
    /// Vendor invoices.
    pub vendor_invoices: Vec<documents::VendorInvoice>,
    /// Sales orders.
    pub sales_orders: Vec<documents::SalesOrder>,
    /// Deliveries.
    pub deliveries: Vec<documents::Delivery>,
    /// Customer invoices.
    pub customer_invoices: Vec<documents::CustomerInvoice>,
    /// Payments.
    pub payments: Vec<documents::Payment>,
}
392
/// Subledger data: AP, AR, fixed-asset and inventory records.
#[derive(Debug, Clone, Default)]
pub struct SubledgerSnapshot {
    /// Accounts payable invoices.
    pub ap_invoices: Vec<APInvoice>,
    /// Accounts receivable invoices.
    pub ar_invoices: Vec<ARInvoice>,
    /// Fixed asset subledger records.
    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
    /// Inventory positions.
    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
    /// Inventory movements.
    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
}
407
/// OCPM event-log data with summary counts.
#[derive(Debug, Clone, Default)]
pub struct OcpmSnapshot {
    /// The event log, if one is present (`None` by default).
    pub event_log: Option<OcpmEventLog>,
    /// Number of events.
    pub event_count: usize,
    /// Number of objects.
    pub object_count: usize,
    /// Number of cases.
    pub case_count: usize,
}
420
/// Audit data: engagements and their related audit artifacts.
#[derive(Debug, Clone, Default)]
pub struct AuditSnapshot {
    /// Audit engagements.
    pub engagements: Vec<AuditEngagement>,
    /// Workpapers.
    pub workpapers: Vec<Workpaper>,
    /// Audit evidence items.
    pub evidence: Vec<AuditEvidence>,
    /// Risk assessments.
    pub risk_assessments: Vec<RiskAssessment>,
    /// Audit findings.
    pub findings: Vec<AuditFinding>,
    /// Professional judgments.
    pub judgments: Vec<ProfessionalJudgment>,
    /// External confirmations.
    pub confirmations: Vec<ExternalConfirmation>,
    /// Responses to confirmations.
    pub confirmation_responses: Vec<ConfirmationResponse>,
    /// Audit procedure steps.
    pub procedure_steps: Vec<AuditProcedureStep>,
    /// Audit samples.
    pub samples: Vec<AuditSample>,
    /// Analytical procedure results.
    pub analytical_results: Vec<AnalyticalProcedureResult>,
    /// Internal audit functions.
    pub ia_functions: Vec<InternalAuditFunction>,
    /// Internal audit reports.
    pub ia_reports: Vec<InternalAuditReport>,
    /// Related parties.
    pub related_parties: Vec<RelatedParty>,
    /// Related party transactions.
    pub related_party_transactions: Vec<RelatedPartyTransaction>,
}
455
/// Banking data: customers, accounts, transactions, labels and narratives.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers.
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level labels.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Exported narratives.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Count of suspicious items (presumably transactions — TODO confirm).
    pub suspicious_count: usize,
    /// Count of scenarios.
    pub scenario_count: usize,
}
480
/// Summary of graph exports, keyed by a string identifier.
///
/// Serializable; the default value has `exported == false` and no entries.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether any export took place.
    pub exported: bool,
    /// Number of graphs.
    pub graph_count: usize,
    /// Per-export details; the key is presumably the export name — TODO confirm.
    pub exports: HashMap<String, GraphExportInfo>,
}
491
/// Details of a single graph export.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Name of the export.
    pub name: String,
    /// Export format identifier.
    pub format: String,
    /// Path associated with the export output.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
506
/// Sourcing data: spend analyses, projects, bids, contracts and scorecards.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events.
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
529
/// Trial balance entries for one fiscal period, with the period's date range.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period number within the year.
    pub fiscal_period: u8,
    /// First date of the period.
    pub period_start: NaiveDate,
    /// Last date of the period (presumably inclusive — TODO confirm).
    pub period_end: NaiveDate,
    /// Trial balance entries for the period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
544
/// Financial reporting data: statements, bank reconciliations, trial balances.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Per-period trial balances.
    pub trial_balances: Vec<PeriodTrialBalance>,
}
555
/// HR data: payroll, time, expense and benefit records with summary counts.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors;
/// nothing in this struct forces them to match the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs.
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items.
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries.
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports.
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments.
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Number of payroll runs.
    pub payroll_run_count: usize,
    /// Number of payroll line items.
    pub payroll_line_item_count: usize,
    /// Number of time entries.
    pub time_entry_count: usize,
    /// Number of expense reports.
    pub expense_report_count: usize,
    /// Number of benefit enrollments.
    pub benefit_enrollment_count: usize,
}
580
/// Accounting standards data: revenue contracts and impairment tests.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Customer contracts from the revenue module.
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests.
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Number of revenue contracts.
    pub revenue_contract_count: usize,
    /// Number of impairment tests.
    pub impairment_test_count: usize,
}
593
/// Compliance and regulation data, with an optional compliance graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Compliance standard records.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Audit procedure records.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph, if one is present (`None` by default).
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
612
/// Manufacturing data: production, quality and inventory records with counts.
///
/// NOTE(review): the `*_count` fields are stored separately from the vectors;
/// nothing in this struct forces them to match the vector lengths.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders.
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections.
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts.
    pub cycle_counts: Vec<CycleCount>,
    /// Bill-of-materials components.
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements.
    pub inventory_movements: Vec<InventoryMovement>,
    /// Number of production orders.
    pub production_order_count: usize,
    /// Number of quality inspections.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
    /// Number of BOM components.
    pub bom_component_count: usize,
    /// Number of inventory movements.
    pub inventory_movement_count: usize,
}
637
/// Sales quotes, management KPIs and budgets, with summary counts.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes.
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs.
    pub kpis: Vec<ManagementKpi>,
    /// Budgets.
    pub budgets: Vec<Budget>,
    /// Number of sales quotes.
    pub sales_quote_count: usize,
    /// Number of KPIs.
    pub kpi_count: usize,
    /// Number of budget lines (named per line, not per budget).
    pub budget_line_count: usize,
}
654
/// Anomaly labels with an optional summary and a per-type tally.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// Individual labeled anomalies.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of the anomalies, if one is present (`None` by default).
    pub summary: Option<AnomalySummary>,
    /// Counts keyed by a type string (presumably the anomaly type name — TODO confirm).
    pub by_type: HashMap<String, usize>,
}
665
/// Outcome of balance validation: totals, tracking counts and any errors.
///
/// The derived default has every flag `false` and every total zero.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether the books were found to be balanced.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Sum of debits.
    pub total_debits: rust_decimal::Decimal,
    /// Sum of credits.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors collected.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were seen.
    pub has_unbalanced_entries: bool,
}
688
/// Tax data: jurisdictions, codes, lines, returns, provisions and labels.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Number of jurisdictions.
    pub jurisdiction_count: usize,
    /// Number of tax codes.
    pub code_count: usize,
}
711
/// Intercompany data: matched pairs, journal entries per side, eliminations.
///
/// Serializable and deserializable.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Matched intercompany pairs.
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// Journal entries on the seller side.
    pub seller_journal_entries: Vec<JournalEntry>,
    /// Journal entries on the buyer side.
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// Number of matched pairs.
    pub matched_pair_count: usize,
    /// Number of elimination entries.
    pub elimination_entry_count: usize,
    /// Match rate (presumably a fraction in 0.0..=1.0 — TODO confirm).
    pub match_rate: f64,
}
730
/// ESG data: environmental, social and governance records with counts.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records.
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Number of emission records.
    pub emission_count: usize,
    /// Number of disclosures.
    pub disclosure_count: usize,
}
767
/// Treasury data: cash, hedging, debt, guarantees, netting and labels.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions.
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweeps.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships.
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
}
792
/// Project accounting data: projects, costs, revenue, earned value, changes.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects.
    pub projects: Vec<Project>,
    /// Project cost lines.
    pub cost_lines: Vec<ProjectCostLine>,
    /// Project revenue records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
809
/// Aggregated output of the enhanced generation pipeline, one field per domain.
///
/// Derives `Debug` and `Default` only; unlike most of the snapshot types it
/// holds, it is not `Clone`.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot.
    pub subledger: SubledgerSnapshot,
    /// OCPM snapshot.
    pub ocpm: OcpmSnapshot,
    /// Audit snapshot.
    pub audit: AuditSnapshot,
    /// Banking snapshot.
    pub banking: BankingSnapshot,
    /// Graph export snapshot.
    pub graph_export: GraphExportSnapshot,
    /// Sourcing snapshot.
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot.
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR snapshot.
    pub hr: HrSnapshot,
    /// Accounting standards snapshot.
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot.
    pub manufacturing: ManufacturingSnapshot,
    /// Sales quotes, KPIs and budgets snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax snapshot.
    pub tax: TaxSnapshot,
    /// ESG snapshot.
    pub esg: EsgSnapshot,
    /// Treasury snapshot.
    pub treasury: TreasurySnapshot,
    /// Project accounting snapshot.
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events.
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events.
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events.
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany snapshot.
    pub intercompany: IntercompanySnapshot,
    /// Journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels.
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation outcome.
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics.
    pub data_quality_stats: DataQualityStats,
    /// Summary statistics for the run.
    pub statistics: EnhancedGenerationStatistics,
    /// Lineage graph, if one is present (`None` by default).
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate result, if one is present (`None` by default).
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls.
    pub internal_controls: Vec<InternalControl>,
    /// Opening balances.
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// Subledger reconciliation results.
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual pairs.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red flags.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings.
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Temporal version chains for vendors.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph, if one is present (`None` by default).
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links.
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific output, if one is present (`None` by default).
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// Compliance regulations snapshot.
    pub compliance_regulations: ComplianceRegulationsSnapshot,
}
895
/// Summary counters and timings for a generation run.
///
/// Serializable and deserializable. Fields marked `#[serde(default)]` may be
/// absent from the input when deserializing and then take their default
/// value; fields without the attribute must be present.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    // --- Journal entries and general scope ---
    pub total_entries: u64,
    pub total_line_items: u64,
    pub accounts_count: usize,
    pub companies_count: usize,
    pub period_months: u32,
    // --- Master data ---
    pub vendor_count: usize,
    pub customer_count: usize,
    pub material_count: usize,
    pub asset_count: usize,
    pub employee_count: usize,
    // --- Document flows ---
    pub p2p_chain_count: usize,
    pub o2c_chain_count: usize,
    // --- AP / AR subledger ---
    pub ap_invoice_count: usize,
    pub ar_invoice_count: usize,
    // --- OCPM ---
    pub ocpm_event_count: usize,
    pub ocpm_object_count: usize,
    pub ocpm_case_count: usize,
    // --- Audit ---
    pub audit_engagement_count: usize,
    pub audit_workpaper_count: usize,
    pub audit_evidence_count: usize,
    pub audit_risk_count: usize,
    pub audit_finding_count: usize,
    pub audit_judgment_count: usize,
    #[serde(default)]
    pub audit_confirmation_count: usize,
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    #[serde(default)]
    pub audit_sample_count: usize,
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    #[serde(default)]
    pub audit_ia_function_count: usize,
    #[serde(default)]
    pub audit_ia_report_count: usize,
    #[serde(default)]
    pub audit_related_party_count: usize,
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    // --- Anomalies and data quality ---
    pub anomalies_injected: usize,
    pub data_quality_issues: usize,
    // --- Banking ---
    pub banking_customer_count: usize,
    pub banking_account_count: usize,
    pub banking_transaction_count: usize,
    pub banking_suspicious_count: usize,
    // --- Graph export ---
    pub graph_export_count: usize,
    pub graph_node_count: usize,
    pub graph_edge_count: usize,
    // --- LLM enrichment (the `_ms` suffix suggests milliseconds) ---
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    // --- Diffusion enhancement ---
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    // --- Causal generation ---
    #[serde(default)]
    pub causal_generation_ms: u64,
    #[serde(default)]
    pub causal_samples_generated: usize,
    // `None` by default; presumably unset when no causal validation ran — TODO confirm.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    // --- Sourcing ---
    #[serde(default)]
    pub sourcing_project_count: usize,
    #[serde(default)]
    pub rfx_event_count: usize,
    #[serde(default)]
    pub bid_count: usize,
    #[serde(default)]
    pub contract_count: usize,
    #[serde(default)]
    pub catalog_item_count: usize,
    #[serde(default)]
    pub scorecard_count: usize,
    // --- Financial reporting ---
    #[serde(default)]
    pub financial_statement_count: usize,
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    // --- HR ---
    #[serde(default)]
    pub payroll_run_count: usize,
    #[serde(default)]
    pub time_entry_count: usize,
    #[serde(default)]
    pub expense_report_count: usize,
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    // --- Accounting standards ---
    #[serde(default)]
    pub revenue_contract_count: usize,
    #[serde(default)]
    pub impairment_test_count: usize,
    // --- Manufacturing ---
    #[serde(default)]
    pub production_order_count: usize,
    #[serde(default)]
    pub quality_inspection_count: usize,
    #[serde(default)]
    pub cycle_count_count: usize,
    #[serde(default)]
    pub bom_component_count: usize,
    #[serde(default)]
    pub inventory_movement_count: usize,
    // --- Sales quotes, KPIs, budgets ---
    #[serde(default)]
    pub sales_quote_count: usize,
    #[serde(default)]
    pub kpi_count: usize,
    #[serde(default)]
    pub budget_line_count: usize,
    // --- Tax ---
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    #[serde(default)]
    pub tax_code_count: usize,
    // --- ESG ---
    #[serde(default)]
    pub esg_emission_count: usize,
    #[serde(default)]
    pub esg_disclosure_count: usize,
    // --- Intercompany ---
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    #[serde(default)]
    pub ic_elimination_count: usize,
    #[serde(default)]
    pub ic_transaction_count: usize,
    // --- Fixed-asset and inventory subledgers ---
    #[serde(default)]
    pub fa_subledger_count: usize,
    #[serde(default)]
    pub inventory_subledger_count: usize,
    // --- Treasury instruments ---
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    // --- Project accounting ---
    #[serde(default)]
    pub project_count: usize,
    #[serde(default)]
    pub project_change_order_count: usize,
    // --- Additional tax, balance and reconciliation counters ---
    #[serde(default)]
    pub tax_provision_count: usize,
    #[serde(default)]
    pub opening_balance_count: usize,
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    #[serde(default)]
    pub tax_line_count: usize,
    #[serde(default)]
    pub project_cost_line_count: usize,
    // --- Cash management ---
    #[serde(default)]
    pub cash_position_count: usize,
    #[serde(default)]
    pub cash_forecast_count: usize,
    #[serde(default)]
    pub cash_pool_count: usize,
    // --- Evolution and organizational events ---
    #[serde(default)]
    pub process_evolution_event_count: usize,
    #[serde(default)]
    pub organizational_event_count: usize,
    // --- Counterfactuals and fraud ---
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    #[serde(default)]
    pub red_flag_count: usize,
    #[serde(default)]
    pub collusion_ring_count: usize,
    // --- Temporal chains, entity graph, cross-process links ---
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    #[serde(default)]
    pub cross_process_link_count: usize,
    // --- Disruption and industry-specific output ---
    #[serde(default)]
    pub disruption_event_count: usize,
    #[serde(default)]
    pub industry_gl_account_count: usize,
}
1133
/// Orchestrator state: configuration plus resources shared across phases.
///
/// Built via `new` / `with_defaults`, then adjusted with the `with_*` builder
/// methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new`.
    config: GeneratorConfig,
    /// Phase switches and volumes.
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` right after construction.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts as the default (empty) snapshot.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or random when that is unset.
    seed: u64,
    /// Progress bar container; `None` until `with_progress(true)` is called.
    multi_progress: Option<MultiProgress>,
    /// Resource guard built by `build_resource_guard`.
    resource_guard: ResourceGuard,
    /// Output path; `None` until `with_output_path` is called.
    output_path: Option<PathBuf>,
    /// Copula generator specs; empty right after `new`.
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry loaded in `new`.
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink; `None` until `with_phase_sink` is called.
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1153
1154impl EnhancedOrchestrator {
1155 pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1157 datasynth_config::validate_config(&config)?;
1158
1159 let seed = config.global.seed.unwrap_or_else(rand::random);
1160
1161 let resource_guard = Self::build_resource_guard(&config, None);
1163
1164 let country_pack_registry = match &config.country_packs {
1166 Some(cp) => {
1167 datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1168 .map_err(|e| SynthError::config(e.to_string()))?
1169 }
1170 None => datasynth_core::CountryPackRegistry::builtin_only()
1171 .map_err(|e| SynthError::config(e.to_string()))?,
1172 };
1173
1174 Ok(Self {
1175 config,
1176 phase_config,
1177 coa: None,
1178 master_data: MasterDataSnapshot::default(),
1179 seed,
1180 multi_progress: None,
1181 resource_guard,
1182 output_path: None,
1183 copula_generators: Vec::new(),
1184 country_pack_registry,
1185 phase_sink: None,
1186 })
1187 }
1188
1189 pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1191 Self::new(config, PhaseConfig::default())
1192 }
1193
1194 pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1196 self.phase_sink = Some(sink);
1197 self
1198 }
1199
1200 fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1202 if let Some(ref sink) = self.phase_sink {
1203 for item in items {
1204 if let Ok(value) = serde_json::to_value(item) {
1205 if let Err(e) = sink.emit(phase, type_name, &value) {
1206 warn!(
1207 "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1208 );
1209 }
1210 }
1211 }
1212 if let Err(e) = sink.phase_complete(phase) {
1213 warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1214 }
1215 }
1216 }
1217
1218 pub fn with_progress(mut self, show: bool) -> Self {
1220 self.phase_config.show_progress = show;
1221 if show {
1222 self.multi_progress = Some(MultiProgress::new());
1223 }
1224 self
1225 }
1226
1227 pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1229 let path = path.into();
1230 self.output_path = Some(path.clone());
1231 self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1233 self
1234 }
1235
/// Returns a reference to the country pack registry held by the orchestrator.
pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
    &self.country_pack_registry
}
1240
1241 pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1243 self.country_pack_registry.get_by_str(country)
1244 }
1245
1246 fn primary_country_code(&self) -> &str {
1249 self.config
1250 .companies
1251 .first()
1252 .map(|c| c.country.as_str())
1253 .unwrap_or("US")
1254 }
1255
1256 fn primary_pack(&self) -> &datasynth_core::CountryPack {
1258 self.country_pack_for(self.primary_country_code())
1259 }
1260
1261 fn resolve_coa_framework(&self) -> CoAFramework {
1263 if self.config.accounting_standards.enabled {
1264 match self.config.accounting_standards.framework {
1265 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1266 return CoAFramework::FrenchPcg;
1267 }
1268 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1269 return CoAFramework::GermanSkr04;
1270 }
1271 _ => {}
1272 }
1273 }
1274 let pack = self.primary_pack();
1276 match pack.accounting.framework.as_str() {
1277 "french_gaap" => CoAFramework::FrenchPcg,
1278 "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1279 _ => CoAFramework::UsGaap,
1280 }
1281 }
1282
1283 pub fn has_copulas(&self) -> bool {
1288 !self.copula_generators.is_empty()
1289 }
1290
1291 pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1297 &self.copula_generators
1298 }
1299
1300 pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1304 &mut self.copula_generators
1305 }
1306
1307 pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1311 self.copula_generators
1312 .iter_mut()
1313 .find(|c| c.name == copula_name)
1314 .map(|c| c.generator.sample())
1315 }
1316
1317 pub fn from_fingerprint(
1340 fingerprint_path: &std::path::Path,
1341 phase_config: PhaseConfig,
1342 scale: f64,
1343 ) -> SynthResult<Self> {
1344 info!("Loading fingerprint from: {}", fingerprint_path.display());
1345
1346 let reader = FingerprintReader::new();
1348 let fingerprint = reader
1349 .read_from_file(fingerprint_path)
1350 .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1351
1352 Self::from_fingerprint_data(fingerprint, phase_config, scale)
1353 }
1354
1355 pub fn from_fingerprint_data(
1362 fingerprint: Fingerprint,
1363 phase_config: PhaseConfig,
1364 scale: f64,
1365 ) -> SynthResult<Self> {
1366 info!(
1367 "Synthesizing config from fingerprint (version: {}, tables: {})",
1368 fingerprint.manifest.version,
1369 fingerprint.schema.tables.len()
1370 );
1371
1372 let seed: u64 = rand::random();
1374
1375 let options = SynthesisOptions {
1377 scale,
1378 seed: Some(seed),
1379 preserve_correlations: true,
1380 inject_anomalies: true,
1381 };
1382 let synthesizer = ConfigSynthesizer::with_options(options);
1383
1384 let synthesis_result = synthesizer
1386 .synthesize_full(&fingerprint, seed)
1387 .map_err(|e| {
1388 SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1389 })?;
1390
1391 let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1393 Self::base_config_for_industry(industry)
1394 } else {
1395 Self::base_config_for_industry("manufacturing")
1396 };
1397
1398 config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1400
1401 info!(
1403 "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1404 fingerprint.schema.tables.len(),
1405 scale,
1406 synthesis_result.copula_generators.len()
1407 );
1408
1409 if !synthesis_result.copula_generators.is_empty() {
1410 for spec in &synthesis_result.copula_generators {
1411 info!(
1412 " Copula '{}' for table '{}': {} columns",
1413 spec.name,
1414 spec.table,
1415 spec.columns.len()
1416 );
1417 }
1418 }
1419
1420 let mut orchestrator = Self::new(config, phase_config)?;
1422
1423 orchestrator.copula_generators = synthesis_result.copula_generators;
1425
1426 Ok(orchestrator)
1427 }
1428
1429 fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1431 use datasynth_config::presets::create_preset;
1432 use datasynth_config::TransactionVolume;
1433 use datasynth_core::models::{CoAComplexity, IndustrySector};
1434
1435 let sector = match industry.to_lowercase().as_str() {
1436 "manufacturing" => IndustrySector::Manufacturing,
1437 "retail" => IndustrySector::Retail,
1438 "financial" | "financial_services" => IndustrySector::FinancialServices,
1439 "healthcare" => IndustrySector::Healthcare,
1440 "technology" | "tech" => IndustrySector::Technology,
1441 _ => IndustrySector::Manufacturing,
1442 };
1443
1444 create_preset(
1446 sector,
1447 1, 12, CoAComplexity::Medium,
1450 TransactionVolume::TenK,
1451 )
1452 }
1453
1454 fn apply_config_patch(
1456 mut config: GeneratorConfig,
1457 patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1458 ) -> GeneratorConfig {
1459 use datasynth_fingerprint::synthesis::ConfigValue;
1460
1461 for (key, value) in patch.values() {
1462 match (key.as_str(), value) {
1463 ("transactions.count", ConfigValue::Integer(n)) => {
1466 info!(
1467 "Fingerprint suggests {} transactions (apply via company volumes)",
1468 n
1469 );
1470 }
1471 ("global.period_months", ConfigValue::Integer(n)) => {
1472 config.global.period_months = (*n).clamp(1, 120) as u32;
1473 }
1474 ("global.start_date", ConfigValue::String(s)) => {
1475 config.global.start_date = s.clone();
1476 }
1477 ("global.seed", ConfigValue::Integer(n)) => {
1478 config.global.seed = Some(*n as u64);
1479 }
1480 ("fraud.enabled", ConfigValue::Bool(b)) => {
1481 config.fraud.enabled = *b;
1482 }
1483 ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1484 config.fraud.fraud_rate = *f;
1485 }
1486 ("data_quality.enabled", ConfigValue::Bool(b)) => {
1487 config.data_quality.enabled = *b;
1488 }
1489 ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1491 config.fraud.enabled = *b;
1492 }
1493 ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1494 config.fraud.fraud_rate = *f;
1495 }
1496 _ => {
1497 debug!("Ignoring unknown config patch key: {}", key);
1498 }
1499 }
1500 }
1501
1502 config
1503 }
1504
1505 fn build_resource_guard(
1507 config: &GeneratorConfig,
1508 output_path: Option<PathBuf>,
1509 ) -> ResourceGuard {
1510 let mut builder = ResourceGuardBuilder::new();
1511
1512 if config.global.memory_limit_mb > 0 {
1514 builder = builder.memory_limit(config.global.memory_limit_mb);
1515 }
1516
1517 if let Some(path) = output_path {
1519 builder = builder.output_path(path).min_free_disk(100); }
1521
1522 builder = builder.conservative();
1524
1525 builder.build()
1526 }
1527
1528 fn check_resources(&self) -> SynthResult<DegradationLevel> {
1533 self.resource_guard.check()
1534 }
1535
1536 fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1538 let level = self.resource_guard.check()?;
1539
1540 if level != DegradationLevel::Normal {
1541 warn!(
1542 "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1543 phase,
1544 level,
1545 self.resource_guard.current_memory_mb(),
1546 self.resource_guard.available_disk_mb()
1547 );
1548 }
1549
1550 Ok(level)
1551 }
1552
1553 fn get_degradation_actions(&self) -> DegradationActions {
1555 self.resource_guard.get_actions()
1556 }
1557
1558 fn check_memory_limit(&self) -> SynthResult<()> {
1560 self.check_resources()?;
1561 Ok(())
1562 }
1563
1564 pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1566 info!("Starting enhanced generation workflow");
1567 info!(
1568 "Config: industry={:?}, period_months={}, companies={}",
1569 self.config.global.industry,
1570 self.config.global.period_months,
1571 self.config.companies.len()
1572 );
1573
1574 let initial_level = self.check_resources_with_log("initial")?;
1576 if initial_level == DegradationLevel::Emergency {
1577 return Err(SynthError::resource(
1578 "Insufficient resources to start generation",
1579 ));
1580 }
1581
1582 let mut stats = EnhancedGenerationStatistics {
1583 companies_count: self.config.companies.len(),
1584 period_months: self.config.global.period_months,
1585 ..Default::default()
1586 };
1587
1588 let coa = self.phase_chart_of_accounts(&mut stats)?;
1590
1591 self.phase_master_data(&mut stats)?;
1593
1594 self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1596 self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1597 self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1598
1599 let (mut document_flows, subledger, fa_journal_entries) =
1601 self.phase_document_flows(&mut stats)?;
1602
1603 self.emit_phase_items(
1605 "document_flows",
1606 "PurchaseOrder",
1607 &document_flows.purchase_orders,
1608 );
1609 self.emit_phase_items(
1610 "document_flows",
1611 "GoodsReceipt",
1612 &document_flows.goods_receipts,
1613 );
1614 self.emit_phase_items(
1615 "document_flows",
1616 "VendorInvoice",
1617 &document_flows.vendor_invoices,
1618 );
1619 self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1620 self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1621
1622 let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1624
1625 let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1633
1634 if !fa_journal_entries.is_empty() {
1636 debug!(
1637 "Appending {} FA acquisition JEs to main entries",
1638 fa_journal_entries.len()
1639 );
1640 entries.extend(fa_journal_entries);
1641 }
1642
1643 let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1645
1646 let actions = self.get_degradation_actions();
1648
1649 let sourcing = self.phase_sourcing_data(&mut stats)?;
1651
1652 if !sourcing.contracts.is_empty() {
1654 let mut linked_count = 0usize;
1655 for chain in &mut document_flows.p2p_chains {
1656 if chain.purchase_order.contract_id.is_none() {
1657 if let Some(contract) = sourcing
1658 .contracts
1659 .iter()
1660 .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1661 {
1662 chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1663 linked_count += 1;
1664 }
1665 }
1666 }
1667 if linked_count > 0 {
1668 debug!(
1669 "Linked {} purchase orders to S2C contracts by vendor match",
1670 linked_count
1671 );
1672 }
1673 }
1674
1675 let intercompany = self.phase_intercompany(&mut stats)?;
1677
1678 if !intercompany.seller_journal_entries.is_empty()
1680 || !intercompany.buyer_journal_entries.is_empty()
1681 {
1682 let ic_je_count = intercompany.seller_journal_entries.len()
1683 + intercompany.buyer_journal_entries.len();
1684 entries.extend(intercompany.seller_journal_entries.iter().cloned());
1685 entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1686 debug!(
1687 "Appended {} IC journal entries to main entries",
1688 ic_je_count
1689 );
1690 }
1691
1692 let hr = self.phase_hr_data(&mut stats)?;
1694
1695 if !hr.payroll_runs.is_empty() {
1697 let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1698 debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1699 entries.extend(payroll_jes);
1700 }
1701
1702 let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1704
1705 if !manufacturing_snap.production_orders.is_empty() {
1707 let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1708 debug!("Generated {} JEs from production orders", mfg_jes.len());
1709 entries.extend(mfg_jes);
1710 }
1711
1712 if !entries.is_empty() {
1715 stats.total_entries = entries.len() as u64;
1716 stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1717 debug!(
1718 "Final entry count: {}, line items: {} (after all JE-generating phases)",
1719 stats.total_entries, stats.total_line_items
1720 );
1721 }
1722
1723 if self.config.internal_controls.enabled && !entries.is_empty() {
1725 info!("Phase 7b: Applying internal controls to journal entries");
1726 let control_config = ControlGeneratorConfig {
1727 exception_rate: self.config.internal_controls.exception_rate,
1728 sod_violation_rate: self.config.internal_controls.sod_violation_rate,
1729 enable_sox_marking: true,
1730 sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
1731 self.config.internal_controls.sox_materiality_threshold,
1732 )
1733 .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
1734 ..Default::default()
1735 };
1736 let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
1737 for entry in &mut entries {
1738 control_gen.apply_controls(entry, &coa);
1739 }
1740 let with_controls = entries
1741 .iter()
1742 .filter(|e| !e.header.control_ids.is_empty())
1743 .count();
1744 info!(
1745 "Applied controls to {} entries ({} with control IDs assigned)",
1746 entries.len(),
1747 with_controls
1748 );
1749 }
1750
1751 self.emit_phase_items("journal_entries", "JournalEntry", &entries);
1753
1754 let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1756
1757 self.emit_phase_items(
1759 "anomaly_injection",
1760 "LabeledAnomaly",
1761 &anomaly_labels.labels,
1762 );
1763
1764 let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
1766
1767 self.emit_phase_items("red_flags", "RedFlag", &red_flags);
1769
1770 let collusion_rings = self.phase_collusion_rings(&mut stats)?;
1772
1773 self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
1775
1776 let balance_validation = self.phase_balance_validation(&entries)?;
1778
1779 let subledger_reconciliation =
1781 self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1782
1783 let data_quality_stats =
1785 self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1786
1787 let audit = self.phase_audit_data(&entries, &mut stats)?;
1789
1790 let banking = self.phase_banking_data(&mut stats)?;
1792
1793 let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1795
1796 self.phase_llm_enrichment(&mut stats);
1798
1799 self.phase_diffusion_enhancement(&mut stats);
1801
1802 self.phase_causal_overlay(&mut stats);
1804
1805 let financial_reporting =
1807 self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1808
1809 let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1811
1812 let ocpm = self.phase_ocpm_events(
1814 &document_flows,
1815 &sourcing,
1816 &hr,
1817 &manufacturing_snap,
1818 &banking,
1819 &audit,
1820 &financial_reporting,
1821 &mut stats,
1822 )?;
1823
1824 if let Some(ref event_log) = ocpm.event_log {
1826 self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
1827 }
1828
1829 let sales_kpi_budgets =
1831 self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1832
1833 let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1835
1836 let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1838
1839 let treasury =
1841 self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
1842
1843 let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1845
1846 let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
1848
1849 let disruption_events = self.phase_disruption_events(&mut stats)?;
1851
1852 let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
1854
1855 let (entity_relationship_graph, cross_process_links) =
1857 self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
1858
1859 let industry_output = self.phase_industry_data(&mut stats);
1861
1862 let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
1864
1865 self.phase_hypergraph_export(
1867 &coa,
1868 &entries,
1869 &document_flows,
1870 &sourcing,
1871 &hr,
1872 &manufacturing_snap,
1873 &banking,
1874 &audit,
1875 &financial_reporting,
1876 &ocpm,
1877 &compliance_regulations,
1878 &mut stats,
1879 )?;
1880
1881 if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1884 self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1885 }
1886
1887 if self.config.streaming.enabled {
1889 info!("Note: streaming config is enabled but batch mode does not use it");
1890 }
1891 if self.config.vendor_network.enabled {
1892 debug!("Vendor network config available; relationship graph generation is partial");
1893 }
1894 if self.config.customer_segmentation.enabled {
1895 debug!("Customer segmentation config available; segment-aware generation is partial");
1896 }
1897
1898 let resource_stats = self.resource_guard.stats();
1900 info!(
1901 "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1902 resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1903 resource_stats.disk.estimated_bytes_written,
1904 resource_stats.degradation_level
1905 );
1906
1907 if let Some(ref sink) = self.phase_sink {
1909 if let Err(e) = sink.flush() {
1910 warn!("Stream sink flush failed: {e}");
1911 }
1912 }
1913
1914 let lineage = self.build_lineage_graph();
1916
1917 let gate_result = if self.config.quality_gates.enabled {
1919 let profile_name = &self.config.quality_gates.profile;
1920 match datasynth_eval::gates::get_profile(profile_name) {
1921 Some(profile) => {
1922 let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1924
1925 if balance_validation.validated {
1927 eval.coherence.balance =
1928 Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1929 equation_balanced: balance_validation.is_balanced,
1930 max_imbalance: (balance_validation.total_debits
1931 - balance_validation.total_credits)
1932 .abs(),
1933 periods_evaluated: 1,
1934 periods_imbalanced: if balance_validation.is_balanced {
1935 0
1936 } else {
1937 1
1938 },
1939 period_results: Vec::new(),
1940 companies_evaluated: self.config.companies.len(),
1941 });
1942 }
1943
1944 eval.coherence.passes = balance_validation.is_balanced;
1946 if !balance_validation.is_balanced {
1947 eval.coherence
1948 .failures
1949 .push("Balance sheet equation not satisfied".to_string());
1950 }
1951
1952 eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1954 eval.statistical.passes = !entries.is_empty();
1955
1956 eval.quality.overall_score = 0.9; eval.quality.passes = true;
1959
1960 let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1961 info!(
1962 "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1963 profile_name, result.gates_passed, result.gates_total, result.summary
1964 );
1965 Some(result)
1966 }
1967 None => {
1968 warn!(
1969 "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1970 profile_name
1971 );
1972 None
1973 }
1974 }
1975 } else {
1976 None
1977 };
1978
1979 let internal_controls = if self.config.internal_controls.enabled {
1981 InternalControl::standard_controls()
1982 } else {
1983 Vec::new()
1984 };
1985
1986 Ok(EnhancedGenerationResult {
1987 chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
1988 master_data: std::mem::take(&mut self.master_data),
1989 document_flows,
1990 subledger,
1991 ocpm,
1992 audit,
1993 banking,
1994 graph_export,
1995 sourcing,
1996 financial_reporting,
1997 hr,
1998 accounting_standards,
1999 manufacturing: manufacturing_snap,
2000 sales_kpi_budgets,
2001 tax,
2002 esg: esg_snap,
2003 treasury,
2004 project_accounting,
2005 process_evolution,
2006 organizational_events,
2007 disruption_events,
2008 intercompany,
2009 journal_entries: entries,
2010 anomaly_labels,
2011 balance_validation,
2012 data_quality_stats,
2013 statistics: stats,
2014 lineage: Some(lineage),
2015 gate_result,
2016 internal_controls,
2017 opening_balances,
2018 subledger_reconciliation,
2019 counterfactual_pairs,
2020 red_flags,
2021 collusion_rings,
2022 temporal_vendor_chains,
2023 entity_relationship_graph,
2024 cross_process_links,
2025 industry_output,
2026 compliance_regulations,
2027 })
2028 }
2029
2030 fn phase_chart_of_accounts(
2036 &mut self,
2037 stats: &mut EnhancedGenerationStatistics,
2038 ) -> SynthResult<Arc<ChartOfAccounts>> {
2039 info!("Phase 1: Generating Chart of Accounts");
2040 let coa = self.generate_coa()?;
2041 stats.accounts_count = coa.account_count();
2042 info!(
2043 "Chart of Accounts generated: {} accounts",
2044 stats.accounts_count
2045 );
2046 self.check_resources_with_log("post-coa")?;
2047 Ok(coa)
2048 }
2049
2050 fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2052 if self.phase_config.generate_master_data {
2053 info!("Phase 2: Generating Master Data");
2054 self.generate_master_data()?;
2055 stats.vendor_count = self.master_data.vendors.len();
2056 stats.customer_count = self.master_data.customers.len();
2057 stats.material_count = self.master_data.materials.len();
2058 stats.asset_count = self.master_data.assets.len();
2059 stats.employee_count = self.master_data.employees.len();
2060 info!(
2061 "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2062 stats.vendor_count, stats.customer_count, stats.material_count,
2063 stats.asset_count, stats.employee_count
2064 );
2065 self.check_resources_with_log("post-master-data")?;
2066 } else {
2067 debug!("Phase 2: Skipped (master data generation disabled)");
2068 }
2069 Ok(())
2070 }
2071
2072 fn phase_document_flows(
2074 &mut self,
2075 stats: &mut EnhancedGenerationStatistics,
2076 ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2077 let mut document_flows = DocumentFlowSnapshot::default();
2078 let mut subledger = SubledgerSnapshot::default();
2079
2080 if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2081 info!("Phase 3: Generating Document Flows");
2082 self.generate_document_flows(&mut document_flows)?;
2083 stats.p2p_chain_count = document_flows.p2p_chains.len();
2084 stats.o2c_chain_count = document_flows.o2c_chains.len();
2085 info!(
2086 "Document flows generated: {} P2P chains, {} O2C chains",
2087 stats.p2p_chain_count, stats.o2c_chain_count
2088 );
2089
2090 debug!("Phase 3b: Linking document flows to subledgers");
2092 subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2093 stats.ap_invoice_count = subledger.ap_invoices.len();
2094 stats.ar_invoice_count = subledger.ar_invoices.len();
2095 debug!(
2096 "Subledgers linked: {} AP invoices, {} AR invoices",
2097 stats.ap_invoice_count, stats.ar_invoice_count
2098 );
2099
2100 self.check_resources_with_log("post-document-flows")?;
2101 } else {
2102 debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2103 }
2104
2105 let mut fa_journal_entries = Vec::new();
2107 if !self.master_data.assets.is_empty() {
2108 debug!("Generating FA subledger records");
2109 let company_code = self
2110 .config
2111 .companies
2112 .first()
2113 .map(|c| c.code.as_str())
2114 .unwrap_or("1000");
2115 let currency = self
2116 .config
2117 .companies
2118 .first()
2119 .map(|c| c.currency.as_str())
2120 .unwrap_or("USD");
2121
2122 let mut fa_gen = datasynth_generators::FAGenerator::new(
2123 datasynth_generators::FAGeneratorConfig::default(),
2124 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2125 );
2126
2127 for asset in &self.master_data.assets {
2128 let (record, je) = fa_gen.generate_asset_acquisition(
2129 company_code,
2130 &format!("{:?}", asset.asset_class),
2131 &asset.description,
2132 asset.acquisition_date,
2133 currency,
2134 asset.cost_center.as_deref(),
2135 );
2136 subledger.fa_records.push(record);
2137 fa_journal_entries.push(je);
2138 }
2139
2140 stats.fa_subledger_count = subledger.fa_records.len();
2141 debug!(
2142 "FA subledger records generated: {} (with {} acquisition JEs)",
2143 stats.fa_subledger_count,
2144 fa_journal_entries.len()
2145 );
2146 }
2147
2148 if !self.master_data.materials.is_empty() {
2150 debug!("Generating Inventory subledger records");
2151 let first_company = self.config.companies.first();
2152 let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2153 let inv_currency = first_company
2154 .map(|c| c.currency.clone())
2155 .unwrap_or_else(|| "USD".to_string());
2156
2157 let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2158 datasynth_generators::InventoryGeneratorConfig::default(),
2159 rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2160 inv_currency.clone(),
2161 );
2162
2163 for (i, material) in self.master_data.materials.iter().enumerate() {
2164 let plant = format!("PLANT{:02}", (i % 3) + 1);
2165 let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2166 let initial_qty = rust_decimal::Decimal::from(
2167 material
2168 .safety_stock
2169 .to_string()
2170 .parse::<i64>()
2171 .unwrap_or(100),
2172 );
2173
2174 let position = inv_gen.generate_position(
2175 company_code,
2176 &plant,
2177 &storage_loc,
2178 &material.material_id,
2179 &material.description,
2180 initial_qty,
2181 Some(material.standard_cost),
2182 &inv_currency,
2183 );
2184 subledger.inventory_positions.push(position);
2185 }
2186
2187 stats.inventory_subledger_count = subledger.inventory_positions.len();
2188 debug!(
2189 "Inventory subledger records generated: {}",
2190 stats.inventory_subledger_count
2191 );
2192 }
2193
2194 Ok((document_flows, subledger, fa_journal_entries))
2195 }
2196
2197 #[allow(clippy::too_many_arguments)]
2199 fn phase_ocpm_events(
2200 &mut self,
2201 document_flows: &DocumentFlowSnapshot,
2202 sourcing: &SourcingSnapshot,
2203 hr: &HrSnapshot,
2204 manufacturing: &ManufacturingSnapshot,
2205 banking: &BankingSnapshot,
2206 audit: &AuditSnapshot,
2207 financial_reporting: &FinancialReportingSnapshot,
2208 stats: &mut EnhancedGenerationStatistics,
2209 ) -> SynthResult<OcpmSnapshot> {
2210 if self.phase_config.generate_ocpm_events {
2211 info!("Phase 3c: Generating OCPM Events");
2212 let ocpm_snapshot = self.generate_ocpm_events(
2213 document_flows,
2214 sourcing,
2215 hr,
2216 manufacturing,
2217 banking,
2218 audit,
2219 financial_reporting,
2220 )?;
2221 stats.ocpm_event_count = ocpm_snapshot.event_count;
2222 stats.ocpm_object_count = ocpm_snapshot.object_count;
2223 stats.ocpm_case_count = ocpm_snapshot.case_count;
2224 info!(
2225 "OCPM events generated: {} events, {} objects, {} cases",
2226 stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2227 );
2228 self.check_resources_with_log("post-ocpm")?;
2229 Ok(ocpm_snapshot)
2230 } else {
2231 debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2232 Ok(OcpmSnapshot::default())
2233 }
2234 }
2235
2236 fn phase_journal_entries(
2238 &mut self,
2239 coa: &Arc<ChartOfAccounts>,
2240 document_flows: &DocumentFlowSnapshot,
2241 _stats: &mut EnhancedGenerationStatistics,
2242 ) -> SynthResult<Vec<JournalEntry>> {
2243 let mut entries = Vec::new();
2244
2245 if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2247 debug!("Phase 4a: Generating JEs from document flows");
2248 let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2249 debug!("Generated {} JEs from document flows", flow_entries.len());
2250 entries.extend(flow_entries);
2251 }
2252
2253 if self.phase_config.generate_journal_entries {
2255 info!("Phase 4: Generating Journal Entries");
2256 let je_entries = self.generate_journal_entries(coa)?;
2257 info!("Generated {} standalone journal entries", je_entries.len());
2258 entries.extend(je_entries);
2259 } else {
2260 debug!("Phase 4: Skipped (journal entry generation disabled)");
2261 }
2262
2263 if !entries.is_empty() {
2264 self.check_resources_with_log("post-journal-entries")?;
2267 }
2268
2269 Ok(entries)
2270 }
2271
2272 fn phase_anomaly_injection(
2274 &mut self,
2275 entries: &mut [JournalEntry],
2276 actions: &DegradationActions,
2277 stats: &mut EnhancedGenerationStatistics,
2278 ) -> SynthResult<AnomalyLabels> {
2279 if self.phase_config.inject_anomalies
2280 && !entries.is_empty()
2281 && !actions.skip_anomaly_injection
2282 {
2283 info!("Phase 5: Injecting Anomalies");
2284 let result = self.inject_anomalies(entries)?;
2285 stats.anomalies_injected = result.labels.len();
2286 info!("Injected {} anomalies", stats.anomalies_injected);
2287 self.check_resources_with_log("post-anomaly-injection")?;
2288 Ok(result)
2289 } else if actions.skip_anomaly_injection {
2290 warn!("Phase 5: Skipped due to resource degradation");
2291 Ok(AnomalyLabels::default())
2292 } else {
2293 debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2294 Ok(AnomalyLabels::default())
2295 }
2296 }
2297
2298 fn phase_balance_validation(
2300 &mut self,
2301 entries: &[JournalEntry],
2302 ) -> SynthResult<BalanceValidationResult> {
2303 if self.phase_config.validate_balances && !entries.is_empty() {
2304 debug!("Phase 6: Validating Balances");
2305 let balance_validation = self.validate_journal_entries(entries)?;
2306 if balance_validation.is_balanced {
2307 debug!("Balance validation passed");
2308 } else {
2309 warn!(
2310 "Balance validation found {} errors",
2311 balance_validation.validation_errors.len()
2312 );
2313 }
2314 Ok(balance_validation)
2315 } else {
2316 Ok(BalanceValidationResult::default())
2317 }
2318 }
2319
2320 fn phase_data_quality_injection(
2322 &mut self,
2323 entries: &mut [JournalEntry],
2324 actions: &DegradationActions,
2325 stats: &mut EnhancedGenerationStatistics,
2326 ) -> SynthResult<DataQualityStats> {
2327 if self.phase_config.inject_data_quality
2328 && !entries.is_empty()
2329 && !actions.skip_data_quality
2330 {
2331 info!("Phase 7: Injecting Data Quality Variations");
2332 let dq_stats = self.inject_data_quality(entries)?;
2333 stats.data_quality_issues = dq_stats.records_with_issues;
2334 info!("Injected {} data quality issues", stats.data_quality_issues);
2335 self.check_resources_with_log("post-data-quality")?;
2336 Ok(dq_stats)
2337 } else if actions.skip_data_quality {
2338 warn!("Phase 7: Skipped due to resource degradation");
2339 Ok(DataQualityStats::default())
2340 } else {
2341 debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2342 Ok(DataQualityStats::default())
2343 }
2344 }
2345
2346 fn phase_audit_data(
2348 &mut self,
2349 entries: &[JournalEntry],
2350 stats: &mut EnhancedGenerationStatistics,
2351 ) -> SynthResult<AuditSnapshot> {
2352 if self.phase_config.generate_audit {
2353 info!("Phase 8: Generating Audit Data");
2354 let audit_snapshot = self.generate_audit_data(entries)?;
2355 stats.audit_engagement_count = audit_snapshot.engagements.len();
2356 stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2357 stats.audit_evidence_count = audit_snapshot.evidence.len();
2358 stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2359 stats.audit_finding_count = audit_snapshot.findings.len();
2360 stats.audit_judgment_count = audit_snapshot.judgments.len();
2361 stats.audit_confirmation_count = audit_snapshot.confirmations.len();
2362 stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
2363 stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
2364 stats.audit_sample_count = audit_snapshot.samples.len();
2365 stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
2366 stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
2367 stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
2368 stats.audit_related_party_count = audit_snapshot.related_parties.len();
2369 stats.audit_related_party_transaction_count =
2370 audit_snapshot.related_party_transactions.len();
2371 info!(
2372 "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
2373 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
2374 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
2375 {} RP transactions",
2376 stats.audit_engagement_count,
2377 stats.audit_workpaper_count,
2378 stats.audit_evidence_count,
2379 stats.audit_risk_count,
2380 stats.audit_finding_count,
2381 stats.audit_judgment_count,
2382 stats.audit_confirmation_count,
2383 stats.audit_procedure_step_count,
2384 stats.audit_sample_count,
2385 stats.audit_analytical_result_count,
2386 stats.audit_ia_function_count,
2387 stats.audit_ia_report_count,
2388 stats.audit_related_party_count,
2389 stats.audit_related_party_transaction_count,
2390 );
2391 self.check_resources_with_log("post-audit")?;
2392 Ok(audit_snapshot)
2393 } else {
2394 debug!("Phase 8: Skipped (audit generation disabled)");
2395 Ok(AuditSnapshot::default())
2396 }
2397 }
2398
2399 fn phase_banking_data(
2401 &mut self,
2402 stats: &mut EnhancedGenerationStatistics,
2403 ) -> SynthResult<BankingSnapshot> {
2404 if self.phase_config.generate_banking && self.config.banking.enabled {
2405 info!("Phase 9: Generating Banking KYC/AML Data");
2406 let banking_snapshot = self.generate_banking_data()?;
2407 stats.banking_customer_count = banking_snapshot.customers.len();
2408 stats.banking_account_count = banking_snapshot.accounts.len();
2409 stats.banking_transaction_count = banking_snapshot.transactions.len();
2410 stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2411 info!(
2412 "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2413 stats.banking_customer_count, stats.banking_account_count,
2414 stats.banking_transaction_count, stats.banking_suspicious_count
2415 );
2416 self.check_resources_with_log("post-banking")?;
2417 Ok(banking_snapshot)
2418 } else {
2419 debug!("Phase 9: Skipped (banking generation disabled)");
2420 Ok(BankingSnapshot::default())
2421 }
2422 }
2423
2424 fn phase_graph_export(
2426 &mut self,
2427 entries: &[JournalEntry],
2428 coa: &Arc<ChartOfAccounts>,
2429 stats: &mut EnhancedGenerationStatistics,
2430 ) -> SynthResult<GraphExportSnapshot> {
2431 if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2432 && !entries.is_empty()
2433 {
2434 info!("Phase 10: Exporting Accounting Network Graphs");
2435 match self.export_graphs(entries, coa, stats) {
2436 Ok(snapshot) => {
2437 info!(
2438 "Graph export complete: {} graphs ({} nodes, {} edges)",
2439 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2440 );
2441 Ok(snapshot)
2442 }
2443 Err(e) => {
2444 warn!("Phase 10: Graph export failed: {}", e);
2445 Ok(GraphExportSnapshot::default())
2446 }
2447 }
2448 } else {
2449 debug!("Phase 10: Skipped (graph export disabled or no entries)");
2450 Ok(GraphExportSnapshot::default())
2451 }
2452 }
2453
2454 #[allow(clippy::too_many_arguments)]
2456 fn phase_hypergraph_export(
2457 &self,
2458 coa: &Arc<ChartOfAccounts>,
2459 entries: &[JournalEntry],
2460 document_flows: &DocumentFlowSnapshot,
2461 sourcing: &SourcingSnapshot,
2462 hr: &HrSnapshot,
2463 manufacturing: &ManufacturingSnapshot,
2464 banking: &BankingSnapshot,
2465 audit: &AuditSnapshot,
2466 financial_reporting: &FinancialReportingSnapshot,
2467 ocpm: &OcpmSnapshot,
2468 compliance: &ComplianceRegulationsSnapshot,
2469 stats: &mut EnhancedGenerationStatistics,
2470 ) -> SynthResult<()> {
2471 if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2472 info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2473 match self.export_hypergraph(
2474 coa,
2475 entries,
2476 document_flows,
2477 sourcing,
2478 hr,
2479 manufacturing,
2480 banking,
2481 audit,
2482 financial_reporting,
2483 ocpm,
2484 compliance,
2485 stats,
2486 ) {
2487 Ok(info) => {
2488 info!(
2489 "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2490 info.node_count, info.edge_count, info.hyperedge_count
2491 );
2492 }
2493 Err(e) => {
2494 warn!("Phase 10b: Hypergraph export failed: {}", e);
2495 }
2496 }
2497 } else {
2498 debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2499 }
2500 Ok(())
2501 }
2502
2503 fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2509 if !self.config.llm.enabled {
2510 debug!("Phase 11: Skipped (LLM enrichment disabled)");
2511 return;
2512 }
2513
2514 info!("Phase 11: Starting LLM Enrichment");
2515 let start = std::time::Instant::now();
2516
2517 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2518 let provider = Arc::new(MockLlmProvider::new(self.seed));
2519 let enricher = VendorLlmEnricher::new(provider);
2520
2521 let industry = format!("{:?}", self.config.global.industry);
2522 let max_enrichments = self
2523 .config
2524 .llm
2525 .max_vendor_enrichments
2526 .min(self.master_data.vendors.len());
2527
2528 let mut enriched_count = 0usize;
2529 for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2530 match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2531 Ok(name) => {
2532 vendor.name = name;
2533 enriched_count += 1;
2534 }
2535 Err(e) => {
2536 warn!(
2537 "LLM vendor enrichment failed for {}: {}",
2538 vendor.vendor_id, e
2539 );
2540 }
2541 }
2542 }
2543
2544 enriched_count
2545 }));
2546
2547 match result {
2548 Ok(enriched_count) => {
2549 stats.llm_vendors_enriched = enriched_count;
2550 let elapsed = start.elapsed();
2551 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2552 info!(
2553 "Phase 11 complete: {} vendors enriched in {}ms",
2554 enriched_count, stats.llm_enrichment_ms
2555 );
2556 }
2557 Err(_) => {
2558 let elapsed = start.elapsed();
2559 stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2560 warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2561 }
2562 }
2563 }
2564
2565 fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2571 if !self.config.diffusion.enabled {
2572 debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2573 return;
2574 }
2575
2576 info!("Phase 12: Starting Diffusion Enhancement");
2577 let start = std::time::Instant::now();
2578
2579 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2580 let means = vec![5000.0, 3.0, 2.0]; let stds = vec![2000.0, 1.5, 1.0];
2583
2584 let diffusion_config = DiffusionConfig {
2585 n_steps: self.config.diffusion.n_steps,
2586 seed: self.seed,
2587 ..Default::default()
2588 };
2589
2590 let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2591
2592 let n_samples = self.config.diffusion.sample_size;
2593 let n_features = 3; let samples = backend.generate(n_samples, n_features, self.seed);
2595
2596 samples.len()
2597 }));
2598
2599 match result {
2600 Ok(sample_count) => {
2601 stats.diffusion_samples_generated = sample_count;
2602 let elapsed = start.elapsed();
2603 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2604 info!(
2605 "Phase 12 complete: {} diffusion samples generated in {}ms",
2606 sample_count, stats.diffusion_enhancement_ms
2607 );
2608 }
2609 Err(_) => {
2610 let elapsed = start.elapsed();
2611 stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2612 warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2613 }
2614 }
2615 }
2616
2617 fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2624 if !self.config.causal.enabled {
2625 debug!("Phase 13: Skipped (causal generation disabled)");
2626 return;
2627 }
2628
2629 info!("Phase 13: Starting Causal Overlay");
2630 let start = std::time::Instant::now();
2631
2632 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2633 let graph = match self.config.causal.template.as_str() {
2635 "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2636 _ => CausalGraph::fraud_detection_template(),
2637 };
2638
2639 let scm = StructuralCausalModel::new(graph.clone())
2640 .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
2641
2642 let n_samples = self.config.causal.sample_size;
2643 let samples = scm
2644 .generate(n_samples, self.seed)
2645 .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
2646
2647 let validation_passed = if self.config.causal.validate {
2649 let report = CausalValidator::validate_causal_structure(&samples, &graph);
2650 if report.valid {
2651 info!(
2652 "Causal validation passed: all {} checks OK",
2653 report.checks.len()
2654 );
2655 } else {
2656 warn!(
2657 "Causal validation: {} violations detected: {:?}",
2658 report.violations.len(),
2659 report.violations
2660 );
2661 }
2662 Some(report.valid)
2663 } else {
2664 None
2665 };
2666
2667 Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2668 }));
2669
2670 match result {
2671 Ok(Ok((sample_count, validation_passed))) => {
2672 stats.causal_samples_generated = sample_count;
2673 stats.causal_validation_passed = validation_passed;
2674 let elapsed = start.elapsed();
2675 stats.causal_generation_ms = elapsed.as_millis() as u64;
2676 info!(
2677 "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2678 sample_count, stats.causal_generation_ms, validation_passed,
2679 );
2680 }
2681 Ok(Err(e)) => {
2682 let elapsed = start.elapsed();
2683 stats.causal_generation_ms = elapsed.as_millis() as u64;
2684 warn!("Phase 13: Causal generation failed: {}", e);
2685 }
2686 Err(_) => {
2687 let elapsed = start.elapsed();
2688 stats.causal_generation_ms = elapsed.as_millis() as u64;
2689 warn!("Phase 13: Causal generation failed (panic caught), continuing");
2690 }
2691 }
2692 }
2693
2694 fn phase_sourcing_data(
2696 &mut self,
2697 stats: &mut EnhancedGenerationStatistics,
2698 ) -> SynthResult<SourcingSnapshot> {
2699 if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2700 debug!("Phase 14: Skipped (sourcing generation disabled)");
2701 return Ok(SourcingSnapshot::default());
2702 }
2703
2704 info!("Phase 14: Generating S2C Sourcing Data");
2705 let seed = self.seed;
2706
2707 let vendor_ids: Vec<String> = self
2709 .master_data
2710 .vendors
2711 .iter()
2712 .map(|v| v.vendor_id.clone())
2713 .collect();
2714 if vendor_ids.is_empty() {
2715 debug!("Phase 14: Skipped (no vendors available)");
2716 return Ok(SourcingSnapshot::default());
2717 }
2718
2719 let categories: Vec<(String, String)> = vec![
2720 ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2721 ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2722 ("CAT-IT".to_string(), "IT Equipment".to_string()),
2723 ("CAT-SVC".to_string(), "Professional Services".to_string()),
2724 ("CAT-LOG".to_string(), "Logistics".to_string()),
2725 ];
2726 let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2727 .iter()
2728 .map(|(id, name)| {
2729 (
2730 id.clone(),
2731 name.clone(),
2732 rust_decimal::Decimal::from(100_000),
2733 )
2734 })
2735 .collect();
2736
2737 let company_code = self
2738 .config
2739 .companies
2740 .first()
2741 .map(|c| c.code.as_str())
2742 .unwrap_or("1000");
2743 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2744 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2745 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2746 let fiscal_year = start_date.year() as u16;
2747 let owner_ids: Vec<String> = self
2748 .master_data
2749 .employees
2750 .iter()
2751 .take(5)
2752 .map(|e| e.employee_id.clone())
2753 .collect();
2754 let owner_id = owner_ids
2755 .first()
2756 .map(std::string::String::as_str)
2757 .unwrap_or("BUYER-001");
2758
2759 let mut spend_gen = SpendAnalysisGenerator::new(seed);
2761 let spend_analyses =
2762 spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2763
2764 let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2766 let sourcing_projects = if owner_ids.is_empty() {
2767 Vec::new()
2768 } else {
2769 project_gen.generate(
2770 company_code,
2771 &categories_with_spend,
2772 &owner_ids,
2773 start_date,
2774 self.config.global.period_months,
2775 )
2776 };
2777 stats.sourcing_project_count = sourcing_projects.len();
2778
2779 let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2781 let mut qual_gen = QualificationGenerator::new(seed + 2);
2782 let qualifications = qual_gen.generate(
2783 company_code,
2784 &qual_vendor_ids,
2785 sourcing_projects.first().map(|p| p.project_id.as_str()),
2786 owner_id,
2787 start_date,
2788 );
2789
2790 let mut rfx_gen = RfxGenerator::new(seed + 3);
2792 let rfx_events: Vec<RfxEvent> = sourcing_projects
2793 .iter()
2794 .map(|proj| {
2795 let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2796 rfx_gen.generate(
2797 company_code,
2798 &proj.project_id,
2799 &proj.category_id,
2800 &qualified_vids,
2801 owner_id,
2802 start_date,
2803 50000.0,
2804 )
2805 })
2806 .collect();
2807 stats.rfx_event_count = rfx_events.len();
2808
2809 let mut bid_gen = BidGenerator::new(seed + 4);
2811 let mut all_bids = Vec::new();
2812 for rfx in &rfx_events {
2813 let bidder_count = vendor_ids.len().clamp(2, 5);
2814 let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2815 let bids = bid_gen.generate(rfx, &responding, start_date);
2816 all_bids.extend(bids);
2817 }
2818 stats.bid_count = all_bids.len();
2819
2820 let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2822 let bid_evaluations: Vec<BidEvaluation> = rfx_events
2823 .iter()
2824 .map(|rfx| {
2825 let rfx_bids: Vec<SupplierBid> = all_bids
2826 .iter()
2827 .filter(|b| b.rfx_id == rfx.rfx_id)
2828 .cloned()
2829 .collect();
2830 eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2831 })
2832 .collect();
2833
2834 let mut contract_gen = ContractGenerator::new(seed + 6);
2836 let contracts: Vec<ProcurementContract> = bid_evaluations
2837 .iter()
2838 .zip(rfx_events.iter())
2839 .filter_map(|(eval, rfx)| {
2840 eval.ranked_bids.first().and_then(|winner| {
2841 all_bids
2842 .iter()
2843 .find(|b| b.bid_id == winner.bid_id)
2844 .map(|winning_bid| {
2845 contract_gen.generate_from_bid(
2846 winning_bid,
2847 Some(&rfx.sourcing_project_id),
2848 &rfx.category_id,
2849 owner_id,
2850 start_date,
2851 )
2852 })
2853 })
2854 })
2855 .collect();
2856 stats.contract_count = contracts.len();
2857
2858 let mut catalog_gen = CatalogGenerator::new(seed + 7);
2860 let catalog_items = catalog_gen.generate(&contracts);
2861 stats.catalog_item_count = catalog_items.len();
2862
2863 let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2865 let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2866 .iter()
2867 .fold(
2868 std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2869 |mut acc, c| {
2870 acc.entry(c.vendor_id.clone()).or_default().push(c);
2871 acc
2872 },
2873 )
2874 .into_iter()
2875 .collect();
2876 let scorecards = scorecard_gen.generate(
2877 company_code,
2878 &vendor_contracts,
2879 start_date,
2880 end_date,
2881 owner_id,
2882 );
2883 stats.scorecard_count = scorecards.len();
2884
2885 let mut sourcing_projects = sourcing_projects;
2888 for project in &mut sourcing_projects {
2889 project.rfx_ids = rfx_events
2891 .iter()
2892 .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2893 .map(|rfx| rfx.rfx_id.clone())
2894 .collect();
2895
2896 project.contract_id = contracts
2898 .iter()
2899 .find(|c| {
2900 c.sourcing_project_id
2901 .as_deref()
2902 .is_some_and(|sp| sp == project.project_id)
2903 })
2904 .map(|c| c.contract_id.clone());
2905
2906 project.spend_analysis_id = spend_analyses
2908 .iter()
2909 .find(|sa| sa.category_id == project.category_id)
2910 .map(|sa| sa.category_id.clone());
2911 }
2912
2913 info!(
2914 "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2915 stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2916 stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2917 );
2918 self.check_resources_with_log("post-sourcing")?;
2919
2920 Ok(SourcingSnapshot {
2921 spend_analyses,
2922 sourcing_projects,
2923 qualifications,
2924 rfx_events,
2925 bids: all_bids,
2926 bid_evaluations,
2927 contracts,
2928 catalog_items,
2929 scorecards,
2930 })
2931 }
2932
2933 fn phase_intercompany(
2935 &mut self,
2936 stats: &mut EnhancedGenerationStatistics,
2937 ) -> SynthResult<IntercompanySnapshot> {
2938 if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2940 debug!("Phase 14b: Skipped (intercompany generation disabled)");
2941 return Ok(IntercompanySnapshot::default());
2942 }
2943
2944 if self.config.companies.len() < 2 {
2946 debug!(
2947 "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2948 self.config.companies.len()
2949 );
2950 return Ok(IntercompanySnapshot::default());
2951 }
2952
2953 info!("Phase 14b: Generating Intercompany Transactions");
2954
2955 let seed = self.seed;
2956 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2957 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2958 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2959
2960 let parent_code = self.config.companies[0].code.clone();
2963 let mut ownership_structure =
2964 datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2965
2966 for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2967 let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2968 format!("REL{:03}", i + 1),
2969 parent_code.clone(),
2970 company.code.clone(),
2971 rust_decimal::Decimal::from(100), start_date,
2973 );
2974 ownership_structure.add_relationship(relationship);
2975 }
2976
2977 let tp_method = match self.config.intercompany.transfer_pricing_method {
2979 datasynth_config::schema::TransferPricingMethod::CostPlus => {
2980 datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2981 }
2982 datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2983 datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2984 }
2985 datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2986 datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2987 }
2988 datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2989 datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2990 }
2991 datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2992 datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2993 }
2994 };
2995
2996 let ic_currency = self
2998 .config
2999 .companies
3000 .first()
3001 .map(|c| c.currency.clone())
3002 .unwrap_or_else(|| "USD".to_string());
3003 let ic_gen_config = datasynth_generators::ICGeneratorConfig {
3004 ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
3005 transfer_pricing_method: tp_method,
3006 markup_percent: rust_decimal::Decimal::from_f64_retain(
3007 self.config.intercompany.markup_percent,
3008 )
3009 .unwrap_or(rust_decimal::Decimal::from(5)),
3010 generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
3011 default_currency: ic_currency,
3012 ..Default::default()
3013 };
3014
3015 let mut ic_generator = datasynth_generators::ICGenerator::new(
3017 ic_gen_config,
3018 ownership_structure.clone(),
3019 seed + 50,
3020 );
3021
3022 let transactions_per_day = 3;
3025 let matched_pairs = ic_generator.generate_transactions_for_period(
3026 start_date,
3027 end_date,
3028 transactions_per_day,
3029 );
3030
3031 let mut seller_entries = Vec::new();
3033 let mut buyer_entries = Vec::new();
3034 let fiscal_year = start_date.year();
3035
3036 for pair in &matched_pairs {
3037 let fiscal_period = pair.posting_date.month();
3038 let (seller_je, buyer_je) =
3039 ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
3040 seller_entries.push(seller_je);
3041 buyer_entries.push(buyer_je);
3042 }
3043
3044 let matching_config = datasynth_generators::ICMatchingConfig {
3046 base_currency: self
3047 .config
3048 .companies
3049 .first()
3050 .map(|c| c.currency.clone())
3051 .unwrap_or_else(|| "USD".to_string()),
3052 ..Default::default()
3053 };
3054 let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
3055 matching_engine.load_matched_pairs(&matched_pairs);
3056 let matching_result = matching_engine.run_matching(end_date);
3057
3058 let mut elimination_entries = Vec::new();
3060 if self.config.intercompany.generate_eliminations {
3061 let elim_config = datasynth_generators::EliminationConfig {
3062 consolidation_entity: "GROUP".to_string(),
3063 base_currency: self
3064 .config
3065 .companies
3066 .first()
3067 .map(|c| c.currency.clone())
3068 .unwrap_or_else(|| "USD".to_string()),
3069 ..Default::default()
3070 };
3071
3072 let mut elim_generator =
3073 datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
3074
3075 let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
3076 let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
3077 matching_result
3078 .matched_balances
3079 .iter()
3080 .chain(matching_result.unmatched_balances.iter())
3081 .cloned()
3082 .collect();
3083
3084 let journal = elim_generator.generate_eliminations(
3085 &fiscal_period,
3086 end_date,
3087 &all_balances,
3088 &matched_pairs,
3089 &std::collections::HashMap::new(), &std::collections::HashMap::new(), );
3092
3093 elimination_entries = journal.entries.clone();
3094 }
3095
3096 let matched_pair_count = matched_pairs.len();
3097 let elimination_entry_count = elimination_entries.len();
3098 let match_rate = matching_result.match_rate;
3099
3100 stats.ic_matched_pair_count = matched_pair_count;
3101 stats.ic_elimination_count = elimination_entry_count;
3102 stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
3103
3104 info!(
3105 "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
3106 matched_pair_count,
3107 stats.ic_transaction_count,
3108 seller_entries.len(),
3109 buyer_entries.len(),
3110 elimination_entry_count,
3111 match_rate * 100.0
3112 );
3113 self.check_resources_with_log("post-intercompany")?;
3114
3115 Ok(IntercompanySnapshot {
3116 matched_pairs,
3117 seller_journal_entries: seller_entries,
3118 buyer_journal_entries: buyer_entries,
3119 elimination_entries,
3120 matched_pair_count,
3121 elimination_entry_count,
3122 match_rate,
3123 })
3124 }
3125
3126 fn phase_financial_reporting(
3128 &mut self,
3129 document_flows: &DocumentFlowSnapshot,
3130 journal_entries: &[JournalEntry],
3131 coa: &Arc<ChartOfAccounts>,
3132 stats: &mut EnhancedGenerationStatistics,
3133 ) -> SynthResult<FinancialReportingSnapshot> {
3134 let fs_enabled = self.phase_config.generate_financial_statements
3135 || self.config.financial_reporting.enabled;
3136 let br_enabled = self.phase_config.generate_bank_reconciliation;
3137
3138 if !fs_enabled && !br_enabled {
3139 debug!("Phase 15: Skipped (financial reporting disabled)");
3140 return Ok(FinancialReportingSnapshot::default());
3141 }
3142
3143 info!("Phase 15: Generating Financial Reporting Data");
3144
3145 let seed = self.seed;
3146 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3147 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3148
3149 let mut financial_statements = Vec::new();
3150 let mut bank_reconciliations = Vec::new();
3151 let mut trial_balances = Vec::new();
3152
3153 if fs_enabled {
3161 let company_code = self
3162 .config
3163 .companies
3164 .first()
3165 .map(|c| c.code.as_str())
3166 .unwrap_or("1000");
3167 let currency = self
3168 .config
3169 .companies
3170 .first()
3171 .map(|c| c.currency.as_str())
3172 .unwrap_or("USD");
3173 let has_journal_entries = !journal_entries.is_empty();
3174
3175 let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
3178
3179 let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
3181 None;
3182
3183 for period in 0..self.config.global.period_months {
3185 let period_start = start_date + chrono::Months::new(period);
3186 let period_end =
3187 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3188 let fiscal_year = period_end.year() as u16;
3189 let fiscal_period = period_end.month() as u8;
3190
3191 if has_journal_entries {
3192 let tb_entries = Self::build_cumulative_trial_balance(
3195 journal_entries,
3196 coa,
3197 company_code,
3198 start_date,
3199 period_end,
3200 fiscal_year,
3201 fiscal_period,
3202 );
3203
3204 let prior_ref = prior_cumulative_tb.as_deref();
3207 let stmts = fs_gen.generate(
3208 company_code,
3209 currency,
3210 &tb_entries,
3211 period_start,
3212 period_end,
3213 fiscal_year,
3214 fiscal_period,
3215 prior_ref,
3216 "SYS-AUTOCLOSE",
3217 );
3218
3219 for stmt in stmts {
3221 if stmt.statement_type == StatementType::CashFlowStatement {
3222 let net_income = Self::calculate_net_income_from_tb(&tb_entries);
3224 let cf_items = Self::build_cash_flow_from_trial_balances(
3225 &tb_entries,
3226 prior_ref,
3227 net_income,
3228 );
3229 financial_statements.push(FinancialStatement {
3230 cash_flow_items: cf_items,
3231 ..stmt
3232 });
3233 } else {
3234 financial_statements.push(stmt);
3235 }
3236 }
3237
3238 trial_balances.push(PeriodTrialBalance {
3240 fiscal_year,
3241 fiscal_period,
3242 period_start,
3243 period_end,
3244 entries: tb_entries.clone(),
3245 });
3246
3247 prior_cumulative_tb = Some(tb_entries);
3249 } else {
3250 let tb_entries = Self::build_trial_balance_from_entries(
3253 journal_entries,
3254 coa,
3255 company_code,
3256 fiscal_year,
3257 fiscal_period,
3258 );
3259
3260 let stmts = fs_gen.generate(
3261 company_code,
3262 currency,
3263 &tb_entries,
3264 period_start,
3265 period_end,
3266 fiscal_year,
3267 fiscal_period,
3268 None,
3269 "SYS-AUTOCLOSE",
3270 );
3271 financial_statements.extend(stmts);
3272
3273 if !tb_entries.is_empty() {
3275 trial_balances.push(PeriodTrialBalance {
3276 fiscal_year,
3277 fiscal_period,
3278 period_start,
3279 period_end,
3280 entries: tb_entries,
3281 });
3282 }
3283 }
3284 }
3285 stats.financial_statement_count = financial_statements.len();
3286 info!(
3287 "Financial statements generated: {} statements (JE-derived: {})",
3288 stats.financial_statement_count, has_journal_entries
3289 );
3290 }
3291
3292 if br_enabled && !document_flows.payments.is_empty() {
3294 let employee_ids: Vec<String> = self
3295 .master_data
3296 .employees
3297 .iter()
3298 .map(|e| e.employee_id.clone())
3299 .collect();
3300 let mut br_gen =
3301 BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
3302
3303 for company in &self.config.companies {
3305 let company_payments: Vec<PaymentReference> = document_flows
3306 .payments
3307 .iter()
3308 .filter(|p| p.header.company_code == company.code)
3309 .map(|p| PaymentReference {
3310 id: p.header.document_id.clone(),
3311 amount: if p.is_vendor { p.amount } else { -p.amount },
3312 date: p.header.document_date,
3313 reference: p
3314 .check_number
3315 .clone()
3316 .or_else(|| p.wire_reference.clone())
3317 .unwrap_or_else(|| p.header.document_id.clone()),
3318 })
3319 .collect();
3320
3321 if company_payments.is_empty() {
3322 continue;
3323 }
3324
3325 let bank_account_id = format!("{}-MAIN", company.code);
3326
3327 for period in 0..self.config.global.period_months {
3329 let period_start = start_date + chrono::Months::new(period);
3330 let period_end =
3331 start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3332
3333 let period_payments: Vec<PaymentReference> = company_payments
3334 .iter()
3335 .filter(|p| p.date >= period_start && p.date <= period_end)
3336 .cloned()
3337 .collect();
3338
3339 let recon = br_gen.generate(
3340 &company.code,
3341 &bank_account_id,
3342 period_start,
3343 period_end,
3344 &company.currency,
3345 &period_payments,
3346 );
3347 bank_reconciliations.push(recon);
3348 }
3349 }
3350 info!(
3351 "Bank reconciliations generated: {} reconciliations",
3352 bank_reconciliations.len()
3353 );
3354 }
3355
3356 stats.bank_reconciliation_count = bank_reconciliations.len();
3357 self.check_resources_with_log("post-financial-reporting")?;
3358
3359 if !trial_balances.is_empty() {
3360 info!(
3361 "Period-close trial balances captured: {} periods",
3362 trial_balances.len()
3363 );
3364 }
3365
3366 Ok(FinancialReportingSnapshot {
3367 financial_statements,
3368 bank_reconciliations,
3369 trial_balances,
3370 })
3371 }
3372
3373 fn build_trial_balance_from_entries(
3379 journal_entries: &[JournalEntry],
3380 coa: &ChartOfAccounts,
3381 company_code: &str,
3382 fiscal_year: u16,
3383 fiscal_period: u8,
3384 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3385 use rust_decimal::Decimal;
3386
3387 let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3389 let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3390
3391 for je in journal_entries {
3392 if je.header.company_code != company_code
3394 || je.header.fiscal_year != fiscal_year
3395 || je.header.fiscal_period != fiscal_period
3396 {
3397 continue;
3398 }
3399
3400 for line in &je.lines {
3401 let acct = &line.gl_account;
3402 *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3403 *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3404 }
3405 }
3406
3407 let mut all_accounts: Vec<&String> = account_debits
3409 .keys()
3410 .chain(account_credits.keys())
3411 .collect::<std::collections::HashSet<_>>()
3412 .into_iter()
3413 .collect();
3414 all_accounts.sort();
3415
3416 let mut entries = Vec::new();
3417
3418 for acct_number in all_accounts {
3419 let debit = account_debits
3420 .get(acct_number)
3421 .copied()
3422 .unwrap_or(Decimal::ZERO);
3423 let credit = account_credits
3424 .get(acct_number)
3425 .copied()
3426 .unwrap_or(Decimal::ZERO);
3427
3428 if debit.is_zero() && credit.is_zero() {
3429 continue;
3430 }
3431
3432 let account_name = coa
3434 .get_account(acct_number)
3435 .map(|gl| gl.short_description.clone())
3436 .unwrap_or_else(|| format!("Account {acct_number}"));
3437
3438 let category = Self::category_from_account_code(acct_number);
3443
3444 entries.push(datasynth_generators::TrialBalanceEntry {
3445 account_code: acct_number.clone(),
3446 account_name,
3447 category,
3448 debit_balance: debit,
3449 credit_balance: credit,
3450 });
3451 }
3452
3453 entries
3454 }
3455
3456 fn build_cumulative_trial_balance(
3463 journal_entries: &[JournalEntry],
3464 coa: &ChartOfAccounts,
3465 company_code: &str,
3466 start_date: NaiveDate,
3467 period_end: NaiveDate,
3468 fiscal_year: u16,
3469 fiscal_period: u8,
3470 ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3471 use rust_decimal::Decimal;
3472
3473 let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3475 let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3476
3477 let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3479 let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3480
3481 for je in journal_entries {
3482 if je.header.company_code != company_code {
3483 continue;
3484 }
3485
3486 for line in &je.lines {
3487 let acct = &line.gl_account;
3488 let category = Self::category_from_account_code(acct);
3489 let is_bs_account = matches!(
3490 category.as_str(),
3491 "Cash"
3492 | "Receivables"
3493 | "Inventory"
3494 | "FixedAssets"
3495 | "Payables"
3496 | "AccruedLiabilities"
3497 | "LongTermDebt"
3498 | "Equity"
3499 );
3500
3501 if is_bs_account {
3502 if je.header.document_date <= period_end
3504 && je.header.document_date >= start_date
3505 {
3506 *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3507 line.debit_amount;
3508 *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3509 line.credit_amount;
3510 }
3511 } else {
3512 if je.header.fiscal_year == fiscal_year
3514 && je.header.fiscal_period == fiscal_period
3515 {
3516 *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3517 line.debit_amount;
3518 *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3519 line.credit_amount;
3520 }
3521 }
3522 }
3523 }
3524
3525 let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3527 all_accounts.extend(bs_debits.keys().cloned());
3528 all_accounts.extend(bs_credits.keys().cloned());
3529 all_accounts.extend(is_debits.keys().cloned());
3530 all_accounts.extend(is_credits.keys().cloned());
3531
3532 let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3533 sorted_accounts.sort();
3534
3535 let mut entries = Vec::new();
3536
3537 for acct_number in &sorted_accounts {
3538 let category = Self::category_from_account_code(acct_number);
3539 let is_bs_account = matches!(
3540 category.as_str(),
3541 "Cash"
3542 | "Receivables"
3543 | "Inventory"
3544 | "FixedAssets"
3545 | "Payables"
3546 | "AccruedLiabilities"
3547 | "LongTermDebt"
3548 | "Equity"
3549 );
3550
3551 let (debit, credit) = if is_bs_account {
3552 (
3553 bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3554 bs_credits
3555 .get(acct_number)
3556 .copied()
3557 .unwrap_or(Decimal::ZERO),
3558 )
3559 } else {
3560 (
3561 is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3562 is_credits
3563 .get(acct_number)
3564 .copied()
3565 .unwrap_or(Decimal::ZERO),
3566 )
3567 };
3568
3569 if debit.is_zero() && credit.is_zero() {
3570 continue;
3571 }
3572
3573 let account_name = coa
3574 .get_account(acct_number)
3575 .map(|gl| gl.short_description.clone())
3576 .unwrap_or_else(|| format!("Account {acct_number}"));
3577
3578 entries.push(datasynth_generators::TrialBalanceEntry {
3579 account_code: acct_number.clone(),
3580 account_name,
3581 category,
3582 debit_balance: debit,
3583 credit_balance: credit,
3584 });
3585 }
3586
3587 entries
3588 }
3589
3590 fn build_cash_flow_from_trial_balances(
3595 current_tb: &[datasynth_generators::TrialBalanceEntry],
3596 prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3597 net_income: rust_decimal::Decimal,
3598 ) -> Vec<CashFlowItem> {
3599 use rust_decimal::Decimal;
3600
3601 let aggregate =
3603 |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3604 let mut map: HashMap<String, Decimal> = HashMap::new();
3605 for entry in tb {
3606 let net = entry.debit_balance - entry.credit_balance;
3607 *map.entry(entry.category.clone()).or_default() += net;
3608 }
3609 map
3610 };
3611
3612 let current = aggregate(current_tb);
3613 let prior = prior_tb.map(aggregate);
3614
3615 let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3617 *map.get(key).unwrap_or(&Decimal::ZERO)
3618 };
3619
3620 let change = |key: &str| -> Decimal {
3622 let curr = get(¤t, key);
3623 match &prior {
3624 Some(p) => curr - get(p, key),
3625 None => curr,
3626 }
3627 };
3628
3629 let fixed_asset_change = change("FixedAssets");
3632 let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3633 -fixed_asset_change
3634 } else {
3635 Decimal::ZERO
3636 };
3637
3638 let ar_change = change("Receivables");
3640 let inventory_change = change("Inventory");
3641 let ap_change = change("Payables");
3643 let accrued_change = change("AccruedLiabilities");
3644
3645 let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3646 + (-ap_change)
3647 + (-accrued_change);
3648
3649 let capex = if fixed_asset_change > Decimal::ZERO {
3651 -fixed_asset_change
3652 } else {
3653 Decimal::ZERO
3654 };
3655 let investing_cf = capex;
3656
3657 let debt_change = -change("LongTermDebt");
3659 let equity_change = -change("Equity");
3660 let financing_cf = debt_change + equity_change;
3661
3662 let net_change = operating_cf + investing_cf + financing_cf;
3663
3664 vec![
3665 CashFlowItem {
3666 item_code: "CF-NI".to_string(),
3667 label: "Net Income".to_string(),
3668 category: CashFlowCategory::Operating,
3669 amount: net_income,
3670 amount_prior: None,
3671 sort_order: 1,
3672 is_total: false,
3673 },
3674 CashFlowItem {
3675 item_code: "CF-DEP".to_string(),
3676 label: "Depreciation & Amortization".to_string(),
3677 category: CashFlowCategory::Operating,
3678 amount: depreciation_addback,
3679 amount_prior: None,
3680 sort_order: 2,
3681 is_total: false,
3682 },
3683 CashFlowItem {
3684 item_code: "CF-AR".to_string(),
3685 label: "Change in Accounts Receivable".to_string(),
3686 category: CashFlowCategory::Operating,
3687 amount: -ar_change,
3688 amount_prior: None,
3689 sort_order: 3,
3690 is_total: false,
3691 },
3692 CashFlowItem {
3693 item_code: "CF-AP".to_string(),
3694 label: "Change in Accounts Payable".to_string(),
3695 category: CashFlowCategory::Operating,
3696 amount: -ap_change,
3697 amount_prior: None,
3698 sort_order: 4,
3699 is_total: false,
3700 },
3701 CashFlowItem {
3702 item_code: "CF-INV".to_string(),
3703 label: "Change in Inventory".to_string(),
3704 category: CashFlowCategory::Operating,
3705 amount: -inventory_change,
3706 amount_prior: None,
3707 sort_order: 5,
3708 is_total: false,
3709 },
3710 CashFlowItem {
3711 item_code: "CF-OP".to_string(),
3712 label: "Net Cash from Operating Activities".to_string(),
3713 category: CashFlowCategory::Operating,
3714 amount: operating_cf,
3715 amount_prior: None,
3716 sort_order: 6,
3717 is_total: true,
3718 },
3719 CashFlowItem {
3720 item_code: "CF-CAPEX".to_string(),
3721 label: "Capital Expenditures".to_string(),
3722 category: CashFlowCategory::Investing,
3723 amount: capex,
3724 amount_prior: None,
3725 sort_order: 7,
3726 is_total: false,
3727 },
3728 CashFlowItem {
3729 item_code: "CF-INV-T".to_string(),
3730 label: "Net Cash from Investing Activities".to_string(),
3731 category: CashFlowCategory::Investing,
3732 amount: investing_cf,
3733 amount_prior: None,
3734 sort_order: 8,
3735 is_total: true,
3736 },
3737 CashFlowItem {
3738 item_code: "CF-DEBT".to_string(),
3739 label: "Net Borrowings / (Repayments)".to_string(),
3740 category: CashFlowCategory::Financing,
3741 amount: debt_change,
3742 amount_prior: None,
3743 sort_order: 9,
3744 is_total: false,
3745 },
3746 CashFlowItem {
3747 item_code: "CF-EQ".to_string(),
3748 label: "Equity Changes".to_string(),
3749 category: CashFlowCategory::Financing,
3750 amount: equity_change,
3751 amount_prior: None,
3752 sort_order: 10,
3753 is_total: false,
3754 },
3755 CashFlowItem {
3756 item_code: "CF-FIN-T".to_string(),
3757 label: "Net Cash from Financing Activities".to_string(),
3758 category: CashFlowCategory::Financing,
3759 amount: financing_cf,
3760 amount_prior: None,
3761 sort_order: 11,
3762 is_total: true,
3763 },
3764 CashFlowItem {
3765 item_code: "CF-NET".to_string(),
3766 label: "Net Change in Cash".to_string(),
3767 category: CashFlowCategory::Operating,
3768 amount: net_change,
3769 amount_prior: None,
3770 sort_order: 12,
3771 is_total: true,
3772 },
3773 ]
3774 }
3775
3776 fn calculate_net_income_from_tb(
3780 tb: &[datasynth_generators::TrialBalanceEntry],
3781 ) -> rust_decimal::Decimal {
3782 use rust_decimal::Decimal;
3783
3784 let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3785 for entry in tb {
3786 let net = entry.debit_balance - entry.credit_balance;
3787 *aggregated.entry(entry.category.clone()).or_default() += net;
3788 }
3789
3790 let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3791 let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3792 let opex = *aggregated
3793 .get("OperatingExpenses")
3794 .unwrap_or(&Decimal::ZERO);
3795 let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3796 let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3797
3798 let operating_income = revenue - cogs - opex - other_expenses - other_income;
3801 let tax_rate = Decimal::new(25, 2); let tax = operating_income * tax_rate;
3803 operating_income - tax
3804 }
3805
/// Maps an account code to a reporting category using its first two
/// characters.
///
/// | Prefix  | Category             |
/// |---------|----------------------|
/// | 10      | `Cash`               |
/// | 11      | `Receivables`        |
/// | 12-14   | `Inventory`          |
/// | 15-19   | `FixedAssets`        |
/// | 20      | `Payables`           |
/// | 21-24   | `AccruedLiabilities` |
/// | 25-29   | `LongTermDebt`       |
/// | 30-39   | `Equity`             |
/// | 40-44   | `Revenue`            |
/// | 50-52   | `CostOfSales`        |
/// | 60-69   | `OperatingExpenses`  |
/// | 70-74   | `OtherIncome`        |
/// | 80-89   | `OtherExpenses`      |
///
/// Anything else — including codes shorter than two characters and prefixes
/// not listed above — falls back to `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    // Only ASCII digit pairs can match, so inspecting the leading bytes is
    // equivalent to comparing the first two characters.
    let category = match code.as_bytes() {
        [b'1', b'0', ..] => "Cash",
        [b'1', b'1', ..] => "Receivables",
        [b'1', b'2'..=b'4', ..] => "Inventory",
        [b'1', b'5'..=b'9', ..] => "FixedAssets",
        [b'2', b'0', ..] => "Payables",
        [b'2', b'1'..=b'4', ..] => "AccruedLiabilities",
        [b'2', b'5'..=b'9', ..] => "LongTermDebt",
        [b'3', b'0'..=b'9', ..] => "Equity",
        [b'4', b'0'..=b'4', ..] => "Revenue",
        [b'5', b'0'..=b'2', ..] => "CostOfSales",
        [b'6', b'0'..=b'9', ..] => "OperatingExpenses",
        [b'7', b'0'..=b'4', ..] => "OtherIncome",
        [b'8', b'0'..=b'9', ..] => "OtherExpenses",
        _ => "OperatingExpenses",
    };
    category.to_owned()
}
3834
3835 fn phase_hr_data(
3837 &mut self,
3838 stats: &mut EnhancedGenerationStatistics,
3839 ) -> SynthResult<HrSnapshot> {
3840 if !self.config.hr.enabled {
3841 debug!("Phase 16: Skipped (HR generation disabled)");
3842 return Ok(HrSnapshot::default());
3843 }
3844
3845 info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3846
3847 let seed = self.seed;
3848 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3849 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3850 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3851 let company_code = self
3852 .config
3853 .companies
3854 .first()
3855 .map(|c| c.code.as_str())
3856 .unwrap_or("1000");
3857 let currency = self
3858 .config
3859 .companies
3860 .first()
3861 .map(|c| c.currency.as_str())
3862 .unwrap_or("USD");
3863
3864 let employee_ids: Vec<String> = self
3865 .master_data
3866 .employees
3867 .iter()
3868 .map(|e| e.employee_id.clone())
3869 .collect();
3870
3871 if employee_ids.is_empty() {
3872 debug!("Phase 16: Skipped (no employees available)");
3873 return Ok(HrSnapshot::default());
3874 }
3875
3876 let cost_center_ids: Vec<String> = self
3879 .master_data
3880 .employees
3881 .iter()
3882 .filter_map(|e| e.cost_center.clone())
3883 .collect::<std::collections::HashSet<_>>()
3884 .into_iter()
3885 .collect();
3886
3887 let mut snapshot = HrSnapshot::default();
3888
3889 if self.config.hr.payroll.enabled {
3891 let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3892 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3893
3894 let payroll_pack = self.primary_pack();
3896
3897 payroll_gen.set_country_pack(payroll_pack.clone());
3900
3901 let employees_with_salary: Vec<(
3902 String,
3903 rust_decimal::Decimal,
3904 Option<String>,
3905 Option<String>,
3906 )> = self
3907 .master_data
3908 .employees
3909 .iter()
3910 .map(|e| {
3911 (
3912 e.employee_id.clone(),
3913 rust_decimal::Decimal::from(5000), e.cost_center.clone(),
3915 e.department_id.clone(),
3916 )
3917 })
3918 .collect();
3919
3920 for month in 0..self.config.global.period_months {
3921 let period_start = start_date + chrono::Months::new(month);
3922 let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3923 let (run, items) = payroll_gen.generate(
3924 company_code,
3925 &employees_with_salary,
3926 period_start,
3927 period_end,
3928 currency,
3929 );
3930 snapshot.payroll_runs.push(run);
3931 snapshot.payroll_run_count += 1;
3932 snapshot.payroll_line_item_count += items.len();
3933 snapshot.payroll_line_items.extend(items);
3934 }
3935 }
3936
3937 if self.config.hr.time_attendance.enabled {
3939 let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3940 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3941 let entries = time_gen.generate(
3942 &employee_ids,
3943 start_date,
3944 end_date,
3945 &self.config.hr.time_attendance,
3946 );
3947 snapshot.time_entry_count = entries.len();
3948 snapshot.time_entries = entries;
3949 }
3950
3951 if self.config.hr.expenses.enabled {
3953 let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3954 .with_pools(employee_ids.clone(), cost_center_ids.clone());
3955 expense_gen.set_country_pack(self.primary_pack().clone());
3956 let company_currency = self
3957 .config
3958 .companies
3959 .first()
3960 .map(|c| c.currency.as_str())
3961 .unwrap_or("USD");
3962 let reports = expense_gen.generate_with_currency(
3963 &employee_ids,
3964 start_date,
3965 end_date,
3966 &self.config.hr.expenses,
3967 company_currency,
3968 );
3969 snapshot.expense_report_count = reports.len();
3970 snapshot.expense_reports = reports;
3971 }
3972
3973 if self.config.hr.payroll.enabled {
3975 let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
3976 let employee_pairs: Vec<(String, String)> = self
3977 .master_data
3978 .employees
3979 .iter()
3980 .map(|e| (e.employee_id.clone(), e.display_name.clone()))
3981 .collect();
3982 let enrollments =
3983 benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
3984 snapshot.benefit_enrollment_count = enrollments.len();
3985 snapshot.benefit_enrollments = enrollments;
3986 }
3987
3988 stats.payroll_run_count = snapshot.payroll_run_count;
3989 stats.time_entry_count = snapshot.time_entry_count;
3990 stats.expense_report_count = snapshot.expense_report_count;
3991 stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
3992
3993 info!(
3994 "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments",
3995 snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3996 snapshot.time_entry_count, snapshot.expense_report_count,
3997 snapshot.benefit_enrollment_count
3998 );
3999 self.check_resources_with_log("post-hr")?;
4000
4001 Ok(snapshot)
4002 }
4003
4004 fn phase_accounting_standards(
4006 &mut self,
4007 stats: &mut EnhancedGenerationStatistics,
4008 ) -> SynthResult<AccountingStandardsSnapshot> {
4009 if !self.phase_config.generate_accounting_standards
4010 || !self.config.accounting_standards.enabled
4011 {
4012 debug!("Phase 17: Skipped (accounting standards generation disabled)");
4013 return Ok(AccountingStandardsSnapshot::default());
4014 }
4015 info!("Phase 17: Generating Accounting Standards Data");
4016
4017 let seed = self.seed;
4018 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4019 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4020 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4021 let company_code = self
4022 .config
4023 .companies
4024 .first()
4025 .map(|c| c.code.as_str())
4026 .unwrap_or("1000");
4027 let currency = self
4028 .config
4029 .companies
4030 .first()
4031 .map(|c| c.currency.as_str())
4032 .unwrap_or("USD");
4033
4034 let framework = match self.config.accounting_standards.framework {
4039 Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
4040 datasynth_standards::framework::AccountingFramework::UsGaap
4041 }
4042 Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
4043 datasynth_standards::framework::AccountingFramework::Ifrs
4044 }
4045 Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
4046 datasynth_standards::framework::AccountingFramework::DualReporting
4047 }
4048 Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
4049 datasynth_standards::framework::AccountingFramework::FrenchGaap
4050 }
4051 Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
4052 datasynth_standards::framework::AccountingFramework::GermanGaap
4053 }
4054 None => {
4055 let pack = self.primary_pack();
4057 let pack_fw = pack.accounting.framework.as_str();
4058 match pack_fw {
4059 "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
4060 "dual_reporting" => {
4061 datasynth_standards::framework::AccountingFramework::DualReporting
4062 }
4063 "french_gaap" => {
4064 datasynth_standards::framework::AccountingFramework::FrenchGaap
4065 }
4066 "german_gaap" | "hgb" => {
4067 datasynth_standards::framework::AccountingFramework::GermanGaap
4068 }
4069 _ => datasynth_standards::framework::AccountingFramework::UsGaap,
4071 }
4072 }
4073 };
4074
4075 let mut snapshot = AccountingStandardsSnapshot::default();
4076
4077 if self.config.accounting_standards.revenue_recognition.enabled {
4079 let customer_ids: Vec<String> = self
4080 .master_data
4081 .customers
4082 .iter()
4083 .map(|c| c.customer_id.clone())
4084 .collect();
4085
4086 if !customer_ids.is_empty() {
4087 let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
4088 let contracts = rev_gen.generate(
4089 company_code,
4090 &customer_ids,
4091 start_date,
4092 end_date,
4093 currency,
4094 &self.config.accounting_standards.revenue_recognition,
4095 framework,
4096 );
4097 snapshot.revenue_contract_count = contracts.len();
4098 snapshot.contracts = contracts;
4099 }
4100 }
4101
4102 if self.config.accounting_standards.impairment.enabled {
4104 let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
4105 .master_data
4106 .assets
4107 .iter()
4108 .map(|a| {
4109 (
4110 a.asset_id.clone(),
4111 a.description.clone(),
4112 a.acquisition_cost,
4113 )
4114 })
4115 .collect();
4116
4117 if !asset_data.is_empty() {
4118 let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
4119 let tests = imp_gen.generate(
4120 company_code,
4121 &asset_data,
4122 end_date,
4123 &self.config.accounting_standards.impairment,
4124 framework,
4125 );
4126 snapshot.impairment_test_count = tests.len();
4127 snapshot.impairment_tests = tests;
4128 }
4129 }
4130
4131 stats.revenue_contract_count = snapshot.revenue_contract_count;
4132 stats.impairment_test_count = snapshot.impairment_test_count;
4133
4134 info!(
4135 "Accounting standards data generated: {} revenue contracts, {} impairment tests",
4136 snapshot.revenue_contract_count, snapshot.impairment_test_count
4137 );
4138 self.check_resources_with_log("post-accounting-standards")?;
4139
4140 Ok(snapshot)
4141 }
4142
4143 fn phase_manufacturing(
4145 &mut self,
4146 stats: &mut EnhancedGenerationStatistics,
4147 ) -> SynthResult<ManufacturingSnapshot> {
4148 if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
4149 debug!("Phase 18: Skipped (manufacturing generation disabled)");
4150 return Ok(ManufacturingSnapshot::default());
4151 }
4152 info!("Phase 18: Generating Manufacturing Data");
4153
4154 let seed = self.seed;
4155 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4156 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4157 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4158 let company_code = self
4159 .config
4160 .companies
4161 .first()
4162 .map(|c| c.code.as_str())
4163 .unwrap_or("1000");
4164
4165 let material_data: Vec<(String, String)> = self
4166 .master_data
4167 .materials
4168 .iter()
4169 .map(|m| (m.material_id.clone(), m.description.clone()))
4170 .collect();
4171
4172 if material_data.is_empty() {
4173 debug!("Phase 18: Skipped (no materials available)");
4174 return Ok(ManufacturingSnapshot::default());
4175 }
4176
4177 let mut snapshot = ManufacturingSnapshot::default();
4178
4179 let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
4181 let production_orders = prod_gen.generate(
4182 company_code,
4183 &material_data,
4184 start_date,
4185 end_date,
4186 &self.config.manufacturing.production_orders,
4187 &self.config.manufacturing.costing,
4188 &self.config.manufacturing.routing,
4189 );
4190 snapshot.production_order_count = production_orders.len();
4191
4192 let inspection_data: Vec<(String, String, String)> = production_orders
4194 .iter()
4195 .map(|po| {
4196 (
4197 po.order_id.clone(),
4198 po.material_id.clone(),
4199 po.material_description.clone(),
4200 )
4201 })
4202 .collect();
4203
4204 snapshot.production_orders = production_orders;
4205
4206 if !inspection_data.is_empty() {
4207 let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
4208 let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
4209 snapshot.quality_inspection_count = inspections.len();
4210 snapshot.quality_inspections = inspections;
4211 }
4212
4213 let storage_locations: Vec<(String, String)> = material_data
4215 .iter()
4216 .enumerate()
4217 .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
4218 .collect();
4219
4220 let employee_ids: Vec<String> = self
4221 .master_data
4222 .employees
4223 .iter()
4224 .map(|e| e.employee_id.clone())
4225 .collect();
4226 let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
4227 .with_employee_pool(employee_ids);
4228 let mut cycle_count_total = 0usize;
4229 for month in 0..self.config.global.period_months {
4230 let count_date = start_date + chrono::Months::new(month);
4231 let items_per_count = storage_locations.len().clamp(10, 50);
4232 let cc = cc_gen.generate(
4233 company_code,
4234 &storage_locations,
4235 count_date,
4236 items_per_count,
4237 );
4238 snapshot.cycle_counts.push(cc);
4239 cycle_count_total += 1;
4240 }
4241 snapshot.cycle_count_count = cycle_count_total;
4242
4243 let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
4245 let bom_components = bom_gen.generate(company_code, &material_data);
4246 snapshot.bom_component_count = bom_components.len();
4247 snapshot.bom_components = bom_components;
4248
4249 let currency = self
4251 .config
4252 .companies
4253 .first()
4254 .map(|c| c.currency.as_str())
4255 .unwrap_or("USD");
4256 let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
4257 let inventory_movements = inv_mov_gen.generate(
4258 company_code,
4259 &material_data,
4260 start_date,
4261 end_date,
4262 2,
4263 currency,
4264 );
4265 snapshot.inventory_movement_count = inventory_movements.len();
4266 snapshot.inventory_movements = inventory_movements;
4267
4268 stats.production_order_count = snapshot.production_order_count;
4269 stats.quality_inspection_count = snapshot.quality_inspection_count;
4270 stats.cycle_count_count = snapshot.cycle_count_count;
4271 stats.bom_component_count = snapshot.bom_component_count;
4272 stats.inventory_movement_count = snapshot.inventory_movement_count;
4273
4274 info!(
4275 "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
4276 snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
4277 snapshot.bom_component_count, snapshot.inventory_movement_count
4278 );
4279 self.check_resources_with_log("post-manufacturing")?;
4280
4281 Ok(snapshot)
4282 }
4283
4284 fn phase_sales_kpi_budgets(
4286 &mut self,
4287 coa: &Arc<ChartOfAccounts>,
4288 financial_reporting: &FinancialReportingSnapshot,
4289 stats: &mut EnhancedGenerationStatistics,
4290 ) -> SynthResult<SalesKpiBudgetsSnapshot> {
4291 if !self.phase_config.generate_sales_kpi_budgets {
4292 debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
4293 return Ok(SalesKpiBudgetsSnapshot::default());
4294 }
4295 info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
4296
4297 let seed = self.seed;
4298 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4299 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4300 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4301 let company_code = self
4302 .config
4303 .companies
4304 .first()
4305 .map(|c| c.code.as_str())
4306 .unwrap_or("1000");
4307
4308 let mut snapshot = SalesKpiBudgetsSnapshot::default();
4309
4310 if self.config.sales_quotes.enabled {
4312 let customer_data: Vec<(String, String)> = self
4313 .master_data
4314 .customers
4315 .iter()
4316 .map(|c| (c.customer_id.clone(), c.name.clone()))
4317 .collect();
4318 let material_data: Vec<(String, String)> = self
4319 .master_data
4320 .materials
4321 .iter()
4322 .map(|m| (m.material_id.clone(), m.description.clone()))
4323 .collect();
4324
4325 if !customer_data.is_empty() && !material_data.is_empty() {
4326 let employee_ids: Vec<String> = self
4327 .master_data
4328 .employees
4329 .iter()
4330 .map(|e| e.employee_id.clone())
4331 .collect();
4332 let customer_ids: Vec<String> = self
4333 .master_data
4334 .customers
4335 .iter()
4336 .map(|c| c.customer_id.clone())
4337 .collect();
4338 let company_currency = self
4339 .config
4340 .companies
4341 .first()
4342 .map(|c| c.currency.as_str())
4343 .unwrap_or("USD");
4344
4345 let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
4346 .with_pools(employee_ids, customer_ids);
4347 let quotes = quote_gen.generate_with_currency(
4348 company_code,
4349 &customer_data,
4350 &material_data,
4351 start_date,
4352 end_date,
4353 &self.config.sales_quotes,
4354 company_currency,
4355 );
4356 snapshot.sales_quote_count = quotes.len();
4357 snapshot.sales_quotes = quotes;
4358 }
4359 }
4360
4361 if self.config.financial_reporting.management_kpis.enabled {
4363 let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
4364 let mut kpis = kpi_gen.generate(
4365 company_code,
4366 start_date,
4367 end_date,
4368 &self.config.financial_reporting.management_kpis,
4369 );
4370
4371 {
4373 use rust_decimal::Decimal;
4374
4375 if let Some(income_stmt) =
4376 financial_reporting.financial_statements.iter().find(|fs| {
4377 fs.statement_type == StatementType::IncomeStatement
4378 && fs.company_code == company_code
4379 })
4380 {
4381 let total_revenue: Decimal = income_stmt
4383 .line_items
4384 .iter()
4385 .filter(|li| li.section.contains("Revenue") && !li.is_total)
4386 .map(|li| li.amount)
4387 .sum();
4388 let total_cogs: Decimal = income_stmt
4389 .line_items
4390 .iter()
4391 .filter(|li| {
4392 (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4393 && !li.is_total
4394 })
4395 .map(|li| li.amount.abs())
4396 .sum();
4397 let total_opex: Decimal = income_stmt
4398 .line_items
4399 .iter()
4400 .filter(|li| {
4401 li.section.contains("Expense")
4402 && !li.is_total
4403 && !li.section.contains("Cost")
4404 })
4405 .map(|li| li.amount.abs())
4406 .sum();
4407
4408 if total_revenue > Decimal::ZERO {
4409 let hundred = Decimal::from(100);
4410 let gross_margin_pct =
4411 ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4412 let operating_income = total_revenue - total_cogs - total_opex;
4413 let op_margin_pct =
4414 (operating_income * hundred / total_revenue).round_dp(2);
4415
4416 for kpi in &mut kpis {
4418 if kpi.name == "Gross Margin" {
4419 kpi.value = gross_margin_pct;
4420 } else if kpi.name == "Operating Margin" {
4421 kpi.value = op_margin_pct;
4422 }
4423 }
4424 }
4425 }
4426
4427 if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4429 fs.statement_type == StatementType::BalanceSheet
4430 && fs.company_code == company_code
4431 }) {
4432 let current_assets: Decimal = bs
4433 .line_items
4434 .iter()
4435 .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4436 .map(|li| li.amount)
4437 .sum();
4438 let current_liabilities: Decimal = bs
4439 .line_items
4440 .iter()
4441 .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4442 .map(|li| li.amount.abs())
4443 .sum();
4444
4445 if current_liabilities > Decimal::ZERO {
4446 let current_ratio = (current_assets / current_liabilities).round_dp(2);
4447 for kpi in &mut kpis {
4448 if kpi.name == "Current Ratio" {
4449 kpi.value = current_ratio;
4450 }
4451 }
4452 }
4453 }
4454 }
4455
4456 snapshot.kpi_count = kpis.len();
4457 snapshot.kpis = kpis;
4458 }
4459
4460 if self.config.financial_reporting.budgets.enabled {
4462 let account_data: Vec<(String, String)> = coa
4463 .accounts
4464 .iter()
4465 .map(|a| (a.account_number.clone(), a.short_description.clone()))
4466 .collect();
4467
4468 if !account_data.is_empty() {
4469 let fiscal_year = start_date.year() as u32;
4470 let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4471 let budget = budget_gen.generate(
4472 company_code,
4473 fiscal_year,
4474 &account_data,
4475 &self.config.financial_reporting.budgets,
4476 );
4477 snapshot.budget_line_count = budget.line_items.len();
4478 snapshot.budgets.push(budget);
4479 }
4480 }
4481
4482 stats.sales_quote_count = snapshot.sales_quote_count;
4483 stats.kpi_count = snapshot.kpi_count;
4484 stats.budget_line_count = snapshot.budget_line_count;
4485
4486 info!(
4487 "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4488 snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4489 );
4490 self.check_resources_with_log("post-sales-kpi-budgets")?;
4491
4492 Ok(snapshot)
4493 }
4494
4495 fn phase_tax_generation(
4497 &mut self,
4498 document_flows: &DocumentFlowSnapshot,
4499 stats: &mut EnhancedGenerationStatistics,
4500 ) -> SynthResult<TaxSnapshot> {
4501 if !self.phase_config.generate_tax || !self.config.tax.enabled {
4502 debug!("Phase 20: Skipped (tax generation disabled)");
4503 return Ok(TaxSnapshot::default());
4504 }
4505 info!("Phase 20: Generating Tax Data");
4506
4507 let seed = self.seed;
4508 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4509 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4510 let fiscal_year = start_date.year();
4511 let company_code = self
4512 .config
4513 .companies
4514 .first()
4515 .map(|c| c.code.as_str())
4516 .unwrap_or("1000");
4517
4518 let mut gen =
4519 datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4520
4521 let pack = self.primary_pack().clone();
4522 let (jurisdictions, codes) =
4523 gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4524
4525 let mut provisions = Vec::new();
4527 if self.config.tax.provisions.enabled {
4528 let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4529 for company in &self.config.companies {
4530 let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4531 let statutory_rate = rust_decimal::Decimal::new(
4532 (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
4533 2,
4534 );
4535 let provision = provision_gen.generate(
4536 &company.code,
4537 start_date,
4538 pre_tax_income,
4539 statutory_rate,
4540 );
4541 provisions.push(provision);
4542 }
4543 }
4544
4545 let mut tax_lines = Vec::new();
4547 if !codes.is_empty() {
4548 let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4549 datasynth_generators::TaxLineGeneratorConfig::default(),
4550 codes.clone(),
4551 seed + 72,
4552 );
4553
4554 let buyer_country = self
4557 .config
4558 .companies
4559 .first()
4560 .map(|c| c.country.as_str())
4561 .unwrap_or("US");
4562 for vi in &document_flows.vendor_invoices {
4563 let lines = tax_line_gen.generate_for_document(
4564 datasynth_core::models::TaxableDocumentType::VendorInvoice,
4565 &vi.header.document_id,
4566 buyer_country, buyer_country,
4568 vi.payable_amount,
4569 vi.header.document_date,
4570 None,
4571 );
4572 tax_lines.extend(lines);
4573 }
4574
4575 for ci in &document_flows.customer_invoices {
4577 let lines = tax_line_gen.generate_for_document(
4578 datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4579 &ci.header.document_id,
4580 buyer_country, buyer_country,
4582 ci.total_gross_amount,
4583 ci.header.document_date,
4584 None,
4585 );
4586 tax_lines.extend(lines);
4587 }
4588 }
4589
4590 let snapshot = TaxSnapshot {
4591 jurisdiction_count: jurisdictions.len(),
4592 code_count: codes.len(),
4593 jurisdictions,
4594 codes,
4595 tax_provisions: provisions,
4596 tax_lines,
4597 tax_returns: Vec::new(),
4598 withholding_records: Vec::new(),
4599 tax_anomaly_labels: Vec::new(),
4600 };
4601
4602 stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4603 stats.tax_code_count = snapshot.code_count;
4604 stats.tax_provision_count = snapshot.tax_provisions.len();
4605 stats.tax_line_count = snapshot.tax_lines.len();
4606
4607 info!(
4608 "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4609 snapshot.jurisdiction_count,
4610 snapshot.code_count,
4611 snapshot.tax_provisions.len()
4612 );
4613 self.check_resources_with_log("post-tax")?;
4614
4615 Ok(snapshot)
4616 }
4617
4618 fn phase_esg_generation(
4620 &mut self,
4621 document_flows: &DocumentFlowSnapshot,
4622 stats: &mut EnhancedGenerationStatistics,
4623 ) -> SynthResult<EsgSnapshot> {
4624 if !self.phase_config.generate_esg || !self.config.esg.enabled {
4625 debug!("Phase 21: Skipped (ESG generation disabled)");
4626 return Ok(EsgSnapshot::default());
4627 }
4628 info!("Phase 21: Generating ESG Data");
4629
4630 let seed = self.seed;
4631 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4632 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4633 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4634 let entity_id = self
4635 .config
4636 .companies
4637 .first()
4638 .map(|c| c.code.as_str())
4639 .unwrap_or("1000");
4640
4641 let esg_cfg = &self.config.esg;
4642 let mut snapshot = EsgSnapshot::default();
4643
4644 let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4646 esg_cfg.environmental.energy.clone(),
4647 seed + 80,
4648 );
4649 let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4650
4651 let facility_count = esg_cfg.environmental.energy.facility_count;
4653 let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4654 snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4655
4656 let mut waste_gen = datasynth_generators::WasteGenerator::new(
4658 seed + 82,
4659 esg_cfg.environmental.waste.diversion_target,
4660 facility_count,
4661 );
4662 snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4663
4664 let mut emission_gen =
4666 datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4667
4668 let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4670 .iter()
4671 .map(|e| datasynth_generators::EnergyInput {
4672 facility_id: e.facility_id.clone(),
4673 energy_type: match e.energy_source {
4674 EnergySourceType::NaturalGas => {
4675 datasynth_generators::EnergyInputType::NaturalGas
4676 }
4677 EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4678 EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4679 _ => datasynth_generators::EnergyInputType::Electricity,
4680 },
4681 consumption_kwh: e.consumption_kwh,
4682 period: e.period,
4683 })
4684 .collect();
4685
4686 let mut emissions = Vec::new();
4687 emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4688 emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4689
4690 let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4692 let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4693 for payment in &document_flows.payments {
4694 if payment.is_vendor {
4695 *totals
4696 .entry(payment.business_partner_id.clone())
4697 .or_default() += payment.amount;
4698 }
4699 }
4700 totals
4701 };
4702 let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4703 .master_data
4704 .vendors
4705 .iter()
4706 .map(|v| {
4707 let spend = vendor_payment_totals
4708 .get(&v.vendor_id)
4709 .copied()
4710 .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4711 datasynth_generators::VendorSpendInput {
4712 vendor_id: v.vendor_id.clone(),
4713 category: format!("{:?}", v.vendor_type).to_lowercase(),
4714 spend,
4715 country: v.country.clone(),
4716 }
4717 })
4718 .collect();
4719 if !vendor_spend.is_empty() {
4720 emissions.extend(emission_gen.generate_scope3_purchased_goods(
4721 entity_id,
4722 &vendor_spend,
4723 start_date,
4724 end_date,
4725 ));
4726 }
4727
4728 let headcount = self.master_data.employees.len() as u32;
4730 if headcount > 0 {
4731 let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4732 emissions.extend(emission_gen.generate_scope3_business_travel(
4733 entity_id,
4734 travel_spend,
4735 start_date,
4736 ));
4737 emissions
4738 .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4739 }
4740
4741 snapshot.emission_count = emissions.len();
4742 snapshot.emissions = emissions;
4743 snapshot.energy = energy_records;
4744
4745 let mut workforce_gen =
4747 datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4748 let total_headcount = headcount.max(100);
4749 snapshot.diversity =
4750 workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4751 snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4752 snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4753 entity_id,
4754 facility_count,
4755 start_date,
4756 end_date,
4757 );
4758
4759 let total_hours = total_headcount as u64 * 2000; let safety_metric = workforce_gen.compute_safety_metrics(
4762 entity_id,
4763 &snapshot.safety_incidents,
4764 total_hours,
4765 start_date,
4766 );
4767 snapshot.safety_metrics = vec![safety_metric];
4768
4769 let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4771 seed + 85,
4772 esg_cfg.governance.board_size,
4773 esg_cfg.governance.independence_target,
4774 );
4775 snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4776
4777 let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4779 esg_cfg.supply_chain_esg.clone(),
4780 seed + 86,
4781 );
4782 let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4783 .master_data
4784 .vendors
4785 .iter()
4786 .map(|v| datasynth_generators::VendorInput {
4787 vendor_id: v.vendor_id.clone(),
4788 country: v.country.clone(),
4789 industry: format!("{:?}", v.vendor_type).to_lowercase(),
4790 quality_score: None,
4791 })
4792 .collect();
4793 snapshot.supplier_assessments =
4794 supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4795
4796 let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4798 seed + 87,
4799 esg_cfg.reporting.clone(),
4800 esg_cfg.climate_scenarios.clone(),
4801 );
4802 snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4803 snapshot.disclosures = disclosure_gen.generate_disclosures(
4804 entity_id,
4805 &snapshot.materiality,
4806 start_date,
4807 end_date,
4808 );
4809 snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4810 snapshot.disclosure_count = snapshot.disclosures.len();
4811
4812 if esg_cfg.anomaly_rate > 0.0 {
4814 let mut anomaly_injector =
4815 datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4816 let mut labels = Vec::new();
4817 labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4818 labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4819 labels.extend(
4820 anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4821 );
4822 labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4823 labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4824 snapshot.anomaly_labels = labels;
4825 }
4826
4827 stats.esg_emission_count = snapshot.emission_count;
4828 stats.esg_disclosure_count = snapshot.disclosure_count;
4829
4830 info!(
4831 "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4832 snapshot.emission_count,
4833 snapshot.disclosure_count,
4834 snapshot.supplier_assessments.len()
4835 );
4836 self.check_resources_with_log("post-esg")?;
4837
4838 Ok(snapshot)
4839 }
4840
4841 fn phase_treasury_data(
4843 &mut self,
4844 document_flows: &DocumentFlowSnapshot,
4845 subledger: &SubledgerSnapshot,
4846 intercompany: &IntercompanySnapshot,
4847 stats: &mut EnhancedGenerationStatistics,
4848 ) -> SynthResult<TreasurySnapshot> {
4849 if !self.config.treasury.enabled {
4850 debug!("Phase 22: Skipped (treasury generation disabled)");
4851 return Ok(TreasurySnapshot::default());
4852 }
4853 info!("Phase 22: Generating Treasury Data");
4854
4855 let seed = self.seed;
4856 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4857 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4858 let currency = self
4859 .config
4860 .companies
4861 .first()
4862 .map(|c| c.currency.as_str())
4863 .unwrap_or("USD");
4864 let entity_id = self
4865 .config
4866 .companies
4867 .first()
4868 .map(|c| c.code.as_str())
4869 .unwrap_or("1000");
4870
4871 let mut snapshot = TreasurySnapshot::default();
4872
4873 let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4875 self.config.treasury.debt.clone(),
4876 seed + 90,
4877 );
4878 snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4879
4880 let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4882 self.config.treasury.hedging.clone(),
4883 seed + 91,
4884 );
4885 for debt in &snapshot.debt_instruments {
4886 if debt.rate_type == InterestRateType::Variable {
4887 let swap = hedge_gen.generate_ir_swap(
4888 currency,
4889 debt.principal,
4890 debt.origination_date,
4891 debt.maturity_date,
4892 );
4893 snapshot.hedging_instruments.push(swap);
4894 }
4895 }
4896
4897 {
4900 let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4901 for payment in &document_flows.payments {
4902 if payment.currency != currency {
4903 let entry = fx_map
4904 .entry(payment.currency.clone())
4905 .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4906 entry.0 += payment.amount;
4907 if payment.header.document_date > entry.1 {
4909 entry.1 = payment.header.document_date;
4910 }
4911 }
4912 }
4913 if !fx_map.is_empty() {
4914 let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4915 .into_iter()
4916 .map(|(foreign_ccy, (net_amount, settlement_date))| {
4917 datasynth_generators::treasury::FxExposure {
4918 currency_pair: format!("{foreign_ccy}/{currency}"),
4919 foreign_currency: foreign_ccy,
4920 net_amount,
4921 settlement_date,
4922 description: "AP payment FX exposure".to_string(),
4923 }
4924 })
4925 .collect();
4926 let (fx_instruments, fx_relationships) =
4927 hedge_gen.generate(start_date, &fx_exposures);
4928 snapshot.hedging_instruments.extend(fx_instruments);
4929 snapshot.hedge_relationships.extend(fx_relationships);
4930 }
4931 }
4932
4933 if self.config.treasury.anomaly_rate > 0.0 {
4935 let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4936 seed + 92,
4937 self.config.treasury.anomaly_rate,
4938 );
4939 let mut labels = Vec::new();
4940 labels.extend(
4941 anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4942 );
4943 snapshot.treasury_anomaly_labels = labels;
4944 }
4945
4946 if self.config.treasury.cash_positioning.enabled {
4948 let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4949
4950 for payment in &document_flows.payments {
4952 cash_flows.push(datasynth_generators::treasury::CashFlow {
4953 date: payment.header.document_date,
4954 account_id: format!("{entity_id}-MAIN"),
4955 amount: payment.amount,
4956 direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4957 });
4958 }
4959
4960 for chain in &document_flows.o2c_chains {
4962 if let Some(ref receipt) = chain.customer_receipt {
4963 cash_flows.push(datasynth_generators::treasury::CashFlow {
4964 date: receipt.header.document_date,
4965 account_id: format!("{entity_id}-MAIN"),
4966 amount: receipt.amount,
4967 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4968 });
4969 }
4970 for receipt in &chain.remainder_receipts {
4972 cash_flows.push(datasynth_generators::treasury::CashFlow {
4973 date: receipt.header.document_date,
4974 account_id: format!("{entity_id}-MAIN"),
4975 amount: receipt.amount,
4976 direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4977 });
4978 }
4979 }
4980
4981 if !cash_flows.is_empty() {
4982 let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4983 self.config.treasury.cash_positioning.clone(),
4984 seed + 93,
4985 );
4986 let account_id = format!("{entity_id}-MAIN");
4987 snapshot.cash_positions = cash_gen.generate(
4988 entity_id,
4989 &account_id,
4990 currency,
4991 &cash_flows,
4992 start_date,
4993 start_date + chrono::Months::new(self.config.global.period_months),
4994 rust_decimal::Decimal::new(1_000_000, 0), );
4996 }
4997 }
4998
4999 if self.config.treasury.cash_forecasting.enabled {
5001 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5002
5003 let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
5005 .ar_invoices
5006 .iter()
5007 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
5008 .map(|inv| {
5009 let days_past_due = if inv.due_date < end_date {
5010 (end_date - inv.due_date).num_days().max(0) as u32
5011 } else {
5012 0
5013 };
5014 datasynth_generators::treasury::ArAgingItem {
5015 expected_date: inv.due_date,
5016 amount: inv.amount_remaining,
5017 days_past_due,
5018 document_id: inv.invoice_number.clone(),
5019 }
5020 })
5021 .collect();
5022
5023 let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
5025 .ap_invoices
5026 .iter()
5027 .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
5028 .map(|inv| datasynth_generators::treasury::ApAgingItem {
5029 payment_date: inv.due_date,
5030 amount: inv.amount_remaining,
5031 document_id: inv.invoice_number.clone(),
5032 })
5033 .collect();
5034
5035 let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
5036 self.config.treasury.cash_forecasting.clone(),
5037 seed + 94,
5038 );
5039 let forecast = forecast_gen.generate(
5040 entity_id,
5041 currency,
5042 end_date,
5043 &ar_items,
5044 &ap_items,
5045 &[], );
5047 snapshot.cash_forecasts.push(forecast);
5048 }
5049
5050 if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
5052 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5053 let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
5054 self.config.treasury.cash_pooling.clone(),
5055 seed + 95,
5056 );
5057
5058 let account_ids: Vec<String> = snapshot
5060 .cash_positions
5061 .iter()
5062 .map(|cp| cp.bank_account_id.clone())
5063 .collect::<std::collections::HashSet<_>>()
5064 .into_iter()
5065 .collect();
5066
5067 if let Some(pool) =
5068 pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
5069 {
5070 let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
5072 for cp in &snapshot.cash_positions {
5073 latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
5074 }
5075
5076 let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
5077 latest_balances
5078 .into_iter()
5079 .filter(|(id, _)| pool.participant_accounts.contains(id))
5080 .map(
5081 |(id, balance)| datasynth_generators::treasury::AccountBalance {
5082 account_id: id,
5083 balance,
5084 },
5085 )
5086 .collect();
5087
5088 let sweeps =
5089 pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
5090 snapshot.cash_pool_sweeps = sweeps;
5091 snapshot.cash_pools.push(pool);
5092 }
5093 }
5094
5095 if self.config.treasury.bank_guarantees.enabled {
5097 let vendor_names: Vec<String> = self
5098 .master_data
5099 .vendors
5100 .iter()
5101 .map(|v| v.name.clone())
5102 .collect();
5103 if !vendor_names.is_empty() {
5104 let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
5105 self.config.treasury.bank_guarantees.clone(),
5106 seed + 96,
5107 );
5108 snapshot.bank_guarantees =
5109 bg_gen.generate(entity_id, currency, start_date, &vendor_names);
5110 }
5111 }
5112
5113 if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
5115 let entity_ids: Vec<String> = self
5116 .config
5117 .companies
5118 .iter()
5119 .map(|c| c.code.clone())
5120 .collect();
5121 let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
5122 .matched_pairs
5123 .iter()
5124 .map(|mp| {
5125 (
5126 mp.seller_company.clone(),
5127 mp.buyer_company.clone(),
5128 mp.amount,
5129 )
5130 })
5131 .collect();
5132 if entity_ids.len() >= 2 {
5133 let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
5134 self.config.treasury.netting.clone(),
5135 seed + 97,
5136 );
5137 snapshot.netting_runs = netting_gen.generate(
5138 &entity_ids,
5139 currency,
5140 start_date,
5141 self.config.global.period_months,
5142 &ic_amounts,
5143 );
5144 }
5145 }
5146
5147 stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
5148 stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
5149 stats.cash_position_count = snapshot.cash_positions.len();
5150 stats.cash_forecast_count = snapshot.cash_forecasts.len();
5151 stats.cash_pool_count = snapshot.cash_pools.len();
5152
5153 info!(
5154 "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
5155 snapshot.debt_instruments.len(),
5156 snapshot.hedging_instruments.len(),
5157 snapshot.cash_positions.len(),
5158 snapshot.cash_forecasts.len(),
5159 snapshot.cash_pools.len(),
5160 snapshot.bank_guarantees.len(),
5161 snapshot.netting_runs.len(),
5162 );
5163 self.check_resources_with_log("post-treasury")?;
5164
5165 Ok(snapshot)
5166 }
5167
5168 fn phase_project_accounting(
5170 &mut self,
5171 document_flows: &DocumentFlowSnapshot,
5172 hr: &HrSnapshot,
5173 stats: &mut EnhancedGenerationStatistics,
5174 ) -> SynthResult<ProjectAccountingSnapshot> {
5175 if !self.config.project_accounting.enabled {
5176 debug!("Phase 23: Skipped (project accounting disabled)");
5177 return Ok(ProjectAccountingSnapshot::default());
5178 }
5179 info!("Phase 23: Generating Project Accounting Data");
5180
5181 let seed = self.seed;
5182 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5183 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5184 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5185 let company_code = self
5186 .config
5187 .companies
5188 .first()
5189 .map(|c| c.code.as_str())
5190 .unwrap_or("1000");
5191
5192 let mut snapshot = ProjectAccountingSnapshot::default();
5193
5194 let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
5196 self.config.project_accounting.clone(),
5197 seed + 95,
5198 );
5199 let pool = project_gen.generate(company_code, start_date, end_date);
5200 snapshot.projects = pool.projects.clone();
5201
5202 {
5204 let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
5205 Vec::new();
5206
5207 for te in &hr.time_entries {
5209 let total_hours = te.hours_regular + te.hours_overtime;
5210 if total_hours > 0.0 {
5211 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5212 id: te.entry_id.clone(),
5213 entity_id: company_code.to_string(),
5214 date: te.date,
5215 amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
5216 .unwrap_or(rust_decimal::Decimal::ZERO),
5217 source_type: CostSourceType::TimeEntry,
5218 hours: Some(
5219 rust_decimal::Decimal::from_f64_retain(total_hours)
5220 .unwrap_or(rust_decimal::Decimal::ZERO),
5221 ),
5222 });
5223 }
5224 }
5225
5226 for er in &hr.expense_reports {
5228 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5229 id: er.report_id.clone(),
5230 entity_id: company_code.to_string(),
5231 date: er.submission_date,
5232 amount: er.total_amount,
5233 source_type: CostSourceType::ExpenseReport,
5234 hours: None,
5235 });
5236 }
5237
5238 for po in &document_flows.purchase_orders {
5240 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5241 id: po.header.document_id.clone(),
5242 entity_id: company_code.to_string(),
5243 date: po.header.document_date,
5244 amount: po.total_net_amount,
5245 source_type: CostSourceType::PurchaseOrder,
5246 hours: None,
5247 });
5248 }
5249
5250 for vi in &document_flows.vendor_invoices {
5252 source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5253 id: vi.header.document_id.clone(),
5254 entity_id: company_code.to_string(),
5255 date: vi.header.document_date,
5256 amount: vi.payable_amount,
5257 source_type: CostSourceType::VendorInvoice,
5258 hours: None,
5259 });
5260 }
5261
5262 if !source_docs.is_empty() && !pool.projects.is_empty() {
5263 let mut cost_gen =
5264 datasynth_generators::project_accounting::ProjectCostGenerator::new(
5265 self.config.project_accounting.cost_allocation.clone(),
5266 seed + 99,
5267 );
5268 snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
5269 }
5270 }
5271
5272 if self.config.project_accounting.change_orders.enabled {
5274 let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
5275 self.config.project_accounting.change_orders.clone(),
5276 seed + 96,
5277 );
5278 snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
5279 }
5280
5281 if self.config.project_accounting.milestones.enabled {
5283 let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
5284 self.config.project_accounting.milestones.clone(),
5285 seed + 97,
5286 );
5287 snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
5288 }
5289
5290 if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
5292 let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
5293 self.config.project_accounting.earned_value.clone(),
5294 seed + 98,
5295 );
5296 snapshot.earned_value_metrics =
5297 evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
5298 }
5299
5300 stats.project_count = snapshot.projects.len();
5301 stats.project_change_order_count = snapshot.change_orders.len();
5302 stats.project_cost_line_count = snapshot.cost_lines.len();
5303
5304 info!(
5305 "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
5306 snapshot.projects.len(),
5307 snapshot.change_orders.len(),
5308 snapshot.milestones.len(),
5309 snapshot.earned_value_metrics.len()
5310 );
5311 self.check_resources_with_log("post-project-accounting")?;
5312
5313 Ok(snapshot)
5314 }
5315
5316 fn phase_evolution_events(
5318 &mut self,
5319 stats: &mut EnhancedGenerationStatistics,
5320 ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
5321 if !self.phase_config.generate_evolution_events {
5322 debug!("Phase 24: Skipped (evolution events disabled)");
5323 return Ok((Vec::new(), Vec::new()));
5324 }
5325 info!("Phase 24: Generating Process Evolution + Organizational Events");
5326
5327 let seed = self.seed;
5328 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5329 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5330 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5331
5332 let mut proc_gen =
5334 datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
5335 seed + 100,
5336 );
5337 let process_events = proc_gen.generate_events(start_date, end_date);
5338
5339 let company_codes: Vec<String> = self
5341 .config
5342 .companies
5343 .iter()
5344 .map(|c| c.code.clone())
5345 .collect();
5346 let mut org_gen =
5347 datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
5348 seed + 101,
5349 );
5350 let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
5351
5352 stats.process_evolution_event_count = process_events.len();
5353 stats.organizational_event_count = org_events.len();
5354
5355 info!(
5356 "Evolution events generated: {} process evolution, {} organizational",
5357 process_events.len(),
5358 org_events.len()
5359 );
5360 self.check_resources_with_log("post-evolution-events")?;
5361
5362 Ok((process_events, org_events))
5363 }
5364
5365 fn phase_disruption_events(
5368 &self,
5369 stats: &mut EnhancedGenerationStatistics,
5370 ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
5371 if !self.config.organizational_events.enabled {
5372 debug!("Phase 24b: Skipped (organizational events disabled)");
5373 return Ok(Vec::new());
5374 }
5375 info!("Phase 24b: Generating Disruption Events");
5376
5377 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5378 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5379 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5380
5381 let company_codes: Vec<String> = self
5382 .config
5383 .companies
5384 .iter()
5385 .map(|c| c.code.clone())
5386 .collect();
5387
5388 let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
5389 let events = gen.generate(start_date, end_date, &company_codes);
5390
5391 stats.disruption_event_count = events.len();
5392 info!("Disruption events generated: {} events", events.len());
5393 self.check_resources_with_log("post-disruption-events")?;
5394
5395 Ok(events)
5396 }
5397
5398 fn phase_counterfactuals(
5405 &self,
5406 journal_entries: &[JournalEntry],
5407 stats: &mut EnhancedGenerationStatistics,
5408 ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
5409 if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
5410 debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
5411 return Ok(Vec::new());
5412 }
5413 info!("Phase 25: Generating Counterfactual Pairs for ML Training");
5414
5415 use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
5416
5417 let mut gen = CounterfactualGenerator::new(self.seed + 110);
5418
5419 let specs = [
5421 CounterfactualSpec::ScaleAmount { factor: 2.5 },
5422 CounterfactualSpec::ShiftDate { days: -14 },
5423 CounterfactualSpec::SelfApprove,
5424 CounterfactualSpec::SplitTransaction { split_count: 3 },
5425 ];
5426
5427 let pairs: Vec<_> = journal_entries
5428 .iter()
5429 .enumerate()
5430 .map(|(i, je)| {
5431 let spec = &specs[i % specs.len()];
5432 gen.generate(je, spec)
5433 })
5434 .collect();
5435
5436 stats.counterfactual_pair_count = pairs.len();
5437 info!(
5438 "Counterfactual pairs generated: {} pairs from {} journal entries",
5439 pairs.len(),
5440 journal_entries.len()
5441 );
5442 self.check_resources_with_log("post-counterfactuals")?;
5443
5444 Ok(pairs)
5445 }
5446
5447 fn phase_red_flags(
5454 &self,
5455 anomaly_labels: &AnomalyLabels,
5456 document_flows: &DocumentFlowSnapshot,
5457 stats: &mut EnhancedGenerationStatistics,
5458 ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
5459 if !self.config.fraud.enabled {
5460 debug!("Phase 26: Skipped (fraud generation disabled)");
5461 return Ok(Vec::new());
5462 }
5463 info!("Phase 26: Generating Fraud Red-Flag Indicators");
5464
5465 use datasynth_generators::fraud::RedFlagGenerator;
5466
5467 let generator = RedFlagGenerator::new();
5468 let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
5469
5470 let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
5472 .labels
5473 .iter()
5474 .filter(|label| label.anomaly_type.is_intentional())
5475 .map(|label| label.document_id.as_str())
5476 .collect();
5477
5478 let mut flags = Vec::new();
5479
5480 for chain in &document_flows.p2p_chains {
5482 let doc_id = &chain.purchase_order.header.document_id;
5483 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5484 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5485 }
5486
5487 for chain in &document_flows.o2c_chains {
5489 let doc_id = &chain.sales_order.header.document_id;
5490 let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5491 flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5492 }
5493
5494 stats.red_flag_count = flags.len();
5495 info!(
5496 "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
5497 flags.len(),
5498 document_flows.p2p_chains.len(),
5499 document_flows.o2c_chains.len(),
5500 fraud_doc_ids.len()
5501 );
5502 self.check_resources_with_log("post-red-flags")?;
5503
5504 Ok(flags)
5505 }
5506
5507 fn phase_collusion_rings(
5513 &mut self,
5514 stats: &mut EnhancedGenerationStatistics,
5515 ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
5516 if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
5517 debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
5518 return Ok(Vec::new());
5519 }
5520 info!("Phase 26b: Generating Collusion Rings");
5521
5522 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5523 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5524 let months = self.config.global.period_months;
5525
5526 let employee_ids: Vec<String> = self
5527 .master_data
5528 .employees
5529 .iter()
5530 .map(|e| e.employee_id.clone())
5531 .collect();
5532 let vendor_ids: Vec<String> = self
5533 .master_data
5534 .vendors
5535 .iter()
5536 .map(|v| v.vendor_id.clone())
5537 .collect();
5538
5539 let mut generator =
5540 datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
5541 let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
5542
5543 stats.collusion_ring_count = rings.len();
5544 info!(
5545 "Collusion rings generated: {} rings, total members: {}",
5546 rings.len(),
5547 rings
5548 .iter()
5549 .map(datasynth_generators::fraud::CollusionRing::size)
5550 .sum::<usize>()
5551 );
5552 self.check_resources_with_log("post-collusion-rings")?;
5553
5554 Ok(rings)
5555 }
5556
5557 fn phase_temporal_attributes(
5562 &mut self,
5563 stats: &mut EnhancedGenerationStatistics,
5564 ) -> SynthResult<
5565 Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
5566 > {
5567 if !self.config.temporal_attributes.enabled {
5568 debug!("Phase 27: Skipped (temporal attributes disabled)");
5569 return Ok(Vec::new());
5570 }
5571 info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
5572
5573 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5574 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5575
5576 let generate_version_chains = self.config.temporal_attributes.generate_version_chains
5580 || self.config.temporal_attributes.enabled;
5581 let temporal_config = {
5582 let ta = &self.config.temporal_attributes;
5583 datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
5584 .enabled(ta.enabled)
5585 .closed_probability(ta.valid_time.closed_probability)
5586 .avg_validity_days(ta.valid_time.avg_validity_days)
5587 .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
5588 .with_version_chains(if generate_version_chains {
5589 ta.avg_versions_per_entity
5590 } else {
5591 1.0
5592 })
5593 .build()
5594 };
5595 let temporal_config = if self
5597 .config
5598 .temporal_attributes
5599 .transaction_time
5600 .allow_backdating
5601 {
5602 let mut c = temporal_config;
5603 c.transaction_time.allow_backdating = true;
5604 c.transaction_time.backdating_probability = self
5605 .config
5606 .temporal_attributes
5607 .transaction_time
5608 .backdating_probability;
5609 c.transaction_time.max_backdate_days = self
5610 .config
5611 .temporal_attributes
5612 .transaction_time
5613 .max_backdate_days;
5614 c
5615 } else {
5616 temporal_config
5617 };
5618 let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
5619 temporal_config,
5620 self.seed + 130,
5621 start_date,
5622 );
5623
5624 let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
5625 self.seed + 130,
5626 datasynth_core::GeneratorType::Vendor,
5627 );
5628
5629 let chains: Vec<_> = self
5630 .master_data
5631 .vendors
5632 .iter()
5633 .map(|vendor| {
5634 let id = uuid_factory.next();
5635 gen.generate_version_chain(vendor.clone(), id)
5636 })
5637 .collect();
5638
5639 stats.temporal_version_chain_count = chains.len();
5640 info!("Temporal version chains generated: {} chains", chains.len());
5641 self.check_resources_with_log("post-temporal-attributes")?;
5642
5643 Ok(chains)
5644 }
5645
5646 fn phase_entity_relationships(
5656 &self,
5657 journal_entries: &[JournalEntry],
5658 document_flows: &DocumentFlowSnapshot,
5659 stats: &mut EnhancedGenerationStatistics,
5660 ) -> SynthResult<(
5661 Option<datasynth_core::models::EntityGraph>,
5662 Vec<datasynth_core::models::CrossProcessLink>,
5663 )> {
5664 use datasynth_generators::relationships::{
5665 DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
5666 TransactionSummary,
5667 };
5668
5669 let rs_enabled = self.config.relationship_strength.enabled;
5670 let cpl_enabled = self.config.cross_process_links.enabled
5671 || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
5672
5673 if !rs_enabled && !cpl_enabled {
5674 debug!(
5675 "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
5676 );
5677 return Ok((None, Vec::new()));
5678 }
5679
5680 info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
5681
5682 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5683 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5684
5685 let company_code = self
5686 .config
5687 .companies
5688 .first()
5689 .map(|c| c.code.as_str())
5690 .unwrap_or("1000");
5691
5692 let gen_config = EntityGraphConfig {
5694 enabled: rs_enabled,
5695 cross_process: datasynth_generators::relationships::CrossProcessConfig {
5696 enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
5697 enable_return_flows: false,
5698 enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
5699 enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
5700 inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
5702 1.0
5703 } else {
5704 0.30
5705 },
5706 ..Default::default()
5707 },
5708 strength_config: datasynth_generators::relationships::StrengthConfig {
5709 transaction_volume_weight: self
5710 .config
5711 .relationship_strength
5712 .calculation
5713 .transaction_volume_weight,
5714 transaction_count_weight: self
5715 .config
5716 .relationship_strength
5717 .calculation
5718 .transaction_count_weight,
5719 duration_weight: self
5720 .config
5721 .relationship_strength
5722 .calculation
5723 .relationship_duration_weight,
5724 recency_weight: self.config.relationship_strength.calculation.recency_weight,
5725 mutual_connections_weight: self
5726 .config
5727 .relationship_strength
5728 .calculation
5729 .mutual_connections_weight,
5730 recency_half_life_days: self
5731 .config
5732 .relationship_strength
5733 .calculation
5734 .recency_half_life_days,
5735 },
5736 ..Default::default()
5737 };
5738
5739 let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
5740
5741 let entity_graph = if rs_enabled {
5743 let vendor_summaries: Vec<EntitySummary> = self
5745 .master_data
5746 .vendors
5747 .iter()
5748 .map(|v| {
5749 EntitySummary::new(
5750 &v.vendor_id,
5751 &v.name,
5752 datasynth_core::models::GraphEntityType::Vendor,
5753 start_date,
5754 )
5755 })
5756 .collect();
5757
5758 let customer_summaries: Vec<EntitySummary> = self
5759 .master_data
5760 .customers
5761 .iter()
5762 .map(|c| {
5763 EntitySummary::new(
5764 &c.customer_id,
5765 &c.name,
5766 datasynth_core::models::GraphEntityType::Customer,
5767 start_date,
5768 )
5769 })
5770 .collect();
5771
5772 let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
5777 std::collections::HashMap::new();
5778
5779 for je in journal_entries {
5780 let cc = je.header.company_code.clone();
5781 let posting_date = je.header.posting_date;
5782 for line in &je.lines {
5783 if let Some(ref tp) = line.trading_partner {
5784 let amount = if line.debit_amount > line.credit_amount {
5785 line.debit_amount
5786 } else {
5787 line.credit_amount
5788 };
5789 let entry = txn_summaries
5790 .entry((cc.clone(), tp.clone()))
5791 .or_insert_with(|| TransactionSummary {
5792 total_volume: rust_decimal::Decimal::ZERO,
5793 transaction_count: 0,
5794 first_transaction_date: posting_date,
5795 last_transaction_date: posting_date,
5796 related_entities: std::collections::HashSet::new(),
5797 });
5798 entry.total_volume += amount;
5799 entry.transaction_count += 1;
5800 if posting_date < entry.first_transaction_date {
5801 entry.first_transaction_date = posting_date;
5802 }
5803 if posting_date > entry.last_transaction_date {
5804 entry.last_transaction_date = posting_date;
5805 }
5806 entry.related_entities.insert(cc.clone());
5807 }
5808 }
5809 }
5810
5811 for chain in &document_flows.p2p_chains {
5814 let cc = chain.purchase_order.header.company_code.clone();
5815 let vendor_id = chain.purchase_order.vendor_id.clone();
5816 let po_date = chain.purchase_order.header.document_date;
5817 let amount = chain.purchase_order.total_net_amount;
5818
5819 let entry = txn_summaries
5820 .entry((cc.clone(), vendor_id))
5821 .or_insert_with(|| TransactionSummary {
5822 total_volume: rust_decimal::Decimal::ZERO,
5823 transaction_count: 0,
5824 first_transaction_date: po_date,
5825 last_transaction_date: po_date,
5826 related_entities: std::collections::HashSet::new(),
5827 });
5828 entry.total_volume += amount;
5829 entry.transaction_count += 1;
5830 if po_date < entry.first_transaction_date {
5831 entry.first_transaction_date = po_date;
5832 }
5833 if po_date > entry.last_transaction_date {
5834 entry.last_transaction_date = po_date;
5835 }
5836 entry.related_entities.insert(cc);
5837 }
5838
5839 for chain in &document_flows.o2c_chains {
5841 let cc = chain.sales_order.header.company_code.clone();
5842 let customer_id = chain.sales_order.customer_id.clone();
5843 let so_date = chain.sales_order.header.document_date;
5844 let amount = chain.sales_order.total_net_amount;
5845
5846 let entry = txn_summaries
5847 .entry((cc.clone(), customer_id))
5848 .or_insert_with(|| TransactionSummary {
5849 total_volume: rust_decimal::Decimal::ZERO,
5850 transaction_count: 0,
5851 first_transaction_date: so_date,
5852 last_transaction_date: so_date,
5853 related_entities: std::collections::HashSet::new(),
5854 });
5855 entry.total_volume += amount;
5856 entry.transaction_count += 1;
5857 if so_date < entry.first_transaction_date {
5858 entry.first_transaction_date = so_date;
5859 }
5860 if so_date > entry.last_transaction_date {
5861 entry.last_transaction_date = so_date;
5862 }
5863 entry.related_entities.insert(cc);
5864 }
5865
5866 let as_of_date = journal_entries
5867 .last()
5868 .map(|je| je.header.posting_date)
5869 .unwrap_or(start_date);
5870
5871 let graph = gen.generate_entity_graph(
5872 company_code,
5873 as_of_date,
5874 &vendor_summaries,
5875 &customer_summaries,
5876 &txn_summaries,
5877 );
5878
5879 info!(
5880 "Entity relationship graph: {} nodes, {} edges",
5881 graph.nodes.len(),
5882 graph.edges.len()
5883 );
5884 stats.entity_relationship_node_count = graph.nodes.len();
5885 stats.entity_relationship_edge_count = graph.edges.len();
5886 Some(graph)
5887 } else {
5888 None
5889 };
5890
5891 let cross_process_links = if cpl_enabled {
5893 let gr_refs: Vec<GoodsReceiptRef> = document_flows
5895 .p2p_chains
5896 .iter()
5897 .flat_map(|chain| {
5898 let vendor_id = chain.purchase_order.vendor_id.clone();
5899 let cc = chain.purchase_order.header.company_code.clone();
5900 chain.goods_receipts.iter().flat_map(move |gr| {
5901 gr.items.iter().filter_map({
5902 let doc_id = gr.header.document_id.clone();
5903 let v_id = vendor_id.clone();
5904 let company = cc.clone();
5905 let receipt_date = gr.header.document_date;
5906 move |item| {
5907 item.base
5908 .material_id
5909 .as_ref()
5910 .map(|mat_id| GoodsReceiptRef {
5911 document_id: doc_id.clone(),
5912 material_id: mat_id.clone(),
5913 quantity: item.base.quantity,
5914 receipt_date,
5915 vendor_id: v_id.clone(),
5916 company_code: company.clone(),
5917 })
5918 }
5919 })
5920 })
5921 })
5922 .collect();
5923
5924 let del_refs: Vec<DeliveryRef> = document_flows
5926 .o2c_chains
5927 .iter()
5928 .flat_map(|chain| {
5929 let customer_id = chain.sales_order.customer_id.clone();
5930 let cc = chain.sales_order.header.company_code.clone();
5931 chain.deliveries.iter().flat_map(move |del| {
5932 let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
5933 del.items.iter().filter_map({
5934 let doc_id = del.header.document_id.clone();
5935 let c_id = customer_id.clone();
5936 let company = cc.clone();
5937 move |item| {
5938 item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
5939 document_id: doc_id.clone(),
5940 material_id: mat_id.clone(),
5941 quantity: item.base.quantity,
5942 delivery_date,
5943 customer_id: c_id.clone(),
5944 company_code: company.clone(),
5945 })
5946 }
5947 })
5948 })
5949 })
5950 .collect();
5951
5952 let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
5953 info!("Cross-process links generated: {} links", links.len());
5954 stats.cross_process_link_count = links.len();
5955 links
5956 } else {
5957 Vec::new()
5958 };
5959
5960 self.check_resources_with_log("post-entity-relationships")?;
5961 Ok((entity_graph, cross_process_links))
5962 }
5963
5964 fn phase_industry_data(
5966 &self,
5967 stats: &mut EnhancedGenerationStatistics,
5968 ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
5969 if !self.config.industry_specific.enabled {
5970 return None;
5971 }
5972 info!("Phase 29: Generating industry-specific data");
5973 let output = datasynth_generators::industry::factory::generate_industry_output(
5974 self.config.global.industry,
5975 );
5976 stats.industry_gl_account_count = output.gl_accounts.len();
5977 info!(
5978 "Industry data generated: {} GL accounts for {:?}",
5979 output.gl_accounts.len(),
5980 self.config.global.industry
5981 );
5982 Some(output)
5983 }
5984
5985 fn phase_opening_balances(
5987 &mut self,
5988 coa: &Arc<ChartOfAccounts>,
5989 stats: &mut EnhancedGenerationStatistics,
5990 ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
5991 if !self.config.balance.generate_opening_balances {
5992 debug!("Phase 3b: Skipped (opening balance generation disabled)");
5993 return Ok(Vec::new());
5994 }
5995 info!("Phase 3b: Generating Opening Balances");
5996
5997 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5998 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5999 let fiscal_year = start_date.year();
6000
6001 let industry = match self.config.global.industry {
6002 IndustrySector::Manufacturing => IndustryType::Manufacturing,
6003 IndustrySector::Retail => IndustryType::Retail,
6004 IndustrySector::FinancialServices => IndustryType::Financial,
6005 IndustrySector::Healthcare => IndustryType::Healthcare,
6006 IndustrySector::Technology => IndustryType::Technology,
6007 _ => IndustryType::Manufacturing,
6008 };
6009
6010 let config = datasynth_generators::OpeningBalanceConfig {
6011 industry,
6012 ..Default::default()
6013 };
6014 let mut gen =
6015 datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
6016
6017 let mut results = Vec::new();
6018 for company in &self.config.companies {
6019 let spec = OpeningBalanceSpec::new(
6020 company.code.clone(),
6021 start_date,
6022 fiscal_year,
6023 company.currency.clone(),
6024 rust_decimal::Decimal::new(10_000_000, 0),
6025 industry,
6026 );
6027 let ob = gen.generate(&spec, coa, start_date, &company.code);
6028 results.push(ob);
6029 }
6030
6031 stats.opening_balance_count = results.len();
6032 info!("Opening balances generated: {} companies", results.len());
6033 self.check_resources_with_log("post-opening-balances")?;
6034
6035 Ok(results)
6036 }
6037
6038 fn phase_subledger_reconciliation(
6040 &mut self,
6041 subledger: &SubledgerSnapshot,
6042 entries: &[JournalEntry],
6043 stats: &mut EnhancedGenerationStatistics,
6044 ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
6045 if !self.config.balance.reconcile_subledgers {
6046 debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
6047 return Ok(Vec::new());
6048 }
6049 info!("Phase 9b: Reconciling GL to subledger balances");
6050
6051 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6052 .map(|d| d + chrono::Months::new(self.config.global.period_months))
6053 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6054
6055 let tracker_config = BalanceTrackerConfig {
6057 validate_on_each_entry: false,
6058 track_history: false,
6059 fail_on_validation_error: false,
6060 ..Default::default()
6061 };
6062 let recon_currency = self
6063 .config
6064 .companies
6065 .first()
6066 .map(|c| c.currency.clone())
6067 .unwrap_or_else(|| "USD".to_string());
6068 let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
6069 let validation_errors = tracker.apply_entries(entries);
6070 if !validation_errors.is_empty() {
6071 warn!(
6072 error_count = validation_errors.len(),
6073 "Balance tracker encountered validation errors during subledger reconciliation"
6074 );
6075 for err in &validation_errors {
6076 debug!("Balance validation error: {:?}", err);
6077 }
6078 }
6079
6080 let mut engine = datasynth_generators::ReconciliationEngine::new(
6081 datasynth_generators::ReconciliationConfig::default(),
6082 );
6083
6084 let mut results = Vec::new();
6085 let company_code = self
6086 .config
6087 .companies
6088 .first()
6089 .map(|c| c.code.as_str())
6090 .unwrap_or("1000");
6091
6092 if !subledger.ar_invoices.is_empty() {
6094 let gl_balance = tracker
6095 .get_account_balance(
6096 company_code,
6097 datasynth_core::accounts::control_accounts::AR_CONTROL,
6098 )
6099 .map(|b| b.closing_balance)
6100 .unwrap_or_default();
6101 let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
6102 results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
6103 }
6104
6105 if !subledger.ap_invoices.is_empty() {
6107 let gl_balance = tracker
6108 .get_account_balance(
6109 company_code,
6110 datasynth_core::accounts::control_accounts::AP_CONTROL,
6111 )
6112 .map(|b| b.closing_balance)
6113 .unwrap_or_default();
6114 let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
6115 results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
6116 }
6117
6118 if !subledger.fa_records.is_empty() {
6120 let gl_asset_balance = tracker
6121 .get_account_balance(
6122 company_code,
6123 datasynth_core::accounts::control_accounts::FIXED_ASSETS,
6124 )
6125 .map(|b| b.closing_balance)
6126 .unwrap_or_default();
6127 let gl_accum_depr_balance = tracker
6128 .get_account_balance(
6129 company_code,
6130 datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
6131 )
6132 .map(|b| b.closing_balance)
6133 .unwrap_or_default();
6134 let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
6135 subledger.fa_records.iter().collect();
6136 let (asset_recon, depr_recon) = engine.reconcile_fa(
6137 company_code,
6138 end_date,
6139 gl_asset_balance,
6140 gl_accum_depr_balance,
6141 &fa_refs,
6142 );
6143 results.push(asset_recon);
6144 results.push(depr_recon);
6145 }
6146
6147 if !subledger.inventory_positions.is_empty() {
6149 let gl_balance = tracker
6150 .get_account_balance(
6151 company_code,
6152 datasynth_core::accounts::control_accounts::INVENTORY,
6153 )
6154 .map(|b| b.closing_balance)
6155 .unwrap_or_default();
6156 let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
6157 subledger.inventory_positions.iter().collect();
6158 results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
6159 }
6160
6161 stats.subledger_reconciliation_count = results.len();
6162 info!(
6163 "Subledger reconciliation complete: {} reconciliations",
6164 results.len()
6165 );
6166 self.check_resources_with_log("post-subledger-reconciliation")?;
6167
6168 Ok(results)
6169 }
6170
6171 fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
6173 let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
6174
6175 let coa_framework = self.resolve_coa_framework();
6176
6177 let mut gen = ChartOfAccountsGenerator::new(
6178 self.config.chart_of_accounts.complexity,
6179 self.config.global.industry,
6180 self.seed,
6181 )
6182 .with_coa_framework(coa_framework);
6183
6184 let coa = Arc::new(gen.generate());
6185 self.coa = Some(Arc::clone(&coa));
6186
6187 if let Some(pb) = pb {
6188 pb.finish_with_message("Chart of Accounts complete");
6189 }
6190
6191 Ok(coa)
6192 }
6193
6194 fn generate_master_data(&mut self) -> SynthResult<()> {
6196 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6197 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6198 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6199
6200 let total = self.config.companies.len() as u64 * 5; let pb = self.create_progress_bar(total, "Generating Master Data");
6202
6203 let pack = self.primary_pack().clone();
6205
6206 let vendors_per_company = self.phase_config.vendors_per_company;
6208 let customers_per_company = self.phase_config.customers_per_company;
6209 let materials_per_company = self.phase_config.materials_per_company;
6210 let assets_per_company = self.phase_config.assets_per_company;
6211 let coa_framework = self.resolve_coa_framework();
6212
6213 let per_company_results: Vec<_> = self
6216 .config
6217 .companies
6218 .par_iter()
6219 .enumerate()
6220 .map(|(i, company)| {
6221 let company_seed = self.seed.wrapping_add(i as u64 * 1000);
6222 let pack = pack.clone();
6223
6224 let mut vendor_gen = VendorGenerator::new(company_seed);
6226 vendor_gen.set_country_pack(pack.clone());
6227 vendor_gen.set_coa_framework(coa_framework);
6228 vendor_gen.set_counter_offset(i * vendors_per_company);
6229 let vendor_pool =
6230 vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
6231
6232 let mut customer_gen = CustomerGenerator::new(company_seed + 100);
6234 customer_gen.set_country_pack(pack.clone());
6235 customer_gen.set_coa_framework(coa_framework);
6236 customer_gen.set_counter_offset(i * customers_per_company);
6237 let customer_pool = customer_gen.generate_customer_pool(
6238 customers_per_company,
6239 &company.code,
6240 start_date,
6241 );
6242
6243 let mut material_gen = MaterialGenerator::new(company_seed + 200);
6245 material_gen.set_country_pack(pack.clone());
6246 material_gen.set_counter_offset(i * materials_per_company);
6247 let material_pool = material_gen.generate_material_pool(
6248 materials_per_company,
6249 &company.code,
6250 start_date,
6251 );
6252
6253 let mut asset_gen = AssetGenerator::new(company_seed + 300);
6255 let asset_pool = asset_gen.generate_asset_pool(
6256 assets_per_company,
6257 &company.code,
6258 (start_date, end_date),
6259 );
6260
6261 let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
6263 employee_gen.set_country_pack(pack);
6264 let employee_pool =
6265 employee_gen.generate_company_pool(&company.code, (start_date, end_date));
6266
6267 (
6268 vendor_pool.vendors,
6269 customer_pool.customers,
6270 material_pool.materials,
6271 asset_pool.assets,
6272 employee_pool.employees,
6273 )
6274 })
6275 .collect();
6276
6277 for (vendors, customers, materials, assets, employees) in per_company_results {
6279 self.master_data.vendors.extend(vendors);
6280 self.master_data.customers.extend(customers);
6281 self.master_data.materials.extend(materials);
6282 self.master_data.assets.extend(assets);
6283 self.master_data.employees.extend(employees);
6284 }
6285
6286 if let Some(pb) = &pb {
6287 pb.inc(total);
6288 }
6289 if let Some(pb) = pb {
6290 pb.finish_with_message("Master data generation complete");
6291 }
6292
6293 Ok(())
6294 }
6295
6296 fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
6298 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6299 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6300
6301 let months = (self.config.global.period_months as usize).max(1);
6304 let p2p_count = self
6305 .phase_config
6306 .p2p_chains
6307 .min(self.master_data.vendors.len() * 2 * months);
6308 let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
6309
6310 let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
6312 let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
6313 p2p_gen.set_country_pack(self.primary_pack().clone());
6314
6315 for i in 0..p2p_count {
6316 let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
6317 let materials: Vec<&Material> = self
6318 .master_data
6319 .materials
6320 .iter()
6321 .skip(i % self.master_data.materials.len().max(1))
6322 .take(2.min(self.master_data.materials.len()))
6323 .collect();
6324
6325 if materials.is_empty() {
6326 continue;
6327 }
6328
6329 let company = &self.config.companies[i % self.config.companies.len()];
6330 let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
6331 let fiscal_period = po_date.month() as u8;
6332 let created_by = if self.master_data.employees.is_empty() {
6333 "SYSTEM"
6334 } else {
6335 self.master_data.employees[i % self.master_data.employees.len()]
6336 .user_id
6337 .as_str()
6338 };
6339
6340 let chain = p2p_gen.generate_chain(
6341 &company.code,
6342 vendor,
6343 &materials,
6344 po_date,
6345 start_date.year() as u16,
6346 fiscal_period,
6347 created_by,
6348 );
6349
6350 flows.purchase_orders.push(chain.purchase_order.clone());
6352 flows.goods_receipts.extend(chain.goods_receipts.clone());
6353 if let Some(vi) = &chain.vendor_invoice {
6354 flows.vendor_invoices.push(vi.clone());
6355 }
6356 if let Some(payment) = &chain.payment {
6357 flows.payments.push(payment.clone());
6358 }
6359 for remainder in &chain.remainder_payments {
6360 flows.payments.push(remainder.clone());
6361 }
6362 flows.p2p_chains.push(chain);
6363
6364 if let Some(pb) = &pb {
6365 pb.inc(1);
6366 }
6367 }
6368
6369 if let Some(pb) = pb {
6370 pb.finish_with_message("P2P document flows complete");
6371 }
6372
6373 let o2c_count = self
6376 .phase_config
6377 .o2c_chains
6378 .min(self.master_data.customers.len() * 2 * months);
6379 let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
6380
6381 let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
6383 let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
6384 o2c_gen.set_country_pack(self.primary_pack().clone());
6385
6386 for i in 0..o2c_count {
6387 let customer = &self.master_data.customers[i % self.master_data.customers.len()];
6388 let materials: Vec<&Material> = self
6389 .master_data
6390 .materials
6391 .iter()
6392 .skip(i % self.master_data.materials.len().max(1))
6393 .take(2.min(self.master_data.materials.len()))
6394 .collect();
6395
6396 if materials.is_empty() {
6397 continue;
6398 }
6399
6400 let company = &self.config.companies[i % self.config.companies.len()];
6401 let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
6402 let fiscal_period = so_date.month() as u8;
6403 let created_by = if self.master_data.employees.is_empty() {
6404 "SYSTEM"
6405 } else {
6406 self.master_data.employees[i % self.master_data.employees.len()]
6407 .user_id
6408 .as_str()
6409 };
6410
6411 let chain = o2c_gen.generate_chain(
6412 &company.code,
6413 customer,
6414 &materials,
6415 so_date,
6416 start_date.year() as u16,
6417 fiscal_period,
6418 created_by,
6419 );
6420
6421 flows.sales_orders.push(chain.sales_order.clone());
6423 flows.deliveries.extend(chain.deliveries.clone());
6424 if let Some(ci) = &chain.customer_invoice {
6425 flows.customer_invoices.push(ci.clone());
6426 }
6427 if let Some(receipt) = &chain.customer_receipt {
6428 flows.payments.push(receipt.clone());
6429 }
6430 for receipt in &chain.remainder_receipts {
6432 flows.payments.push(receipt.clone());
6433 }
6434 flows.o2c_chains.push(chain);
6435
6436 if let Some(pb) = &pb {
6437 pb.inc(1);
6438 }
6439 }
6440
6441 if let Some(pb) = pb {
6442 pb.finish_with_message("O2C document flows complete");
6443 }
6444
6445 Ok(())
6446 }
6447
6448 fn generate_journal_entries(
6450 &mut self,
6451 coa: &Arc<ChartOfAccounts>,
6452 ) -> SynthResult<Vec<JournalEntry>> {
6453 use datasynth_core::traits::ParallelGenerator;
6454
6455 let total = self.calculate_total_transactions();
6456 let pb = self.create_progress_bar(total, "Generating Journal Entries");
6457
6458 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6459 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6460 let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6461
6462 let company_codes: Vec<String> = self
6463 .config
6464 .companies
6465 .iter()
6466 .map(|c| c.code.clone())
6467 .collect();
6468
6469 let generator = JournalEntryGenerator::new_with_params(
6470 self.config.transactions.clone(),
6471 Arc::clone(coa),
6472 company_codes,
6473 start_date,
6474 end_date,
6475 self.seed,
6476 );
6477
6478 let je_pack = self.primary_pack();
6482
6483 let mut generator = generator
6484 .with_master_data(
6485 &self.master_data.vendors,
6486 &self.master_data.customers,
6487 &self.master_data.materials,
6488 )
6489 .with_country_pack_names(je_pack)
6490 .with_country_pack_temporal(
6491 self.config.temporal_patterns.clone(),
6492 self.seed + 200,
6493 je_pack,
6494 )
6495 .with_persona_errors(true)
6496 .with_fraud_config(self.config.fraud.clone());
6497
6498 if self.config.temporal.enabled {
6500 let drift_config = self.config.temporal.to_core_config();
6501 generator = generator.with_drift_config(drift_config, self.seed + 100);
6502 }
6503
6504 self.check_memory_limit()?;
6506
6507 let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
6509
6510 let entries = if total >= 10_000 && num_threads > 1 {
6514 let sub_generators = generator.split(num_threads);
6517 let entries_per_thread = total as usize / num_threads;
6518 let remainder = total as usize % num_threads;
6519
6520 let batches: Vec<Vec<JournalEntry>> = sub_generators
6521 .into_par_iter()
6522 .enumerate()
6523 .map(|(i, mut gen)| {
6524 let count = entries_per_thread + if i < remainder { 1 } else { 0 };
6525 gen.generate_batch(count)
6526 })
6527 .collect();
6528
6529 let entries = JournalEntryGenerator::merge_results(batches);
6531
6532 if let Some(pb) = &pb {
6533 pb.inc(total);
6534 }
6535 entries
6536 } else {
6537 let mut entries = Vec::with_capacity(total as usize);
6539 for _ in 0..total {
6540 let entry = generator.generate();
6541 entries.push(entry);
6542 if let Some(pb) = &pb {
6543 pb.inc(1);
6544 }
6545 }
6546 entries
6547 };
6548
6549 if let Some(pb) = pb {
6550 pb.finish_with_message("Journal entries complete");
6551 }
6552
6553 Ok(entries)
6554 }
6555
6556 fn generate_jes_from_document_flows(
6561 &mut self,
6562 flows: &DocumentFlowSnapshot,
6563 ) -> SynthResult<Vec<JournalEntry>> {
6564 let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
6565 let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
6566
6567 let je_config = match self.resolve_coa_framework() {
6568 CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
6569 CoAFramework::GermanSkr04 => {
6570 let fa = datasynth_core::FrameworkAccounts::german_gaap();
6571 DocumentFlowJeConfig::from(&fa)
6572 }
6573 CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
6574 };
6575
6576 let populate_fec = je_config.populate_fec_fields;
6577 let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
6578
6579 if populate_fec {
6583 let mut aux_lookup = std::collections::HashMap::new();
6584 for vendor in &self.master_data.vendors {
6585 if let Some(ref aux) = vendor.auxiliary_gl_account {
6586 aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
6587 }
6588 }
6589 for customer in &self.master_data.customers {
6590 if let Some(ref aux) = customer.auxiliary_gl_account {
6591 aux_lookup.insert(customer.customer_id.clone(), aux.clone());
6592 }
6593 }
6594 if !aux_lookup.is_empty() {
6595 generator.set_auxiliary_account_lookup(aux_lookup);
6596 }
6597 }
6598
6599 let mut entries = Vec::new();
6600
6601 for chain in &flows.p2p_chains {
6603 let chain_entries = generator.generate_from_p2p_chain(chain);
6604 entries.extend(chain_entries);
6605 if let Some(pb) = &pb {
6606 pb.inc(1);
6607 }
6608 }
6609
6610 for chain in &flows.o2c_chains {
6612 let chain_entries = generator.generate_from_o2c_chain(chain);
6613 entries.extend(chain_entries);
6614 if let Some(pb) = &pb {
6615 pb.inc(1);
6616 }
6617 }
6618
6619 if let Some(pb) = pb {
6620 pb.finish_with_message(format!(
6621 "Generated {} JEs from document flows",
6622 entries.len()
6623 ));
6624 }
6625
6626 Ok(entries)
6627 }
6628
6629 fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
6635 use datasynth_core::accounts::{expense_accounts, suspense_accounts};
6636
6637 let mut jes = Vec::with_capacity(payroll_runs.len());
6638
6639 for run in payroll_runs {
6640 let mut je = JournalEntry::new_simple(
6641 format!("JE-PAYROLL-{}", run.payroll_id),
6642 run.company_code.clone(),
6643 run.run_date,
6644 format!("Payroll {}", run.payroll_id),
6645 );
6646
6647 je.add_line(JournalEntryLine {
6649 line_number: 1,
6650 gl_account: expense_accounts::SALARIES_WAGES.to_string(),
6651 debit_amount: run.total_gross,
6652 reference: Some(run.payroll_id.clone()),
6653 text: Some(format!(
6654 "Payroll {} ({} employees)",
6655 run.payroll_id, run.employee_count
6656 )),
6657 ..Default::default()
6658 });
6659
6660 je.add_line(JournalEntryLine {
6662 line_number: 2,
6663 gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
6664 credit_amount: run.total_gross,
6665 reference: Some(run.payroll_id.clone()),
6666 ..Default::default()
6667 });
6668
6669 jes.push(je);
6670 }
6671
6672 jes
6673 }
6674
6675 fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
6681 use datasynth_core::accounts::{control_accounts, expense_accounts};
6682 use datasynth_core::models::ProductionOrderStatus;
6683
6684 let mut jes = Vec::new();
6685
6686 for order in production_orders {
6687 if !matches!(
6689 order.status,
6690 ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
6691 ) {
6692 continue;
6693 }
6694
6695 let mut je = JournalEntry::new_simple(
6696 format!("JE-MFG-{}", order.order_id),
6697 order.company_code.clone(),
6698 order.actual_end.unwrap_or(order.planned_end),
6699 format!(
6700 "Production Order {} - {}",
6701 order.order_id, order.material_description
6702 ),
6703 );
6704
6705 je.add_line(JournalEntryLine {
6707 line_number: 1,
6708 gl_account: expense_accounts::RAW_MATERIALS.to_string(),
6709 debit_amount: order.actual_cost,
6710 reference: Some(order.order_id.clone()),
6711 text: Some(format!(
6712 "Material consumption for {}",
6713 order.material_description
6714 )),
6715 quantity: Some(order.actual_quantity),
6716 unit: Some("EA".to_string()),
6717 ..Default::default()
6718 });
6719
6720 je.add_line(JournalEntryLine {
6722 line_number: 2,
6723 gl_account: control_accounts::INVENTORY.to_string(),
6724 credit_amount: order.actual_cost,
6725 reference: Some(order.order_id.clone()),
6726 ..Default::default()
6727 });
6728
6729 jes.push(je);
6730 }
6731
6732 jes
6733 }
6734
6735 fn link_document_flows_to_subledgers(
6740 &mut self,
6741 flows: &DocumentFlowSnapshot,
6742 ) -> SynthResult<SubledgerSnapshot> {
6743 let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
6744 let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
6745
6746 let vendor_names: std::collections::HashMap<String, String> = self
6748 .master_data
6749 .vendors
6750 .iter()
6751 .map(|v| (v.vendor_id.clone(), v.name.clone()))
6752 .collect();
6753 let customer_names: std::collections::HashMap<String, String> = self
6754 .master_data
6755 .customers
6756 .iter()
6757 .map(|c| (c.customer_id.clone(), c.name.clone()))
6758 .collect();
6759
6760 let mut linker = DocumentFlowLinker::new()
6761 .with_vendor_names(vendor_names)
6762 .with_customer_names(customer_names);
6763
6764 let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
6766 if let Some(pb) = &pb {
6767 pb.inc(flows.vendor_invoices.len() as u64);
6768 }
6769
6770 let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
6772 if let Some(pb) = &pb {
6773 pb.inc(flows.customer_invoices.len() as u64);
6774 }
6775
6776 if let Some(pb) = pb {
6777 pb.finish_with_message(format!(
6778 "Linked {} AP and {} AR invoices",
6779 ap_invoices.len(),
6780 ar_invoices.len()
6781 ));
6782 }
6783
6784 Ok(SubledgerSnapshot {
6785 ap_invoices,
6786 ar_invoices,
6787 fa_records: Vec::new(),
6788 inventory_positions: Vec::new(),
6789 inventory_movements: Vec::new(),
6790 })
6791 }
6792
6793 #[allow(clippy::too_many_arguments)]
6798 fn generate_ocpm_events(
6799 &mut self,
6800 flows: &DocumentFlowSnapshot,
6801 sourcing: &SourcingSnapshot,
6802 hr: &HrSnapshot,
6803 manufacturing: &ManufacturingSnapshot,
6804 banking: &BankingSnapshot,
6805 audit: &AuditSnapshot,
6806 financial_reporting: &FinancialReportingSnapshot,
6807 ) -> SynthResult<OcpmSnapshot> {
6808 let total_chains = flows.p2p_chains.len()
6809 + flows.o2c_chains.len()
6810 + sourcing.sourcing_projects.len()
6811 + hr.payroll_runs.len()
6812 + manufacturing.production_orders.len()
6813 + banking.customers.len()
6814 + audit.engagements.len()
6815 + financial_reporting.bank_reconciliations.len();
6816 let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
6817
6818 let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
6820 let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
6821
6822 let ocpm_config = OcpmGeneratorConfig {
6824 generate_p2p: true,
6825 generate_o2c: true,
6826 generate_s2c: !sourcing.sourcing_projects.is_empty(),
6827 generate_h2r: !hr.payroll_runs.is_empty(),
6828 generate_mfg: !manufacturing.production_orders.is_empty(),
6829 generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
6830 generate_bank: !banking.customers.is_empty(),
6831 generate_audit: !audit.engagements.is_empty(),
6832 happy_path_rate: 0.75,
6833 exception_path_rate: 0.20,
6834 error_path_rate: 0.05,
6835 add_duration_variability: true,
6836 duration_std_dev_factor: 0.3,
6837 };
6838 let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
6839 let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
6840
6841 let available_users: Vec<String> = self
6843 .master_data
6844 .employees
6845 .iter()
6846 .take(20)
6847 .map(|e| e.user_id.clone())
6848 .collect();
6849
6850 let fallback_date =
6852 NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
6853 let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6854 .unwrap_or(fallback_date);
6855 let base_midnight = base_date
6856 .and_hms_opt(0, 0, 0)
6857 .expect("midnight is always valid");
6858 let base_datetime =
6859 chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
6860
6861 let add_result = |event_log: &mut OcpmEventLog,
6863 result: datasynth_ocpm::CaseGenerationResult| {
6864 for event in result.events {
6865 event_log.add_event(event);
6866 }
6867 for object in result.objects {
6868 event_log.add_object(object);
6869 }
6870 for relationship in result.relationships {
6871 event_log.add_relationship(relationship);
6872 }
6873 for corr in result.correlation_events {
6874 event_log.add_correlation_event(corr);
6875 }
6876 event_log.add_case(result.case_trace);
6877 };
6878
6879 for chain in &flows.p2p_chains {
6881 let po = &chain.purchase_order;
6882 let documents = P2pDocuments::new(
6883 &po.header.document_id,
6884 &po.vendor_id,
6885 &po.header.company_code,
6886 po.total_net_amount,
6887 &po.header.currency,
6888 &ocpm_uuid_factory,
6889 )
6890 .with_goods_receipt(
6891 chain
6892 .goods_receipts
6893 .first()
6894 .map(|gr| gr.header.document_id.as_str())
6895 .unwrap_or(""),
6896 &ocpm_uuid_factory,
6897 )
6898 .with_invoice(
6899 chain
6900 .vendor_invoice
6901 .as_ref()
6902 .map(|vi| vi.header.document_id.as_str())
6903 .unwrap_or(""),
6904 &ocpm_uuid_factory,
6905 )
6906 .with_payment(
6907 chain
6908 .payment
6909 .as_ref()
6910 .map(|p| p.header.document_id.as_str())
6911 .unwrap_or(""),
6912 &ocpm_uuid_factory,
6913 );
6914
6915 let start_time =
6916 chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
6917 let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
6918 add_result(&mut event_log, result);
6919
6920 if let Some(pb) = &pb {
6921 pb.inc(1);
6922 }
6923 }
6924
6925 for chain in &flows.o2c_chains {
6927 let so = &chain.sales_order;
6928 let documents = O2cDocuments::new(
6929 &so.header.document_id,
6930 &so.customer_id,
6931 &so.header.company_code,
6932 so.total_net_amount,
6933 &so.header.currency,
6934 &ocpm_uuid_factory,
6935 )
6936 .with_delivery(
6937 chain
6938 .deliveries
6939 .first()
6940 .map(|d| d.header.document_id.as_str())
6941 .unwrap_or(""),
6942 &ocpm_uuid_factory,
6943 )
6944 .with_invoice(
6945 chain
6946 .customer_invoice
6947 .as_ref()
6948 .map(|ci| ci.header.document_id.as_str())
6949 .unwrap_or(""),
6950 &ocpm_uuid_factory,
6951 )
6952 .with_receipt(
6953 chain
6954 .customer_receipt
6955 .as_ref()
6956 .map(|r| r.header.document_id.as_str())
6957 .unwrap_or(""),
6958 &ocpm_uuid_factory,
6959 );
6960
6961 let start_time =
6962 chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
6963 let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
6964 add_result(&mut event_log, result);
6965
6966 if let Some(pb) = &pb {
6967 pb.inc(1);
6968 }
6969 }
6970
6971 for project in &sourcing.sourcing_projects {
6973 let vendor_id = sourcing
6975 .contracts
6976 .iter()
6977 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6978 .map(|c| c.vendor_id.clone())
6979 .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
6980 .or_else(|| {
6981 self.master_data
6982 .vendors
6983 .first()
6984 .map(|v| v.vendor_id.clone())
6985 })
6986 .unwrap_or_else(|| "V000".to_string());
6987 let mut docs = S2cDocuments::new(
6988 &project.project_id,
6989 &vendor_id,
6990 &project.company_code,
6991 project.estimated_annual_spend,
6992 &ocpm_uuid_factory,
6993 );
6994 if let Some(rfx) = sourcing
6996 .rfx_events
6997 .iter()
6998 .find(|r| r.sourcing_project_id == project.project_id)
6999 {
7000 docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
7001 if let Some(bid) = sourcing.bids.iter().find(|b| {
7003 b.rfx_id == rfx.rfx_id
7004 && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
7005 }) {
7006 docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
7007 }
7008 }
7009 if let Some(contract) = sourcing
7011 .contracts
7012 .iter()
7013 .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
7014 {
7015 docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
7016 }
7017 let start_time = base_datetime - chrono::Duration::days(90);
7018 let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
7019 add_result(&mut event_log, result);
7020
7021 if let Some(pb) = &pb {
7022 pb.inc(1);
7023 }
7024 }
7025
7026 for run in &hr.payroll_runs {
7028 let employee_id = hr
7030 .payroll_line_items
7031 .iter()
7032 .find(|li| li.payroll_id == run.payroll_id)
7033 .map(|li| li.employee_id.as_str())
7034 .unwrap_or("EMP000");
7035 let docs = H2rDocuments::new(
7036 &run.payroll_id,
7037 employee_id,
7038 &run.company_code,
7039 run.total_gross,
7040 &ocpm_uuid_factory,
7041 )
7042 .with_time_entries(
7043 hr.time_entries
7044 .iter()
7045 .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
7046 .take(5)
7047 .map(|t| t.entry_id.as_str())
7048 .collect(),
7049 );
7050 let start_time = base_datetime - chrono::Duration::days(30);
7051 let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
7052 add_result(&mut event_log, result);
7053
7054 if let Some(pb) = &pb {
7055 pb.inc(1);
7056 }
7057 }
7058
7059 for order in &manufacturing.production_orders {
7061 let mut docs = MfgDocuments::new(
7062 &order.order_id,
7063 &order.material_id,
7064 &order.company_code,
7065 order.planned_quantity,
7066 &ocpm_uuid_factory,
7067 )
7068 .with_operations(
7069 order
7070 .operations
7071 .iter()
7072 .map(|o| format!("OP-{:04}", o.operation_number))
7073 .collect::<Vec<_>>()
7074 .iter()
7075 .map(std::string::String::as_str)
7076 .collect(),
7077 );
7078 if let Some(insp) = manufacturing
7080 .quality_inspections
7081 .iter()
7082 .find(|i| i.reference_id == order.order_id)
7083 {
7084 docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
7085 }
7086 if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
7088 cc.items
7089 .iter()
7090 .any(|item| item.material_id == order.material_id)
7091 }) {
7092 docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
7093 }
7094 let start_time = base_datetime - chrono::Duration::days(60);
7095 let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
7096 add_result(&mut event_log, result);
7097
7098 if let Some(pb) = &pb {
7099 pb.inc(1);
7100 }
7101 }
7102
7103 for customer in &banking.customers {
7105 let customer_id_str = customer.customer_id.to_string();
7106 let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
7107 if let Some(account) = banking
7109 .accounts
7110 .iter()
7111 .find(|a| a.primary_owner_id == customer.customer_id)
7112 {
7113 let account_id_str = account.account_id.to_string();
7114 docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
7115 let txn_strs: Vec<String> = banking
7117 .transactions
7118 .iter()
7119 .filter(|t| t.account_id == account.account_id)
7120 .take(10)
7121 .map(|t| t.transaction_id.to_string())
7122 .collect();
7123 let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
7124 let txn_amounts: Vec<rust_decimal::Decimal> = banking
7125 .transactions
7126 .iter()
7127 .filter(|t| t.account_id == account.account_id)
7128 .take(10)
7129 .map(|t| t.amount)
7130 .collect();
7131 if !txn_ids.is_empty() {
7132 docs = docs.with_transactions(txn_ids, txn_amounts);
7133 }
7134 }
7135 let start_time = base_datetime - chrono::Duration::days(180);
7136 let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
7137 add_result(&mut event_log, result);
7138
7139 if let Some(pb) = &pb {
7140 pb.inc(1);
7141 }
7142 }
7143
7144 for engagement in &audit.engagements {
7146 let engagement_id_str = engagement.engagement_id.to_string();
7147 let docs = AuditDocuments::new(
7148 &engagement_id_str,
7149 &engagement.client_entity_id,
7150 &ocpm_uuid_factory,
7151 )
7152 .with_workpapers(
7153 audit
7154 .workpapers
7155 .iter()
7156 .filter(|w| w.engagement_id == engagement.engagement_id)
7157 .take(10)
7158 .map(|w| w.workpaper_id.to_string())
7159 .collect::<Vec<_>>()
7160 .iter()
7161 .map(std::string::String::as_str)
7162 .collect(),
7163 )
7164 .with_evidence(
7165 audit
7166 .evidence
7167 .iter()
7168 .filter(|e| e.engagement_id == engagement.engagement_id)
7169 .take(10)
7170 .map(|e| e.evidence_id.to_string())
7171 .collect::<Vec<_>>()
7172 .iter()
7173 .map(std::string::String::as_str)
7174 .collect(),
7175 )
7176 .with_risks(
7177 audit
7178 .risk_assessments
7179 .iter()
7180 .filter(|r| r.engagement_id == engagement.engagement_id)
7181 .take(5)
7182 .map(|r| r.risk_id.to_string())
7183 .collect::<Vec<_>>()
7184 .iter()
7185 .map(std::string::String::as_str)
7186 .collect(),
7187 )
7188 .with_findings(
7189 audit
7190 .findings
7191 .iter()
7192 .filter(|f| f.engagement_id == engagement.engagement_id)
7193 .take(5)
7194 .map(|f| f.finding_id.to_string())
7195 .collect::<Vec<_>>()
7196 .iter()
7197 .map(std::string::String::as_str)
7198 .collect(),
7199 )
7200 .with_judgments(
7201 audit
7202 .judgments
7203 .iter()
7204 .filter(|j| j.engagement_id == engagement.engagement_id)
7205 .take(5)
7206 .map(|j| j.judgment_id.to_string())
7207 .collect::<Vec<_>>()
7208 .iter()
7209 .map(std::string::String::as_str)
7210 .collect(),
7211 );
7212 let start_time = base_datetime - chrono::Duration::days(120);
7213 let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
7214 add_result(&mut event_log, result);
7215
7216 if let Some(pb) = &pb {
7217 pb.inc(1);
7218 }
7219 }
7220
7221 for recon in &financial_reporting.bank_reconciliations {
7223 let docs = BankReconDocuments::new(
7224 &recon.reconciliation_id,
7225 &recon.bank_account_id,
7226 &recon.company_code,
7227 recon.bank_ending_balance,
7228 &ocpm_uuid_factory,
7229 )
7230 .with_statement_lines(
7231 recon
7232 .statement_lines
7233 .iter()
7234 .take(20)
7235 .map(|l| l.line_id.as_str())
7236 .collect(),
7237 )
7238 .with_reconciling_items(
7239 recon
7240 .reconciling_items
7241 .iter()
7242 .take(10)
7243 .map(|i| i.item_id.as_str())
7244 .collect(),
7245 );
7246 let start_time = base_datetime - chrono::Duration::days(30);
7247 let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
7248 add_result(&mut event_log, result);
7249
7250 if let Some(pb) = &pb {
7251 pb.inc(1);
7252 }
7253 }
7254
7255 event_log.compute_variants();
7257
7258 let summary = event_log.summary();
7259
7260 if let Some(pb) = pb {
7261 pb.finish_with_message(format!(
7262 "Generated {} OCPM events, {} objects",
7263 summary.event_count, summary.object_count
7264 ));
7265 }
7266
7267 Ok(OcpmSnapshot {
7268 event_count: summary.event_count,
7269 object_count: summary.object_count,
7270 case_count: summary.case_count,
7271 event_log: Some(event_log),
7272 })
7273 }
7274
7275 fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
7277 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
7278
7279 let total_rate = if self.config.anomaly_injection.enabled {
7282 self.config.anomaly_injection.rates.total_rate
7283 } else if self.config.fraud.enabled {
7284 self.config.fraud.fraud_rate
7285 } else {
7286 0.02
7287 };
7288
7289 let fraud_rate = if self.config.anomaly_injection.enabled {
7290 self.config.anomaly_injection.rates.fraud_rate
7291 } else {
7292 AnomalyRateConfig::default().fraud_rate
7293 };
7294
7295 let error_rate = if self.config.anomaly_injection.enabled {
7296 self.config.anomaly_injection.rates.error_rate
7297 } else {
7298 AnomalyRateConfig::default().error_rate
7299 };
7300
7301 let process_issue_rate = if self.config.anomaly_injection.enabled {
7302 self.config.anomaly_injection.rates.process_rate
7303 } else {
7304 AnomalyRateConfig::default().process_issue_rate
7305 };
7306
7307 let anomaly_config = AnomalyInjectorConfig {
7308 rates: AnomalyRateConfig {
7309 total_rate,
7310 fraud_rate,
7311 error_rate,
7312 process_issue_rate,
7313 ..Default::default()
7314 },
7315 seed: self.seed + 5000,
7316 ..Default::default()
7317 };
7318
7319 let mut injector = AnomalyInjector::new(anomaly_config);
7320 let result = injector.process_entries(entries);
7321
7322 if let Some(pb) = &pb {
7323 pb.inc(entries.len() as u64);
7324 pb.finish_with_message("Anomaly injection complete");
7325 }
7326
7327 let mut by_type = HashMap::new();
7328 for label in &result.labels {
7329 *by_type
7330 .entry(format!("{:?}", label.anomaly_type))
7331 .or_insert(0) += 1;
7332 }
7333
7334 Ok(AnomalyLabels {
7335 labels: result.labels,
7336 summary: Some(result.summary),
7337 by_type,
7338 })
7339 }
7340
7341 fn validate_journal_entries(
7350 &mut self,
7351 entries: &[JournalEntry],
7352 ) -> SynthResult<BalanceValidationResult> {
7353 let clean_entries: Vec<&JournalEntry> = entries
7355 .iter()
7356 .filter(|e| {
7357 e.header
7358 .header_text
7359 .as_ref()
7360 .map(|t| !t.contains("[HUMAN_ERROR:"))
7361 .unwrap_or(true)
7362 })
7363 .collect();
7364
7365 let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
7366
7367 let config = BalanceTrackerConfig {
7369 validate_on_each_entry: false, track_history: false, fail_on_validation_error: false, ..Default::default()
7373 };
7374 let validation_currency = self
7375 .config
7376 .companies
7377 .first()
7378 .map(|c| c.currency.clone())
7379 .unwrap_or_else(|| "USD".to_string());
7380
7381 let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
7382
7383 let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
7385 let errors = tracker.apply_entries(&clean_refs);
7386
7387 if let Some(pb) = &pb {
7388 pb.inc(entries.len() as u64);
7389 }
7390
7391 let has_unbalanced = tracker
7394 .get_validation_errors()
7395 .iter()
7396 .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
7397
7398 let mut all_errors = errors;
7401 all_errors.extend(tracker.get_validation_errors().iter().cloned());
7402 let company_codes: Vec<String> = self
7403 .config
7404 .companies
7405 .iter()
7406 .map(|c| c.code.clone())
7407 .collect();
7408
7409 let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7410 .map(|d| d + chrono::Months::new(self.config.global.period_months))
7411 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7412
7413 for company_code in &company_codes {
7414 if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
7415 all_errors.push(e);
7416 }
7417 }
7418
7419 let stats = tracker.get_statistics();
7421
7422 let is_balanced = all_errors.is_empty();
7424
7425 if let Some(pb) = pb {
7426 let msg = if is_balanced {
7427 "Balance validation passed"
7428 } else {
7429 "Balance validation completed with errors"
7430 };
7431 pb.finish_with_message(msg);
7432 }
7433
7434 Ok(BalanceValidationResult {
7435 validated: true,
7436 is_balanced,
7437 entries_processed: stats.entries_processed,
7438 total_debits: stats.total_debits,
7439 total_credits: stats.total_credits,
7440 accounts_tracked: stats.accounts_tracked,
7441 companies_tracked: stats.companies_tracked,
7442 validation_errors: all_errors,
7443 has_unbalanced_entries: has_unbalanced,
7444 })
7445 }
7446
7447 fn inject_data_quality(
7452 &mut self,
7453 entries: &mut [JournalEntry],
7454 ) -> SynthResult<DataQualityStats> {
7455 let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
7456
7457 let config = if self.config.data_quality.enabled {
7460 let dq = &self.config.data_quality;
7461 DataQualityConfig {
7462 enable_missing_values: dq.missing_values.enabled,
7463 missing_values: datasynth_generators::MissingValueConfig {
7464 global_rate: dq.effective_missing_rate(),
7465 ..Default::default()
7466 },
7467 enable_format_variations: dq.format_variations.enabled,
7468 format_variations: datasynth_generators::FormatVariationConfig {
7469 date_variation_rate: dq.format_variations.dates.rate,
7470 amount_variation_rate: dq.format_variations.amounts.rate,
7471 identifier_variation_rate: dq.format_variations.identifiers.rate,
7472 ..Default::default()
7473 },
7474 enable_duplicates: dq.duplicates.enabled,
7475 duplicates: datasynth_generators::DuplicateConfig {
7476 duplicate_rate: dq.effective_duplicate_rate(),
7477 ..Default::default()
7478 },
7479 enable_typos: dq.typos.enabled,
7480 typos: datasynth_generators::TypoConfig {
7481 char_error_rate: dq.effective_typo_rate(),
7482 ..Default::default()
7483 },
7484 enable_encoding_issues: dq.encoding_issues.enabled,
7485 encoding_issue_rate: dq.encoding_issues.rate,
7486 seed: self.seed.wrapping_add(77), track_statistics: true,
7488 }
7489 } else {
7490 DataQualityConfig::minimal()
7491 };
7492 let mut injector = DataQualityInjector::new(config);
7493
7494 injector.set_country_pack(self.primary_pack().clone());
7496
7497 let context = HashMap::new();
7499
7500 for entry in entries.iter_mut() {
7501 if let Some(text) = &entry.header.header_text {
7503 let processed = injector.process_text_field(
7504 "header_text",
7505 text,
7506 &entry.header.document_id.to_string(),
7507 &context,
7508 );
7509 match processed {
7510 Some(new_text) if new_text != *text => {
7511 entry.header.header_text = Some(new_text);
7512 }
7513 None => {
7514 entry.header.header_text = None; }
7516 _ => {}
7517 }
7518 }
7519
7520 if let Some(ref_text) = &entry.header.reference {
7522 let processed = injector.process_text_field(
7523 "reference",
7524 ref_text,
7525 &entry.header.document_id.to_string(),
7526 &context,
7527 );
7528 match processed {
7529 Some(new_text) if new_text != *ref_text => {
7530 entry.header.reference = Some(new_text);
7531 }
7532 None => {
7533 entry.header.reference = None;
7534 }
7535 _ => {}
7536 }
7537 }
7538
7539 let user_persona = entry.header.user_persona.clone();
7541 if let Some(processed) = injector.process_text_field(
7542 "user_persona",
7543 &user_persona,
7544 &entry.header.document_id.to_string(),
7545 &context,
7546 ) {
7547 if processed != user_persona {
7548 entry.header.user_persona = processed;
7549 }
7550 }
7551
7552 for line in &mut entry.lines {
7554 if let Some(ref text) = line.line_text {
7556 let processed = injector.process_text_field(
7557 "line_text",
7558 text,
7559 &entry.header.document_id.to_string(),
7560 &context,
7561 );
7562 match processed {
7563 Some(new_text) if new_text != *text => {
7564 line.line_text = Some(new_text);
7565 }
7566 None => {
7567 line.line_text = None;
7568 }
7569 _ => {}
7570 }
7571 }
7572
7573 if let Some(cc) = &line.cost_center {
7575 let processed = injector.process_text_field(
7576 "cost_center",
7577 cc,
7578 &entry.header.document_id.to_string(),
7579 &context,
7580 );
7581 match processed {
7582 Some(new_cc) if new_cc != *cc => {
7583 line.cost_center = Some(new_cc);
7584 }
7585 None => {
7586 line.cost_center = None;
7587 }
7588 _ => {}
7589 }
7590 }
7591 }
7592
7593 if let Some(pb) = &pb {
7594 pb.inc(1);
7595 }
7596 }
7597
7598 if let Some(pb) = pb {
7599 pb.finish_with_message("Data quality injection complete");
7600 }
7601
7602 Ok(injector.stats().clone())
7603 }
7604
7605 fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
7616 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7617 .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7618 let fiscal_year = start_date.year() as u16;
7619 let period_end = start_date + chrono::Months::new(self.config.global.period_months);
7620
7621 let total_revenue: rust_decimal::Decimal = entries
7623 .iter()
7624 .flat_map(|e| e.lines.iter())
7625 .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
7626 .map(|l| l.credit_amount)
7627 .sum();
7628
7629 let total_items = (self.phase_config.audit_engagements * 50) as u64; let pb = self.create_progress_bar(total_items, "Generating Audit Data");
7631
7632 let mut snapshot = AuditSnapshot::default();
7633
7634 let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
7636 let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
7637 let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
7638 let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
7639 let mut finding_gen = FindingGenerator::new(self.seed + 7400);
7640 let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
7641 let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
7642 let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
7643 let mut sample_gen = SampleGenerator::new(self.seed + 7800);
7644 let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
7645 let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
7646 let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
7647
7648 let accounts: Vec<String> = self
7650 .coa
7651 .as_ref()
7652 .map(|coa| {
7653 coa.get_postable_accounts()
7654 .iter()
7655 .map(|acc| acc.account_code().to_string())
7656 .collect()
7657 })
7658 .unwrap_or_default();
7659
7660 for (i, company) in self.config.companies.iter().enumerate() {
7662 let company_revenue = total_revenue
7664 * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
7665
7666 let engagements_for_company =
7668 self.phase_config.audit_engagements / self.config.companies.len().max(1);
7669 let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
7670 1
7671 } else {
7672 0
7673 };
7674
7675 for _eng_idx in 0..(engagements_for_company + extra) {
7676 let mut engagement = engagement_gen.generate_engagement(
7678 &company.code,
7679 &company.name,
7680 fiscal_year,
7681 period_end,
7682 company_revenue,
7683 None, );
7685
7686 if !self.master_data.employees.is_empty() {
7688 let emp_count = self.master_data.employees.len();
7689 let base = (i * 10 + _eng_idx) % emp_count;
7691 engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
7692 .employee_id
7693 .clone();
7694 engagement.engagement_manager_id = self.master_data.employees
7695 [(base + 1) % emp_count]
7696 .employee_id
7697 .clone();
7698 let real_team: Vec<String> = engagement
7699 .team_member_ids
7700 .iter()
7701 .enumerate()
7702 .map(|(j, _)| {
7703 self.master_data.employees[(base + 2 + j) % emp_count]
7704 .employee_id
7705 .clone()
7706 })
7707 .collect();
7708 engagement.team_member_ids = real_team;
7709 }
7710
7711 if let Some(pb) = &pb {
7712 pb.inc(1);
7713 }
7714
7715 let team_members: Vec<String> = engagement.team_member_ids.clone();
7717
7718 let workpapers =
7720 workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
7721
7722 for wp in &workpapers {
7723 if let Some(pb) = &pb {
7724 pb.inc(1);
7725 }
7726
7727 let evidence = evidence_gen.generate_evidence_for_workpaper(
7729 wp,
7730 &team_members,
7731 wp.preparer_date,
7732 );
7733
7734 for _ in &evidence {
7735 if let Some(pb) = &pb {
7736 pb.inc(1);
7737 }
7738 }
7739
7740 snapshot.evidence.extend(evidence);
7741 }
7742
7743 let risks =
7745 risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
7746
7747 for _ in &risks {
7748 if let Some(pb) = &pb {
7749 pb.inc(1);
7750 }
7751 }
7752 snapshot.risk_assessments.extend(risks);
7753
7754 let findings = finding_gen.generate_findings_for_engagement(
7756 &engagement,
7757 &workpapers,
7758 &team_members,
7759 );
7760
7761 for _ in &findings {
7762 if let Some(pb) = &pb {
7763 pb.inc(1);
7764 }
7765 }
7766 snapshot.findings.extend(findings);
7767
7768 let judgments =
7770 judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
7771
7772 for _ in &judgments {
7773 if let Some(pb) = &pb {
7774 pb.inc(1);
7775 }
7776 }
7777 snapshot.judgments.extend(judgments);
7778
7779 let (confs, resps) =
7781 confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
7782 snapshot.confirmations.extend(confs);
7783 snapshot.confirmation_responses.extend(resps);
7784
7785 let team_pairs: Vec<(String, String)> = team_members
7787 .iter()
7788 .map(|id| {
7789 let name = self
7790 .master_data
7791 .employees
7792 .iter()
7793 .find(|e| e.employee_id == *id)
7794 .map(|e| e.display_name.clone())
7795 .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
7796 (id.clone(), name)
7797 })
7798 .collect();
7799 for wp in &workpapers {
7800 let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
7801 snapshot.procedure_steps.extend(steps);
7802 }
7803
7804 for wp in &workpapers {
7806 if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
7807 snapshot.samples.push(sample);
7808 }
7809 }
7810
7811 let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
7813 snapshot.analytical_results.extend(analytical);
7814
7815 let (ia_func, ia_reports) = ia_gen.generate(&engagement);
7817 snapshot.ia_functions.push(ia_func);
7818 snapshot.ia_reports.extend(ia_reports);
7819
7820 let vendor_names: Vec<String> = self
7822 .master_data
7823 .vendors
7824 .iter()
7825 .map(|v| v.name.clone())
7826 .collect();
7827 let customer_names: Vec<String> = self
7828 .master_data
7829 .customers
7830 .iter()
7831 .map(|c| c.name.clone())
7832 .collect();
7833 let (parties, rp_txns) =
7834 related_party_gen.generate(&engagement, &vendor_names, &customer_names);
7835 snapshot.related_parties.extend(parties);
7836 snapshot.related_party_transactions.extend(rp_txns);
7837
7838 snapshot.workpapers.extend(workpapers);
7840 snapshot.engagements.push(engagement);
7841 }
7842 }
7843
7844 if let Some(pb) = pb {
7845 pb.finish_with_message(format!(
7846 "Audit data: {} engagements, {} workpapers, {} evidence, \
7847 {} confirmations, {} procedure steps, {} samples, \
7848 {} analytical, {} IA funcs, {} related parties",
7849 snapshot.engagements.len(),
7850 snapshot.workpapers.len(),
7851 snapshot.evidence.len(),
7852 snapshot.confirmations.len(),
7853 snapshot.procedure_steps.len(),
7854 snapshot.samples.len(),
7855 snapshot.analytical_results.len(),
7856 snapshot.ia_functions.len(),
7857 snapshot.related_parties.len(),
7858 ));
7859 }
7860
7861 Ok(snapshot)
7862 }
7863
7864 fn export_graphs(
7871 &mut self,
7872 entries: &[JournalEntry],
7873 _coa: &Arc<ChartOfAccounts>,
7874 stats: &mut EnhancedGenerationStatistics,
7875 ) -> SynthResult<GraphExportSnapshot> {
7876 let pb = self.create_progress_bar(100, "Exporting Graphs");
7877
7878 let mut snapshot = GraphExportSnapshot::default();
7879
7880 let output_dir = self
7882 .output_path
7883 .clone()
7884 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7885 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7886
7887 for graph_type in &self.config.graph_export.graph_types {
7889 if let Some(pb) = &pb {
7890 pb.inc(10);
7891 }
7892
7893 let graph_config = TransactionGraphConfig {
7895 include_vendors: false,
7896 include_customers: false,
7897 create_debit_credit_edges: true,
7898 include_document_nodes: graph_type.include_document_nodes,
7899 min_edge_weight: graph_type.min_edge_weight,
7900 aggregate_parallel_edges: graph_type.aggregate_edges,
7901 framework: None,
7902 };
7903
7904 let mut builder = TransactionGraphBuilder::new(graph_config);
7905 builder.add_journal_entries(entries);
7906 let graph = builder.build();
7907
7908 stats.graph_node_count += graph.node_count();
7910 stats.graph_edge_count += graph.edge_count();
7911
7912 if let Some(pb) = &pb {
7913 pb.inc(40);
7914 }
7915
7916 for format in &self.config.graph_export.formats {
7918 let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
7919
7920 if let Err(e) = std::fs::create_dir_all(&format_dir) {
7922 warn!("Failed to create graph output directory: {}", e);
7923 continue;
7924 }
7925
7926 match format {
7927 datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
7928 let pyg_config = PyGExportConfig {
7929 common: datasynth_graph::CommonExportConfig {
7930 export_node_features: true,
7931 export_edge_features: true,
7932 export_node_labels: true,
7933 export_edge_labels: true,
7934 export_masks: true,
7935 train_ratio: self.config.graph_export.train_ratio,
7936 val_ratio: self.config.graph_export.validation_ratio,
7937 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7938 },
7939 one_hot_categoricals: false,
7940 };
7941
7942 let exporter = PyGExporter::new(pyg_config);
7943 match exporter.export(&graph, &format_dir) {
7944 Ok(metadata) => {
7945 snapshot.exports.insert(
7946 format!("{}_{}", graph_type.name, "pytorch_geometric"),
7947 GraphExportInfo {
7948 name: graph_type.name.clone(),
7949 format: "pytorch_geometric".to_string(),
7950 output_path: format_dir.clone(),
7951 node_count: metadata.num_nodes,
7952 edge_count: metadata.num_edges,
7953 },
7954 );
7955 snapshot.graph_count += 1;
7956 }
7957 Err(e) => {
7958 warn!("Failed to export PyTorch Geometric graph: {}", e);
7959 }
7960 }
7961 }
7962 datasynth_config::schema::GraphExportFormat::Neo4j => {
7963 use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
7964
7965 let neo4j_config = Neo4jExportConfig {
7966 export_node_properties: true,
7967 export_edge_properties: true,
7968 export_features: true,
7969 generate_cypher: true,
7970 generate_admin_import: true,
7971 database_name: "synth".to_string(),
7972 cypher_batch_size: 1000,
7973 };
7974
7975 let exporter = Neo4jExporter::new(neo4j_config);
7976 match exporter.export(&graph, &format_dir) {
7977 Ok(metadata) => {
7978 snapshot.exports.insert(
7979 format!("{}_{}", graph_type.name, "neo4j"),
7980 GraphExportInfo {
7981 name: graph_type.name.clone(),
7982 format: "neo4j".to_string(),
7983 output_path: format_dir.clone(),
7984 node_count: metadata.num_nodes,
7985 edge_count: metadata.num_edges,
7986 },
7987 );
7988 snapshot.graph_count += 1;
7989 }
7990 Err(e) => {
7991 warn!("Failed to export Neo4j graph: {}", e);
7992 }
7993 }
7994 }
7995 datasynth_config::schema::GraphExportFormat::Dgl => {
7996 use datasynth_graph::{DGLExportConfig, DGLExporter};
7997
7998 let dgl_config = DGLExportConfig {
7999 common: datasynth_graph::CommonExportConfig {
8000 export_node_features: true,
8001 export_edge_features: true,
8002 export_node_labels: true,
8003 export_edge_labels: true,
8004 export_masks: true,
8005 train_ratio: self.config.graph_export.train_ratio,
8006 val_ratio: self.config.graph_export.validation_ratio,
8007 seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
8008 },
8009 heterogeneous: false,
8010 include_pickle_script: true, };
8012
8013 let exporter = DGLExporter::new(dgl_config);
8014 match exporter.export(&graph, &format_dir) {
8015 Ok(metadata) => {
8016 snapshot.exports.insert(
8017 format!("{}_{}", graph_type.name, "dgl"),
8018 GraphExportInfo {
8019 name: graph_type.name.clone(),
8020 format: "dgl".to_string(),
8021 output_path: format_dir.clone(),
8022 node_count: metadata.common.num_nodes,
8023 edge_count: metadata.common.num_edges,
8024 },
8025 );
8026 snapshot.graph_count += 1;
8027 }
8028 Err(e) => {
8029 warn!("Failed to export DGL graph: {}", e);
8030 }
8031 }
8032 }
8033 datasynth_config::schema::GraphExportFormat::RustGraph => {
8034 use datasynth_graph::{
8035 RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
8036 };
8037
8038 let rustgraph_config = RustGraphExportConfig {
8039 include_features: true,
8040 include_temporal: true,
8041 include_labels: true,
8042 source_name: "datasynth".to_string(),
8043 batch_id: None,
8044 output_format: RustGraphOutputFormat::JsonLines,
8045 export_node_properties: true,
8046 export_edge_properties: true,
8047 pretty_print: false,
8048 };
8049
8050 let exporter = RustGraphExporter::new(rustgraph_config);
8051 match exporter.export(&graph, &format_dir) {
8052 Ok(metadata) => {
8053 snapshot.exports.insert(
8054 format!("{}_{}", graph_type.name, "rustgraph"),
8055 GraphExportInfo {
8056 name: graph_type.name.clone(),
8057 format: "rustgraph".to_string(),
8058 output_path: format_dir.clone(),
8059 node_count: metadata.num_nodes,
8060 edge_count: metadata.num_edges,
8061 },
8062 );
8063 snapshot.graph_count += 1;
8064 }
8065 Err(e) => {
8066 warn!("Failed to export RustGraph: {}", e);
8067 }
8068 }
8069 }
8070 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
8071 debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
8073 }
8074 }
8075 }
8076
8077 if let Some(pb) = &pb {
8078 pb.inc(40);
8079 }
8080 }
8081
8082 stats.graph_export_count = snapshot.graph_count;
8083 snapshot.exported = snapshot.graph_count > 0;
8084
8085 if let Some(pb) = pb {
8086 pb.finish_with_message(format!(
8087 "Graphs exported: {} graphs ({} nodes, {} edges)",
8088 snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
8089 ));
8090 }
8091
8092 Ok(snapshot)
8093 }
8094
8095 fn build_additional_graphs(
8100 &self,
8101 banking: &BankingSnapshot,
8102 intercompany: &IntercompanySnapshot,
8103 entries: &[JournalEntry],
8104 stats: &mut EnhancedGenerationStatistics,
8105 ) {
8106 let output_dir = self
8107 .output_path
8108 .clone()
8109 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
8110 let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
8111
8112 if !banking.customers.is_empty() && !banking.transactions.is_empty() {
8114 info!("Phase 10c: Building banking network graph");
8115 let config = BankingGraphConfig::default();
8116 let mut builder = BankingGraphBuilder::new(config);
8117 builder.add_customers(&banking.customers);
8118 builder.add_accounts(&banking.accounts, &banking.customers);
8119 builder.add_transactions(&banking.transactions);
8120 let graph = builder.build();
8121
8122 let node_count = graph.node_count();
8123 let edge_count = graph.edge_count();
8124 stats.graph_node_count += node_count;
8125 stats.graph_edge_count += edge_count;
8126
8127 for format in &self.config.graph_export.formats {
8129 if matches!(
8130 format,
8131 datasynth_config::schema::GraphExportFormat::PytorchGeometric
8132 ) {
8133 let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
8134 if let Err(e) = std::fs::create_dir_all(&format_dir) {
8135 warn!("Failed to create banking graph output dir: {}", e);
8136 continue;
8137 }
8138 let pyg_config = PyGExportConfig::default();
8139 let exporter = PyGExporter::new(pyg_config);
8140 if let Err(e) = exporter.export(&graph, &format_dir) {
8141 warn!("Failed to export banking graph as PyG: {}", e);
8142 } else {
8143 info!(
8144 "Banking network graph exported: {} nodes, {} edges",
8145 node_count, edge_count
8146 );
8147 }
8148 }
8149 }
8150 }
8151
8152 let approval_entries: Vec<_> = entries
8154 .iter()
8155 .filter(|je| je.header.approval_workflow.is_some())
8156 .collect();
8157
8158 if !approval_entries.is_empty() {
8159 info!(
8160 "Phase 10c: Building approval network graph ({} entries with approvals)",
8161 approval_entries.len()
8162 );
8163 let config = ApprovalGraphConfig::default();
8164 let mut builder = ApprovalGraphBuilder::new(config);
8165
8166 for je in &approval_entries {
8167 if let Some(ref wf) = je.header.approval_workflow {
8168 for action in &wf.actions {
8169 let record = datasynth_core::models::ApprovalRecord {
8170 approval_id: format!(
8171 "APR-{}-{}",
8172 je.header.document_id, action.approval_level
8173 ),
8174 document_number: je.header.document_id.to_string(),
8175 document_type: "JE".to_string(),
8176 company_code: je.company_code().to_string(),
8177 requester_id: wf.preparer_id.clone(),
8178 requester_name: Some(wf.preparer_name.clone()),
8179 approver_id: action.actor_id.clone(),
8180 approver_name: action.actor_name.clone(),
8181 approval_date: je.posting_date(),
8182 action: format!("{:?}", action.action),
8183 amount: wf.amount,
8184 approval_limit: None,
8185 comments: action.comments.clone(),
8186 delegation_from: None,
8187 is_auto_approved: false,
8188 };
8189 builder.add_approval(&record);
8190 }
8191 }
8192 }
8193
8194 let graph = builder.build();
8195 let node_count = graph.node_count();
8196 let edge_count = graph.edge_count();
8197 stats.graph_node_count += node_count;
8198 stats.graph_edge_count += edge_count;
8199
8200 for format in &self.config.graph_export.formats {
8202 if matches!(
8203 format,
8204 datasynth_config::schema::GraphExportFormat::PytorchGeometric
8205 ) {
8206 let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
8207 if let Err(e) = std::fs::create_dir_all(&format_dir) {
8208 warn!("Failed to create approval graph output dir: {}", e);
8209 continue;
8210 }
8211 let pyg_config = PyGExportConfig::default();
8212 let exporter = PyGExporter::new(pyg_config);
8213 if let Err(e) = exporter.export(&graph, &format_dir) {
8214 warn!("Failed to export approval graph as PyG: {}", e);
8215 } else {
8216 info!(
8217 "Approval network graph exported: {} nodes, {} edges",
8218 node_count, edge_count
8219 );
8220 }
8221 }
8222 }
8223 }
8224
8225 if self.config.companies.len() >= 2 {
8227 info!(
8228 "Phase 10c: Building entity relationship graph ({} companies)",
8229 self.config.companies.len()
8230 );
8231
8232 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8233 .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
8234
8235 let parent_code = &self.config.companies[0].code;
8237 let mut companies: Vec<datasynth_core::models::Company> =
8238 Vec::with_capacity(self.config.companies.len());
8239
8240 let first = &self.config.companies[0];
8242 companies.push(datasynth_core::models::Company::parent(
8243 &first.code,
8244 &first.name,
8245 &first.country,
8246 &first.currency,
8247 ));
8248
8249 for cc in self.config.companies.iter().skip(1) {
8251 companies.push(datasynth_core::models::Company::subsidiary(
8252 &cc.code,
8253 &cc.name,
8254 &cc.country,
8255 &cc.currency,
8256 parent_code,
8257 rust_decimal::Decimal::from(100),
8258 ));
8259 }
8260
8261 let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
8263 self.config
8264 .companies
8265 .iter()
8266 .skip(1)
8267 .enumerate()
8268 .map(|(i, cc)| {
8269 let mut rel =
8270 datasynth_core::models::intercompany::IntercompanyRelationship::new(
8271 format!("REL{:03}", i + 1),
8272 parent_code.clone(),
8273 cc.code.clone(),
8274 rust_decimal::Decimal::from(100),
8275 start_date,
8276 );
8277 rel.functional_currency = cc.currency.clone();
8278 rel
8279 })
8280 .collect();
8281
8282 let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
8283 builder.add_companies(&companies);
8284 builder.add_ownership_relationships(&relationships);
8285
8286 for pair in &intercompany.matched_pairs {
8288 builder.add_intercompany_edge(
8289 &pair.seller_company,
8290 &pair.buyer_company,
8291 pair.amount,
8292 &format!("{:?}", pair.transaction_type),
8293 );
8294 }
8295
8296 let graph = builder.build();
8297 let node_count = graph.node_count();
8298 let edge_count = graph.edge_count();
8299 stats.graph_node_count += node_count;
8300 stats.graph_edge_count += edge_count;
8301
8302 for format in &self.config.graph_export.formats {
8304 if matches!(
8305 format,
8306 datasynth_config::schema::GraphExportFormat::PytorchGeometric
8307 ) {
8308 let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
8309 if let Err(e) = std::fs::create_dir_all(&format_dir) {
8310 warn!("Failed to create entity graph output dir: {}", e);
8311 continue;
8312 }
8313 let pyg_config = PyGExportConfig::default();
8314 let exporter = PyGExporter::new(pyg_config);
8315 if let Err(e) = exporter.export(&graph, &format_dir) {
8316 warn!("Failed to export entity graph as PyG: {}", e);
8317 } else {
8318 info!(
8319 "Entity relationship graph exported: {} nodes, {} edges",
8320 node_count, edge_count
8321 );
8322 }
8323 }
8324 }
8325 } else {
8326 debug!(
8327 "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
8328 self.config.companies.len()
8329 );
8330 }
8331 }
8332
8333 #[allow(clippy::too_many_arguments)]
8340 fn export_hypergraph(
8341 &self,
8342 coa: &Arc<ChartOfAccounts>,
8343 entries: &[JournalEntry],
8344 document_flows: &DocumentFlowSnapshot,
8345 sourcing: &SourcingSnapshot,
8346 hr: &HrSnapshot,
8347 manufacturing: &ManufacturingSnapshot,
8348 banking: &BankingSnapshot,
8349 audit: &AuditSnapshot,
8350 financial_reporting: &FinancialReportingSnapshot,
8351 ocpm: &OcpmSnapshot,
8352 compliance: &ComplianceRegulationsSnapshot,
8353 stats: &mut EnhancedGenerationStatistics,
8354 ) -> SynthResult<HypergraphExportInfo> {
8355 use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
8356 use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
8357 use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
8358 use datasynth_graph::models::hypergraph::AggregationStrategy;
8359
8360 let hg_settings = &self.config.graph_export.hypergraph;
8361
8362 let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
8364 "truncate" => AggregationStrategy::Truncate,
8365 "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
8366 "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
8367 "importance_sample" => AggregationStrategy::ImportanceSample,
8368 _ => AggregationStrategy::PoolByCounterparty,
8369 };
8370
8371 let builder_config = HypergraphConfig {
8372 max_nodes: hg_settings.max_nodes,
8373 aggregation_strategy,
8374 include_coso: hg_settings.governance_layer.include_coso,
8375 include_controls: hg_settings.governance_layer.include_controls,
8376 include_sox: hg_settings.governance_layer.include_sox,
8377 include_vendors: hg_settings.governance_layer.include_vendors,
8378 include_customers: hg_settings.governance_layer.include_customers,
8379 include_employees: hg_settings.governance_layer.include_employees,
8380 include_p2p: hg_settings.process_layer.include_p2p,
8381 include_o2c: hg_settings.process_layer.include_o2c,
8382 include_s2c: hg_settings.process_layer.include_s2c,
8383 include_h2r: hg_settings.process_layer.include_h2r,
8384 include_mfg: hg_settings.process_layer.include_mfg,
8385 include_bank: hg_settings.process_layer.include_bank,
8386 include_audit: hg_settings.process_layer.include_audit,
8387 include_r2r: hg_settings.process_layer.include_r2r,
8388 events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
8389 docs_per_counterparty_threshold: hg_settings
8390 .process_layer
8391 .docs_per_counterparty_threshold,
8392 include_accounts: hg_settings.accounting_layer.include_accounts,
8393 je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
8394 include_cross_layer_edges: hg_settings.cross_layer.enabled,
8395 include_compliance: self.config.compliance_regulations.enabled,
8396 include_tax: true,
8397 include_treasury: true,
8398 include_esg: true,
8399 include_project: true,
8400 include_intercompany: true,
8401 include_temporal_events: true,
8402 };
8403
8404 let mut builder = HypergraphBuilder::new(builder_config);
8405
8406 builder.add_coso_framework();
8408
8409 if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
8412 let controls = InternalControl::standard_controls();
8413 builder.add_controls(&controls);
8414 }
8415
8416 builder.add_vendors(&self.master_data.vendors);
8418 builder.add_customers(&self.master_data.customers);
8419 builder.add_employees(&self.master_data.employees);
8420
8421 builder.add_p2p_documents(
8423 &document_flows.purchase_orders,
8424 &document_flows.goods_receipts,
8425 &document_flows.vendor_invoices,
8426 &document_flows.payments,
8427 );
8428 builder.add_o2c_documents(
8429 &document_flows.sales_orders,
8430 &document_flows.deliveries,
8431 &document_flows.customer_invoices,
8432 );
8433 builder.add_s2c_documents(
8434 &sourcing.sourcing_projects,
8435 &sourcing.qualifications,
8436 &sourcing.rfx_events,
8437 &sourcing.bids,
8438 &sourcing.bid_evaluations,
8439 &sourcing.contracts,
8440 );
8441 builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
8442 builder.add_mfg_documents(
8443 &manufacturing.production_orders,
8444 &manufacturing.quality_inspections,
8445 &manufacturing.cycle_counts,
8446 );
8447 builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
8448 builder.add_audit_documents(
8449 &audit.engagements,
8450 &audit.workpapers,
8451 &audit.findings,
8452 &audit.evidence,
8453 &audit.risk_assessments,
8454 &audit.judgments,
8455 );
8456 builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
8457
8458 if let Some(ref event_log) = ocpm.event_log {
8460 builder.add_ocpm_events(event_log);
8461 }
8462
8463 if self.config.compliance_regulations.enabled
8465 && hg_settings.governance_layer.include_controls
8466 {
8467 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
8469 let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
8470 .standard_records
8471 .iter()
8472 .filter_map(|r| {
8473 let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
8474 registry.get(&sid).cloned()
8475 })
8476 .collect();
8477
8478 builder.add_compliance_regulations(
8479 &standards,
8480 &compliance.findings,
8481 &compliance.filings,
8482 );
8483 }
8484
8485 builder.add_accounts(coa);
8487 builder.add_journal_entries_as_hyperedges(entries);
8488
8489 let hypergraph = builder.build();
8491
8492 let output_dir = self
8494 .output_path
8495 .clone()
8496 .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
8497 let hg_dir = output_dir
8498 .join(&self.config.graph_export.output_subdirectory)
8499 .join(&hg_settings.output_subdirectory);
8500
8501 let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
8503 "unified" => {
8504 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8505 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8506 SynthError::generation(format!("Unified hypergraph export failed: {e}"))
8507 })?;
8508 (
8509 metadata.num_nodes,
8510 metadata.num_edges,
8511 metadata.num_hyperedges,
8512 )
8513 }
8514 _ => {
8515 let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
8517 let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8518 SynthError::generation(format!("Hypergraph export failed: {e}"))
8519 })?;
8520 (
8521 metadata.num_nodes,
8522 metadata.num_edges,
8523 metadata.num_hyperedges,
8524 )
8525 }
8526 };
8527
8528 #[cfg(feature = "streaming")]
8530 if let Some(ref target_url) = hg_settings.stream_target {
8531 use crate::stream_client::{StreamClient, StreamConfig};
8532 use std::io::Write as _;
8533
8534 let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
8535 let stream_config = StreamConfig {
8536 target_url: target_url.clone(),
8537 batch_size: hg_settings.stream_batch_size,
8538 api_key,
8539 ..StreamConfig::default()
8540 };
8541
8542 match StreamClient::new(stream_config) {
8543 Ok(mut client) => {
8544 let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8545 match exporter.export_to_writer(&hypergraph, &mut client) {
8546 Ok(_) => {
8547 if let Err(e) = client.flush() {
8548 warn!("Failed to flush stream client: {}", e);
8549 } else {
8550 info!("Streamed {} records to {}", client.total_sent(), target_url);
8551 }
8552 }
8553 Err(e) => {
8554 warn!("Streaming export failed: {}", e);
8555 }
8556 }
8557 }
8558 Err(e) => {
8559 warn!("Failed to create stream client: {}", e);
8560 }
8561 }
8562 }
8563
8564 stats.graph_node_count += num_nodes;
8566 stats.graph_edge_count += num_edges;
8567 stats.graph_export_count += 1;
8568
8569 Ok(HypergraphExportInfo {
8570 node_count: num_nodes,
8571 edge_count: num_edges,
8572 hyperedge_count: num_hyperedges,
8573 output_path: hg_dir,
8574 })
8575 }
8576
8577 fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
8582 let pb = self.create_progress_bar(100, "Generating Banking Data");
8583
8584 let orchestrator = BankingOrchestratorBuilder::new()
8586 .config(self.config.banking.clone())
8587 .seed(self.seed + 9000)
8588 .country_pack(self.primary_pack().clone())
8589 .build();
8590
8591 if let Some(pb) = &pb {
8592 pb.inc(10);
8593 }
8594
8595 let result = orchestrator.generate();
8597
8598 if let Some(pb) = &pb {
8599 pb.inc(90);
8600 pb.finish_with_message(format!(
8601 "Banking: {} customers, {} transactions",
8602 result.customers.len(),
8603 result.transactions.len()
8604 ));
8605 }
8606
8607 let mut banking_customers = result.customers;
8612 let core_customers = &self.master_data.customers;
8613 if !core_customers.is_empty() {
8614 for (i, bc) in banking_customers.iter_mut().enumerate() {
8615 let core = &core_customers[i % core_customers.len()];
8616 bc.name = CustomerName::business(&core.name);
8617 bc.residence_country = core.country.clone();
8618 bc.enterprise_customer_id = Some(core.customer_id.clone());
8619 }
8620 debug!(
8621 "Cross-referenced {} banking customers with {} core customers",
8622 banking_customers.len(),
8623 core_customers.len()
8624 );
8625 }
8626
8627 Ok(BankingSnapshot {
8628 customers: banking_customers,
8629 accounts: result.accounts,
8630 transactions: result.transactions,
8631 transaction_labels: result.transaction_labels,
8632 customer_labels: result.customer_labels,
8633 account_labels: result.account_labels,
8634 relationship_labels: result.relationship_labels,
8635 narratives: result.narratives,
8636 suspicious_count: result.stats.suspicious_count,
8637 scenario_count: result.scenarios.len(),
8638 })
8639 }
8640
8641 fn calculate_total_transactions(&self) -> u64 {
8643 let months = self.config.global.period_months as f64;
8644 self.config
8645 .companies
8646 .iter()
8647 .map(|c| {
8648 let annual = c.annual_transaction_volume.count() as f64;
8649 let weighted = annual * c.volume_weight;
8650 (weighted * months / 12.0) as u64
8651 })
8652 .sum()
8653 }
8654
8655 fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
8657 if !self.phase_config.show_progress {
8658 return None;
8659 }
8660
8661 let pb = if let Some(mp) = &self.multi_progress {
8662 mp.add(ProgressBar::new(total))
8663 } else {
8664 ProgressBar::new(total)
8665 };
8666
8667 pb.set_style(
8668 ProgressStyle::default_bar()
8669 .template(&format!(
8670 "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
8671 ))
8672 .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
8673 .progress_chars("#>-"),
8674 );
8675
8676 Some(pb)
8677 }
8678
8679 pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
8681 self.coa.clone()
8682 }
8683
8684 pub fn get_master_data(&self) -> &MasterDataSnapshot {
8686 &self.master_data
8687 }
8688
8689 fn phase_compliance_regulations(
8691 &mut self,
8692 _stats: &mut EnhancedGenerationStatistics,
8693 ) -> SynthResult<ComplianceRegulationsSnapshot> {
8694 if !self.phase_config.generate_compliance_regulations {
8695 return Ok(ComplianceRegulationsSnapshot::default());
8696 }
8697
8698 info!("Phase: Generating Compliance Regulations Data");
8699
8700 let cr_config = &self.config.compliance_regulations;
8701
8702 let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
8704 self.config
8705 .companies
8706 .iter()
8707 .map(|c| c.country.clone())
8708 .collect::<std::collections::HashSet<_>>()
8709 .into_iter()
8710 .collect()
8711 } else {
8712 cr_config.jurisdictions.clone()
8713 };
8714
8715 let fallback_date =
8717 NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
8718 let reference_date = cr_config
8719 .reference_date
8720 .as_ref()
8721 .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
8722 .unwrap_or_else(|| {
8723 NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8724 .unwrap_or(fallback_date)
8725 });
8726
8727 let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
8729 let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
8730 let cross_reference_records = reg_gen.generate_cross_reference_records();
8731 let jurisdiction_records =
8732 reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
8733
8734 info!(
8735 " Standards: {} records, {} cross-references, {} jurisdictions",
8736 standard_records.len(),
8737 cross_reference_records.len(),
8738 jurisdiction_records.len()
8739 );
8740
8741 let audit_procedures = if cr_config.audit_procedures.enabled {
8743 let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
8744 procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
8745 sampling_method: cr_config.audit_procedures.sampling_method.clone(),
8746 confidence_level: cr_config.audit_procedures.confidence_level,
8747 tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
8748 };
8749 let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
8750 self.seed + 9000,
8751 proc_config,
8752 );
8753 let registry = reg_gen.registry();
8754 let mut all_procs = Vec::new();
8755 for jurisdiction in &jurisdictions {
8756 let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
8757 all_procs.extend(procs);
8758 }
8759 info!(" Audit procedures: {}", all_procs.len());
8760 all_procs
8761 } else {
8762 Vec::new()
8763 };
8764
8765 let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
8767 let finding_config =
8768 datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
8769 finding_rate: cr_config.findings.finding_rate,
8770 material_weakness_rate: cr_config.findings.material_weakness_rate,
8771 significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
8772 generate_remediation: cr_config.findings.generate_remediation,
8773 };
8774 let mut finding_gen =
8775 datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
8776 self.seed + 9100,
8777 finding_config,
8778 );
8779 let mut all_findings = Vec::new();
8780 for company in &self.config.companies {
8781 let company_findings =
8782 finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
8783 all_findings.extend(company_findings);
8784 }
8785 info!(" Compliance findings: {}", all_findings.len());
8786 all_findings
8787 } else {
8788 Vec::new()
8789 };
8790
8791 let filings = if cr_config.filings.enabled {
8793 let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
8794 filing_types: cr_config.filings.filing_types.clone(),
8795 generate_status_progression: cr_config.filings.generate_status_progression,
8796 };
8797 let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
8798 self.seed + 9200,
8799 filing_config,
8800 );
8801 let company_codes: Vec<String> = self
8802 .config
8803 .companies
8804 .iter()
8805 .map(|c| c.code.clone())
8806 .collect();
8807 let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8808 .unwrap_or(fallback_date);
8809 let filings = filing_gen.generate_filings(
8810 &company_codes,
8811 &jurisdictions,
8812 start_date,
8813 self.config.global.period_months,
8814 );
8815 info!(" Regulatory filings: {}", filings.len());
8816 filings
8817 } else {
8818 Vec::new()
8819 };
8820
8821 let compliance_graph = if cr_config.graph.enabled {
8823 let graph_config = datasynth_graph::ComplianceGraphConfig {
8824 include_standard_nodes: cr_config.graph.include_compliance_nodes,
8825 include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
8826 include_cross_references: cr_config.graph.include_cross_references,
8827 include_supersession_edges: cr_config.graph.include_supersession_edges,
8828 include_account_links: cr_config.graph.include_account_links,
8829 include_control_links: cr_config.graph.include_control_links,
8830 include_company_links: cr_config.graph.include_company_links,
8831 };
8832 let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
8833
8834 let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
8836 .iter()
8837 .map(|r| datasynth_graph::StandardNodeInput {
8838 standard_id: r.standard_id.clone(),
8839 title: r.title.clone(),
8840 category: r.category.clone(),
8841 domain: r.domain.clone(),
8842 is_active: r.is_active,
8843 features: vec![if r.is_active { 1.0 } else { 0.0 }],
8844 applicable_account_types: r.applicable_account_types.clone(),
8845 applicable_processes: r.applicable_processes.clone(),
8846 })
8847 .collect();
8848 builder.add_standards(&standard_inputs);
8849
8850 let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
8852 jurisdiction_records
8853 .iter()
8854 .map(|r| datasynth_graph::JurisdictionNodeInput {
8855 country_code: r.country_code.clone(),
8856 country_name: r.country_name.clone(),
8857 framework: r.accounting_framework.clone(),
8858 standard_count: r.standard_count,
8859 tax_rate: r.statutory_tax_rate,
8860 })
8861 .collect();
8862 builder.add_jurisdictions(&jurisdiction_inputs);
8863
8864 let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
8866 cross_reference_records
8867 .iter()
8868 .map(|r| datasynth_graph::CrossReferenceEdgeInput {
8869 from_standard: r.from_standard.clone(),
8870 to_standard: r.to_standard.clone(),
8871 relationship: r.relationship.clone(),
8872 convergence_level: r.convergence_level,
8873 })
8874 .collect();
8875 builder.add_cross_references(&xref_inputs);
8876
8877 let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
8879 .iter()
8880 .map(|r| datasynth_graph::JurisdictionMappingInput {
8881 country_code: r.jurisdiction.clone(),
8882 standard_id: r.standard_id.clone(),
8883 })
8884 .collect();
8885 builder.add_jurisdiction_mappings(&mapping_inputs);
8886
8887 let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
8889 .iter()
8890 .map(|p| datasynth_graph::ProcedureNodeInput {
8891 procedure_id: p.procedure_id.clone(),
8892 standard_id: p.standard_id.clone(),
8893 procedure_type: p.procedure_type.clone(),
8894 sample_size: p.sample_size,
8895 confidence_level: p.confidence_level,
8896 })
8897 .collect();
8898 builder.add_procedures(&proc_inputs);
8899
8900 let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
8902 .iter()
8903 .map(|f| datasynth_graph::FindingNodeInput {
8904 finding_id: f.finding_id.to_string(),
8905 standard_id: f
8906 .related_standards
8907 .first()
8908 .map(|s| s.as_str().to_string())
8909 .unwrap_or_default(),
8910 severity: f.severity.to_string(),
8911 deficiency_level: f.deficiency_level.to_string(),
8912 severity_score: f.deficiency_level.severity_score(),
8913 control_id: f.control_id.clone(),
8914 affected_accounts: f.affected_accounts.clone(),
8915 })
8916 .collect();
8917 builder.add_findings(&finding_inputs);
8918
8919 if cr_config.graph.include_account_links {
8921 let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
8922 let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
8923 for std_record in &standard_records {
8924 if let Some(std_obj) =
8925 registry.get(&datasynth_core::models::compliance::StandardId::parse(
8926 &std_record.standard_id,
8927 ))
8928 {
8929 for acct_type in &std_obj.applicable_account_types {
8930 account_links.push(datasynth_graph::AccountLinkInput {
8931 standard_id: std_record.standard_id.clone(),
8932 account_code: acct_type.clone(),
8933 account_name: acct_type.clone(),
8934 });
8935 }
8936 }
8937 }
8938 builder.add_account_links(&account_links);
8939 }
8940
8941 if cr_config.graph.include_control_links {
8943 let mut control_links = Vec::new();
8944 let sox_like_ids: Vec<String> = standard_records
8946 .iter()
8947 .filter(|r| {
8948 r.standard_id.starts_with("SOX")
8949 || r.standard_id.starts_with("PCAOB-AS-2201")
8950 })
8951 .map(|r| r.standard_id.clone())
8952 .collect();
8953 let control_ids = [
8955 ("C001", "Cash Controls"),
8956 ("C002", "Large Transaction Approval"),
8957 ("C010", "PO Approval"),
8958 ("C011", "Three-Way Match"),
8959 ("C020", "Revenue Recognition"),
8960 ("C021", "Credit Check"),
8961 ("C030", "Manual JE Approval"),
8962 ("C031", "Period Close Review"),
8963 ("C032", "Account Reconciliation"),
8964 ("C040", "Payroll Processing"),
8965 ("C050", "Fixed Asset Capitalization"),
8966 ("C060", "Intercompany Elimination"),
8967 ];
8968 for sox_id in &sox_like_ids {
8969 for (ctrl_id, ctrl_name) in &control_ids {
8970 control_links.push(datasynth_graph::ControlLinkInput {
8971 standard_id: sox_id.clone(),
8972 control_id: ctrl_id.to_string(),
8973 control_name: ctrl_name.to_string(),
8974 });
8975 }
8976 }
8977 builder.add_control_links(&control_links);
8978 }
8979
8980 if cr_config.graph.include_company_links {
8982 let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
8983 .iter()
8984 .enumerate()
8985 .map(|(i, f)| datasynth_graph::FilingNodeInput {
8986 filing_id: format!("F{:04}", i + 1),
8987 filing_type: f.filing_type.to_string(),
8988 company_code: f.company_code.clone(),
8989 jurisdiction: f.jurisdiction.clone(),
8990 status: format!("{:?}", f.status),
8991 })
8992 .collect();
8993 builder.add_filings(&filing_inputs);
8994 }
8995
8996 let graph = builder.build();
8997 info!(
8998 " Compliance graph: {} nodes, {} edges",
8999 graph.nodes.len(),
9000 graph.edges.len()
9001 );
9002 Some(graph)
9003 } else {
9004 None
9005 };
9006
9007 self.check_resources_with_log("post-compliance-regulations")?;
9008
9009 Ok(ComplianceRegulationsSnapshot {
9010 standard_records,
9011 cross_reference_records,
9012 jurisdiction_records,
9013 audit_procedures,
9014 findings,
9015 filings,
9016 compliance_graph,
9017 })
9018 }
9019
9020 fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
9022 use super::lineage::LineageGraphBuilder;
9023
9024 let mut builder = LineageGraphBuilder::new();
9025
9026 builder.add_config_section("config:global", "Global Config");
9028 builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
9029 builder.add_config_section("config:transactions", "Transaction Config");
9030
9031 builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
9033 builder.add_generator_phase("phase:je", "Journal Entry Generation");
9034
9035 builder.configured_by("phase:coa", "config:chart_of_accounts");
9037 builder.configured_by("phase:je", "config:transactions");
9038
9039 builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
9041 builder.produced_by("output:je", "phase:je");
9042
9043 if self.phase_config.generate_master_data {
9045 builder.add_config_section("config:master_data", "Master Data Config");
9046 builder.add_generator_phase("phase:master_data", "Master Data Generation");
9047 builder.configured_by("phase:master_data", "config:master_data");
9048 builder.input_to("phase:master_data", "phase:je");
9049 }
9050
9051 if self.phase_config.generate_document_flows {
9052 builder.add_config_section("config:document_flows", "Document Flow Config");
9053 builder.add_generator_phase("phase:p2p", "P2P Document Flow");
9054 builder.add_generator_phase("phase:o2c", "O2C Document Flow");
9055 builder.configured_by("phase:p2p", "config:document_flows");
9056 builder.configured_by("phase:o2c", "config:document_flows");
9057
9058 builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
9059 builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
9060 builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
9061 builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
9062 builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
9063
9064 builder.produced_by("output:po", "phase:p2p");
9065 builder.produced_by("output:gr", "phase:p2p");
9066 builder.produced_by("output:vi", "phase:p2p");
9067 builder.produced_by("output:so", "phase:o2c");
9068 builder.produced_by("output:ci", "phase:o2c");
9069 }
9070
9071 if self.phase_config.inject_anomalies {
9072 builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
9073 builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
9074 builder.configured_by("phase:anomaly", "config:fraud");
9075 builder.add_output_file(
9076 "output:labels",
9077 "Anomaly Labels",
9078 "labels/anomaly_labels.csv",
9079 );
9080 builder.produced_by("output:labels", "phase:anomaly");
9081 }
9082
9083 if self.phase_config.generate_audit {
9084 builder.add_config_section("config:audit", "Audit Config");
9085 builder.add_generator_phase("phase:audit", "Audit Data Generation");
9086 builder.configured_by("phase:audit", "config:audit");
9087 }
9088
9089 if self.phase_config.generate_banking {
9090 builder.add_config_section("config:banking", "Banking Config");
9091 builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
9092 builder.configured_by("phase:banking", "config:banking");
9093 }
9094
9095 if self.config.llm.enabled {
9096 builder.add_config_section("config:llm", "LLM Enrichment Config");
9097 builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
9098 builder.configured_by("phase:llm_enrichment", "config:llm");
9099 }
9100
9101 if self.config.diffusion.enabled {
9102 builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
9103 builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
9104 builder.configured_by("phase:diffusion", "config:diffusion");
9105 }
9106
9107 if self.config.causal.enabled {
9108 builder.add_config_section("config:causal", "Causal Generation Config");
9109 builder.add_generator_phase("phase:causal", "Causal Overlay");
9110 builder.configured_by("phase:causal", "config:causal");
9111 }
9112
9113 builder.build()
9114 }
9115}
9116
9117fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
9119 match format {
9120 datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
9121 datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
9122 datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
9123 datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
9124 datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
9125 }
9126}
9127
9128#[cfg(test)]
9129#[allow(clippy::unwrap_used)]
9130mod tests {
9131 use super::*;
9132 use datasynth_config::schema::*;
9133
9134 fn create_test_config() -> GeneratorConfig {
9135 GeneratorConfig {
9136 global: GlobalConfig {
9137 industry: IndustrySector::Manufacturing,
9138 start_date: "2024-01-01".to_string(),
9139 period_months: 1,
9140 seed: Some(42),
9141 parallel: false,
9142 group_currency: "USD".to_string(),
9143 worker_threads: 0,
9144 memory_limit_mb: 0,
9145 fiscal_year_months: None,
9146 },
9147 companies: vec![CompanyConfig {
9148 code: "1000".to_string(),
9149 name: "Test Company".to_string(),
9150 currency: "USD".to_string(),
9151 country: "US".to_string(),
9152 annual_transaction_volume: TransactionVolume::TenK,
9153 volume_weight: 1.0,
9154 fiscal_year_variant: "K4".to_string(),
9155 }],
9156 chart_of_accounts: ChartOfAccountsConfig {
9157 complexity: CoAComplexity::Small,
9158 industry_specific: true,
9159 custom_accounts: None,
9160 min_hierarchy_depth: 2,
9161 max_hierarchy_depth: 4,
9162 },
9163 transactions: TransactionConfig::default(),
9164 output: OutputConfig::default(),
9165 fraud: FraudConfig::default(),
9166 internal_controls: InternalControlsConfig::default(),
9167 business_processes: BusinessProcessConfig::default(),
9168 user_personas: UserPersonaConfig::default(),
9169 templates: TemplateConfig::default(),
9170 approval: ApprovalConfig::default(),
9171 departments: DepartmentConfig::default(),
9172 master_data: MasterDataConfig::default(),
9173 document_flows: DocumentFlowConfig::default(),
9174 intercompany: IntercompanyConfig::default(),
9175 balance: BalanceConfig::default(),
9176 ocpm: OcpmConfig::default(),
9177 audit: AuditGenerationConfig::default(),
9178 banking: datasynth_banking::BankingConfig::default(),
9179 data_quality: DataQualitySchemaConfig::default(),
9180 scenario: ScenarioConfig::default(),
9181 temporal: TemporalDriftConfig::default(),
9182 graph_export: GraphExportConfig::default(),
9183 streaming: StreamingSchemaConfig::default(),
9184 rate_limit: RateLimitSchemaConfig::default(),
9185 temporal_attributes: TemporalAttributeSchemaConfig::default(),
9186 relationships: RelationshipSchemaConfig::default(),
9187 accounting_standards: AccountingStandardsConfig::default(),
9188 audit_standards: AuditStandardsConfig::default(),
9189 distributions: Default::default(),
9190 temporal_patterns: Default::default(),
9191 vendor_network: VendorNetworkSchemaConfig::default(),
9192 customer_segmentation: CustomerSegmentationSchemaConfig::default(),
9193 relationship_strength: RelationshipStrengthSchemaConfig::default(),
9194 cross_process_links: CrossProcessLinksSchemaConfig::default(),
9195 organizational_events: OrganizationalEventsSchemaConfig::default(),
9196 behavioral_drift: BehavioralDriftSchemaConfig::default(),
9197 market_drift: MarketDriftSchemaConfig::default(),
9198 drift_labeling: DriftLabelingSchemaConfig::default(),
9199 anomaly_injection: Default::default(),
9200 industry_specific: Default::default(),
9201 fingerprint_privacy: Default::default(),
9202 quality_gates: Default::default(),
9203 compliance: Default::default(),
9204 webhooks: Default::default(),
9205 llm: Default::default(),
9206 diffusion: Default::default(),
9207 causal: Default::default(),
9208 source_to_pay: Default::default(),
9209 financial_reporting: Default::default(),
9210 hr: Default::default(),
9211 manufacturing: Default::default(),
9212 sales_quotes: Default::default(),
9213 tax: Default::default(),
9214 treasury: Default::default(),
9215 project_accounting: Default::default(),
9216 esg: Default::default(),
9217 country_packs: None,
9218 scenarios: Default::default(),
9219 session: Default::default(),
9220 compliance_regulations: Default::default(),
9221 }
9222 }
9223
/// Constructing an orchestrator with default phase settings succeeds for
/// the shared test configuration.
#[test]
fn test_enhanced_orchestrator_creation() {
    let created = EnhancedOrchestrator::with_defaults(create_test_config());
    assert!(created.is_ok());
}
9230
/// With only journal entry generation enabled, the run succeeds and yields
/// at least one journal entry.
#[test]
fn test_minimal_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    // Unwrap directly instead of asserting `is_ok()` first: if generation
    // fails, the panic message then carries the underlying error.
    let result = orchestrator.generate().unwrap();

    assert!(!result.journal_entries.is_empty());
}
9250
/// Master data enabled (document flows, journal entries and anomaly
/// injection disabled): vendors, customers and materials are all produced.
#[test]
fn test_master_data_generation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let master = &generated.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    assert!(!master.materials.is_empty());
}
9275
/// Master data plus document flows: both P2P and O2C chains are generated,
/// along with purchase orders and sales orders.
#[test]
fn test_document_flow_generation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 5,
        employees_per_company: 10,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    // Chains exist for both processes.
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());

    // The flattened document lists are populated as well.
    assert!(!flows.purchase_orders.is_empty());
    assert!(!flows.sales_orders.is_empty());
}
9309
/// Journal entries with anomaly injection enabled: entries are generated
/// and an anomaly label summary is present.
#[test]
fn test_anomaly_injection() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: true,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    assert!(!generated.journal_entries.is_empty());

    assert!(generated.anomaly_labels.summary.is_some());
}
9332
/// Master data, document flows, journal entries and balance validation all
/// enabled: every stage leaves non-empty results behind.
#[test]
fn test_full_generation_pipeline() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: true,
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    // Core outputs of each phase.
    assert!(!generated.master_data.vendors.is_empty());
    assert!(!generated.master_data.customers.is_empty());
    assert!(!generated.document_flows.p2p_chains.is_empty());
    assert!(!generated.document_flows.o2c_chains.is_empty());
    assert!(!generated.journal_entries.is_empty());
    assert!(generated.statistics.accounts_count > 0);

    // Subledger invoices.
    let subledger = &generated.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation ran and processed entries.
    let balances = &generated.balance_validation;
    assert!(balances.validated);
    assert!(balances.entries_processed > 0);
}
9374
/// Subledger invoices line up with document flow invoices: AP matches
/// vendor invoices, AR matches customer invoices, and the statistics report
/// the same counts.
#[test]
fn test_subledger_linking() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        generate_ocpm_events: false,
        inject_anomalies: false,
        inject_data_quality: false,
        validate_balances: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let flows = &generated.document_flows;
    let subledger = &generated.subledger;
    let stats = &generated.statistics;

    // Source documents exist.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // Subledger records exist.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Statistics mirror the subledger sizes.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
9430
/// Journal entries with balance validation enabled: validation runs,
/// reports no unbalanced entries, and total debits equal total credits.
#[test]
fn test_balance_validation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        validate_balances: true,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let balances = &generated.balance_validation;

    assert!(balances.validated);
    assert!(balances.entries_processed > 0);

    assert!(!balances.has_unbalanced_entries);

    assert_eq!(balances.total_debits, balances.total_credits);
}
9460
/// Reported statistics agree with the sizes of the generated collections.
#[test]
fn test_statistics_accuracy() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let stats = &generated.statistics;
    let master = &generated.master_data;

    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(
        stats.total_entries as usize,
        generated.journal_entries.len()
    );
}
9499
/// Pins the default `PhaseConfig`: the main generation phases, balance
/// validation and progress display are on, anomaly injection is off, and
/// the per-company vendor/customer counts are positive.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Enabled out of the box.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);
    // Opt-in only.
    assert!(!defaults.inject_anomalies);
    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);
    // Sizing knobs.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
9512
/// A freshly constructed orchestrator has no chart of accounts yet.
#[test]
fn test_get_coa_before_generation() {
    let engine = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();

    assert!(engine.get_coa().is_none());
}
9521
/// After a generation run the chart of accounts is available through
/// `get_coa`.
#[test]
fn test_get_coa_after_generation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    // Only the side effect on the orchestrator matters here.
    let _ = engine.generate().unwrap();

    assert!(engine.get_coa().is_some());
}
9540
/// Master data generation populates the vendor list in the result.
#[test]
fn test_get_master_data() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    assert!(!generated.master_data.vendors.is_empty());
}
9564
/// `with_progress(false)` turns off the `show_progress` flag on the
/// orchestrator's phase configuration.
#[test]
fn test_with_progress_builder() {
    let base = EnhancedOrchestrator::with_defaults(create_test_config()).unwrap();
    let quiet = base.with_progress(false);

    assert!(!quiet.phase_config.show_progress);
}
9575
/// Two companies with five vendors and five customers each: the combined
/// counts reach at least ten, and exactly two companies are reported.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // 5 per company across 2 companies.
    assert!(result.statistics.vendor_count >= 10);
    assert!(result.statistics.customer_count >= 10);
    // `assert_eq!` reports the actual count on failure, unlike `assert!(a == b)`.
    assert_eq!(result.statistics.companies_count, 2);
}
9611
/// Document flows requested without master data: generation still succeeds
/// but produces no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: true,
        generate_journal_entries: false,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();
    let flows = &generated.document_flows;

    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
9631
/// The `total_line_items` statistic equals the sum of `line_count()` over
/// all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let summed_lines = generated
        .journal_entries
        .iter()
        .map(|entry| entry.line_count() as u64)
        .sum::<u64>();
    assert_eq!(generated.statistics.total_line_items, summed_lines);
}
9655
/// Audit generation with two engagements: the core audit artifacts and the
/// ISA-specific artifacts are produced, and every audit statistic matches
/// the size of its collection.
#[test]
fn test_audit_generation() {
    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(create_test_config(), phases).unwrap();
    let generated = engine.generate().unwrap();

    let audit = &generated.audit;
    let stats = &generated.statistics;

    // Core audit artifacts.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific artifacts.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Statistics mirror the collection sizes.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(
        stats.audit_procedure_step_count,
        audit.procedure_steps.len()
    );
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(
        stats.audit_related_party_count,
        audit.related_parties.len()
    );
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
9775
/// LLM, diffusion and causal phases are off in the shared test
/// configuration, and a run leaves all of their statistics (and the
/// counterfactual pairs) at zero / empty / `None`.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(generated.counterfactual_pairs.is_empty());
}
9807
/// With counterfactual generation enabled, one counterfactual pair is
/// produced per journal entry, the statistic matches, and pair ids are
/// unique.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Require journal entries instead of guarding on them: a conditional
    // guard would let the test pass without checking anything if generation
    // ever produced no entries.
    assert!(
        !result.journal_entries.is_empty(),
        "journal entries are required to exercise counterfactual generation"
    );

    assert_eq!(
        result.counterfactual_pairs.len(),
        result.journal_entries.len()
    );
    assert_eq!(
        result.statistics.counterfactual_pair_count,
        result.journal_entries.len()
    );

    // Collecting into a set drops duplicates, so equal sizes mean unique ids.
    let ids: std::collections::HashSet<_> = result
        .counterfactual_pairs
        .iter()
        .map(|p| p.pair_id.clone())
        .collect();
    assert_eq!(ids.len(), result.counterfactual_pairs.len());
}
9843
/// LLM enrichment enabled with a cap of three vendors: at least one and at
/// most three vendors are reported as enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
9871
/// Diffusion enhancement enabled with a sample size of 20: exactly 20
/// samples are reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();

    assert_eq!(generated.statistics.diffusion_samples_generated, 20);
}
9894
/// Causal overlay with the "fraud_detection" template and validation on:
/// 100 samples are reported and a validation outcome is recorded.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.causal_samples_generated, 100);
    assert!(stats.causal_validation_passed.is_some());
}
9920
/// Causal overlay with the "revenue_cycle" template and validation off:
/// 50 samples are reported and no validation outcome is recorded.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_none());
}
9946
/// LLM enrichment, diffusion enhancement and causal overlay all enabled in
/// one run: each phase reports its expected statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();
    // LLM enrichment.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;
    // Diffusion enhancement.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;
    // Causal overlay.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        show_progress: false,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let generated = engine.generate().unwrap();
    let stats = &generated.statistics;

    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
9982
/// The LLM, diffusion and causal statistics fields survive a JSON
/// round trip through `serde_json`.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
10009
/// Deserializes a statistics JSON document that contains none of the LLM,
/// diffusion or causal keys and checks that parsing still succeeds, with the
/// absent numeric fields reading as `0` and the absent validation flag as
/// `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload deliberately omits every llm_* / diffusion_* / causal_* key.
    let legacy_payload = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let parsed = serde_json::from_str::<EnhancedGenerationStatistics>(legacy_payload).unwrap();

    // Missing LLM fields read as zero.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);

    // Missing diffusion fields read as zero.
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);

    // Missing causal fields read as zero / `None`.
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
10059}