Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
40};
41use datasynth_core::models::sourcing::{
42    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
43    SupplierBid, SupplierQualification, SupplierScorecard,
44};
45use datasynth_core::models::subledger::ap::APInvoice;
46use datasynth_core::models::subledger::ar::ARInvoice;
47use datasynth_core::models::*;
48use datasynth_core::traits::Generator;
49use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
50use datasynth_fingerprint::{
51    io::FingerprintReader,
52    models::Fingerprint,
53    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
54};
55use datasynth_generators::{
56    // Anomaly injection
57    AnomalyInjector,
58    AnomalyInjectorConfig,
59    AssetGenerator,
60    // Audit generators
61    AuditEngagementGenerator,
62    BalanceTrackerConfig,
63    // Bank reconciliation generator
64    BankReconciliationGenerator,
65    // S2C sourcing generators
66    BidEvaluationGenerator,
67    BidGenerator,
68    CatalogGenerator,
69    // Core generators
70    ChartOfAccountsGenerator,
71    ContractGenerator,
72    // Control generator
73    ControlGenerator,
74    ControlGeneratorConfig,
75    CustomerGenerator,
76    DataQualityConfig,
77    // Data quality
78    DataQualityInjector,
79    DataQualityStats,
80    // Document flow JE generator
81    DocumentFlowJeConfig,
82    DocumentFlowJeGenerator,
83    // Subledger linker
84    DocumentFlowLinker,
85    EmployeeGenerator,
86    // ESG anomaly labels
87    EsgAnomalyLabel,
88    EvidenceGenerator,
89    // Financial statement generator
90    FinancialStatementGenerator,
91    FindingGenerator,
92    JournalEntryGenerator,
93    JudgmentGenerator,
94    LatePaymentDistribution,
95    MaterialGenerator,
96    O2CDocumentChain,
97    O2CGenerator,
98    O2CGeneratorConfig,
99    O2CPaymentBehavior,
100    P2PDocumentChain,
101    // Document flow generators
102    P2PGenerator,
103    P2PGeneratorConfig,
104    P2PPaymentBehavior,
105    PaymentReference,
106    QualificationGenerator,
107    RfxGenerator,
108    RiskAssessmentGenerator,
109    // Balance validation
110    RunningBalanceTracker,
111    ScorecardGenerator,
112    SourcingProjectGenerator,
113    SpendAnalysisGenerator,
114    ValidationError,
115    // Master data generators
116    VendorGenerator,
117    WorkpaperGenerator,
118};
119use datasynth_graph::{
120    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
121    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
122    TransactionGraphConfig,
123};
124use datasynth_ocpm::{
125    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
126    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
127    OcpmUuidFactory, P2pDocuments, S2cDocuments,
128};
129
130use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
131use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
132use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
133use datasynth_core::llm::MockLlmProvider;
134use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
135use datasynth_core::models::documents::PaymentMethod;
136use datasynth_core::models::IndustrySector;
137use datasynth_generators::coa_generator::CoAFramework;
138use datasynth_generators::llm_enrichment::VendorLlmEnricher;
139use rayon::prelude::*;
140
141// ============================================================================
142// Configuration Conversion Functions
143// ============================================================================
144
145/// Convert P2P flow config from schema to generator config.
146fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
147    let payment_behavior = &schema_config.payment_behavior;
148    let late_dist = &payment_behavior.late_payment_days_distribution;
149
150    P2PGeneratorConfig {
151        three_way_match_rate: schema_config.three_way_match_rate,
152        partial_delivery_rate: schema_config.partial_delivery_rate,
153        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
154        price_variance_rate: schema_config.price_variance_rate,
155        max_price_variance_percent: schema_config.max_price_variance_percent,
156        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
157        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
158        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
159        payment_method_distribution: vec![
160            (PaymentMethod::BankTransfer, 0.60),
161            (PaymentMethod::Check, 0.25),
162            (PaymentMethod::Wire, 0.10),
163            (PaymentMethod::CreditCard, 0.05),
164        ],
165        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
166        payment_behavior: P2PPaymentBehavior {
167            late_payment_rate: payment_behavior.late_payment_rate,
168            late_payment_distribution: LatePaymentDistribution {
169                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
170                late_8_to_14: late_dist.late_8_to_14,
171                very_late_15_to_30: late_dist.very_late_15_to_30,
172                severely_late_31_to_60: late_dist.severely_late_31_to_60,
173                extremely_late_over_60: late_dist.extremely_late_over_60,
174            },
175            partial_payment_rate: payment_behavior.partial_payment_rate,
176            payment_correction_rate: payment_behavior.payment_correction_rate,
177            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
178        },
179    }
180}
181
182/// Convert O2C flow config from schema to generator config.
183fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
184    let payment_behavior = &schema_config.payment_behavior;
185
186    O2CGeneratorConfig {
187        credit_check_failure_rate: schema_config.credit_check_failure_rate,
188        partial_shipment_rate: schema_config.partial_shipment_rate,
189        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
190        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
191        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
192        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
193        bad_debt_rate: schema_config.bad_debt_rate,
194        returns_rate: schema_config.return_rate,
195        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
196        payment_method_distribution: vec![
197            (PaymentMethod::BankTransfer, 0.50),
198            (PaymentMethod::Check, 0.30),
199            (PaymentMethod::Wire, 0.15),
200            (PaymentMethod::CreditCard, 0.05),
201        ],
202        payment_behavior: O2CPaymentBehavior {
203            partial_payment_rate: payment_behavior.partial_payments.rate,
204            short_payment_rate: payment_behavior.short_payments.rate,
205            max_short_percent: payment_behavior.short_payments.max_short_percent,
206            on_account_rate: payment_behavior.on_account_payments.rate,
207            payment_correction_rate: payment_behavior.payment_corrections.rate,
208            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
209        },
210    }
211}
212
/// Switches and volume settings that select which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields set how
/// many records the corresponding phase is asked to produce. See the
/// [`Default`] implementation for the out-of-the-box values.
#[derive(Clone, Debug)]
pub struct PhaseConfig {
    /// Produce master data: vendors, customers, materials, assets, employees.
    pub generate_master_data: bool,
    /// Produce P2P and O2C document flows.
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies into the generated data.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation is done.
    pub validate_balances: bool,
    /// Display progress bars while generating.
    pub show_progress: bool,
    /// Vendor count per company.
    pub vendors_per_company: usize,
    /// Customer count per company.
    pub customers_per_company: usize,
    /// Material count per company.
    pub materials_per_company: usize,
    /// Asset count per company.
    pub assets_per_company: usize,
    /// Employee count per company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
}

impl Default for PhaseConfig {
    /// Returns the baseline configuration.
    ///
    /// Master data, document flows, journal entries, balance validation,
    /// progress bars and evolution events are enabled; every other phase is
    /// opt-in and starts disabled.
    fn default() -> Self {
        Self {
            // --- Phases enabled out of the box ---
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,

            // --- Opt-in phases ---
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Kept off so test data stays clean unless explicitly requested.
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            // Opt-in for ML workloads.
            generate_counterfactuals: false,
            generate_compliance_regulations: false,

            // --- Master data volumes (per company) ---
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // --- Document flow volumes ---
            p2p_chains: 100,
            o2c_chains: 100,

            // --- Audit volumes ---
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
332
333/// Master data snapshot containing all generated entities.
334#[derive(Debug, Clone, Default)]
335pub struct MasterDataSnapshot {
336    /// Generated vendors.
337    pub vendors: Vec<Vendor>,
338    /// Generated customers.
339    pub customers: Vec<Customer>,
340    /// Generated materials.
341    pub materials: Vec<Material>,
342    /// Generated fixed assets.
343    pub assets: Vec<FixedAsset>,
344    /// Generated employees.
345    pub employees: Vec<Employee>,
346}
347
/// Summary of a finished hypergraph export.
#[derive(Clone, Debug)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
360
361/// Document flow snapshot containing all generated document chains.
362#[derive(Debug, Clone, Default)]
363pub struct DocumentFlowSnapshot {
364    /// P2P document chains.
365    pub p2p_chains: Vec<P2PDocumentChain>,
366    /// O2C document chains.
367    pub o2c_chains: Vec<O2CDocumentChain>,
368    /// All purchase orders (flattened).
369    pub purchase_orders: Vec<documents::PurchaseOrder>,
370    /// All goods receipts (flattened).
371    pub goods_receipts: Vec<documents::GoodsReceipt>,
372    /// All vendor invoices (flattened).
373    pub vendor_invoices: Vec<documents::VendorInvoice>,
374    /// All sales orders (flattened).
375    pub sales_orders: Vec<documents::SalesOrder>,
376    /// All deliveries (flattened).
377    pub deliveries: Vec<documents::Delivery>,
378    /// All customer invoices (flattened).
379    pub customer_invoices: Vec<documents::CustomerInvoice>,
380    /// All payments (flattened).
381    pub payments: Vec<documents::Payment>,
382}
383
384/// Subledger snapshot containing generated subledger records.
385#[derive(Debug, Clone, Default)]
386pub struct SubledgerSnapshot {
387    /// AP invoices linked from document flow vendor invoices.
388    pub ap_invoices: Vec<APInvoice>,
389    /// AR invoices linked from document flow customer invoices.
390    pub ar_invoices: Vec<ARInvoice>,
391    /// FA subledger records (asset acquisitions from FA generator).
392    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
393    /// Inventory positions from inventory generator.
394    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
395    /// Inventory movements from inventory generator.
396    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
397}
398
399/// OCPM snapshot containing generated OCPM event log data.
400#[derive(Debug, Clone, Default)]
401pub struct OcpmSnapshot {
402    /// OCPM event log (if generated)
403    pub event_log: Option<OcpmEventLog>,
404    /// Number of events generated
405    pub event_count: usize,
406    /// Number of objects generated
407    pub object_count: usize,
408    /// Number of cases generated
409    pub case_count: usize,
410}
411
412/// Audit data snapshot containing all generated audit-related entities.
413#[derive(Debug, Clone, Default)]
414pub struct AuditSnapshot {
415    /// Audit engagements per ISA 210/220.
416    pub engagements: Vec<AuditEngagement>,
417    /// Workpapers per ISA 230.
418    pub workpapers: Vec<Workpaper>,
419    /// Audit evidence per ISA 500.
420    pub evidence: Vec<AuditEvidence>,
421    /// Risk assessments per ISA 315/330.
422    pub risk_assessments: Vec<RiskAssessment>,
423    /// Audit findings per ISA 265.
424    pub findings: Vec<AuditFinding>,
425    /// Professional judgments per ISA 200.
426    pub judgments: Vec<ProfessionalJudgment>,
427}
428
429/// Banking KYC/AML data snapshot containing all generated banking entities.
430#[derive(Debug, Clone, Default)]
431pub struct BankingSnapshot {
432    /// Banking customers (retail, business, trust).
433    pub customers: Vec<BankingCustomer>,
434    /// Bank accounts.
435    pub accounts: Vec<BankAccount>,
436    /// Bank transactions with AML labels.
437    pub transactions: Vec<BankTransaction>,
438    /// Transaction-level AML labels with features.
439    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
440    /// Customer-level AML labels.
441    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
442    /// Account-level AML labels.
443    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
444    /// Relationship-level AML labels.
445    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
446    /// Case narratives for AML scenarios.
447    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
448    /// Number of suspicious transactions.
449    pub suspicious_count: usize,
450    /// Number of AML scenarios generated.
451    pub scenario_count: usize,
452}
453
454/// Graph export snapshot containing exported graph metadata.
455#[derive(Debug, Clone, Default, Serialize)]
456pub struct GraphExportSnapshot {
457    /// Whether graph export was performed.
458    pub exported: bool,
459    /// Number of graphs exported.
460    pub graph_count: usize,
461    /// Exported graph metadata (by format name).
462    pub exports: HashMap<String, GraphExportInfo>,
463}
464
465/// Information about an exported graph.
466#[derive(Debug, Clone, Serialize)]
467pub struct GraphExportInfo {
468    /// Graph name.
469    pub name: String,
470    /// Export format (pytorch_geometric, neo4j, dgl).
471    pub format: String,
472    /// Output directory path.
473    pub output_path: PathBuf,
474    /// Number of nodes.
475    pub node_count: usize,
476    /// Number of edges.
477    pub edge_count: usize,
478}
479
480/// S2C sourcing data snapshot.
481#[derive(Debug, Clone, Default)]
482pub struct SourcingSnapshot {
483    /// Spend analyses.
484    pub spend_analyses: Vec<SpendAnalysis>,
485    /// Sourcing projects.
486    pub sourcing_projects: Vec<SourcingProject>,
487    /// Supplier qualifications.
488    pub qualifications: Vec<SupplierQualification>,
489    /// RFx events (RFI, RFP, RFQ).
490    pub rfx_events: Vec<RfxEvent>,
491    /// Supplier bids.
492    pub bids: Vec<SupplierBid>,
493    /// Bid evaluations.
494    pub bid_evaluations: Vec<BidEvaluation>,
495    /// Procurement contracts.
496    pub contracts: Vec<ProcurementContract>,
497    /// Catalog items.
498    pub catalog_items: Vec<CatalogItem>,
499    /// Supplier scorecards.
500    pub scorecards: Vec<SupplierScorecard>,
501}
502
503/// A single period's trial balance with metadata.
504#[derive(Debug, Clone, Serialize, Deserialize)]
505pub struct PeriodTrialBalance {
506    /// Fiscal year.
507    pub fiscal_year: u16,
508    /// Fiscal period (1-12).
509    pub fiscal_period: u8,
510    /// Period start date.
511    pub period_start: NaiveDate,
512    /// Period end date.
513    pub period_end: NaiveDate,
514    /// Trial balance entries for this period.
515    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
516}
517
518/// Financial reporting snapshot (financial statements + bank reconciliations).
519#[derive(Debug, Clone, Default)]
520pub struct FinancialReportingSnapshot {
521    /// Financial statements (balance sheet, income statement, cash flow).
522    pub financial_statements: Vec<FinancialStatement>,
523    /// Bank reconciliations.
524    pub bank_reconciliations: Vec<BankReconciliation>,
525    /// Period-close trial balances (one per period).
526    pub trial_balances: Vec<PeriodTrialBalance>,
527}
528
529/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments).
530#[derive(Debug, Clone, Default)]
531pub struct HrSnapshot {
532    /// Payroll runs (actual data).
533    pub payroll_runs: Vec<PayrollRun>,
534    /// Payroll line items (actual data).
535    pub payroll_line_items: Vec<PayrollLineItem>,
536    /// Time entries (actual data).
537    pub time_entries: Vec<TimeEntry>,
538    /// Expense reports (actual data).
539    pub expense_reports: Vec<ExpenseReport>,
540    /// Benefit enrollments (actual data).
541    pub benefit_enrollments: Vec<BenefitEnrollment>,
542    /// Payroll runs.
543    pub payroll_run_count: usize,
544    /// Payroll line item count.
545    pub payroll_line_item_count: usize,
546    /// Time entry count.
547    pub time_entry_count: usize,
548    /// Expense report count.
549    pub expense_report_count: usize,
550    /// Benefit enrollment count.
551    pub benefit_enrollment_count: usize,
552}
553
554/// Accounting standards data snapshot (revenue recognition, impairment).
555#[derive(Debug, Clone, Default)]
556pub struct AccountingStandardsSnapshot {
557    /// Revenue recognition contracts (actual data).
558    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
559    /// Impairment tests (actual data).
560    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
561    /// Revenue recognition contract count.
562    pub revenue_contract_count: usize,
563    /// Impairment test count.
564    pub impairment_test_count: usize,
565}
566
567/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
568#[derive(Debug, Clone, Default)]
569pub struct ComplianceRegulationsSnapshot {
570    /// Flattened standard records for output.
571    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
572    /// Cross-reference records.
573    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
574    /// Jurisdiction profile records.
575    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
576    /// Generated audit procedures.
577    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
578    /// Generated compliance findings.
579    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
580    /// Generated regulatory filings.
581    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
582    /// Compliance graph (if graph integration enabled).
583    pub compliance_graph: Option<datasynth_graph::Graph>,
584}
585
586/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
587#[derive(Debug, Clone, Default)]
588pub struct ManufacturingSnapshot {
589    /// Production orders (actual data).
590    pub production_orders: Vec<ProductionOrder>,
591    /// Quality inspections (actual data).
592    pub quality_inspections: Vec<QualityInspection>,
593    /// Cycle counts (actual data).
594    pub cycle_counts: Vec<CycleCount>,
595    /// BOM components (actual data).
596    pub bom_components: Vec<BomComponent>,
597    /// Inventory movements (actual data).
598    pub inventory_movements: Vec<InventoryMovement>,
599    /// Production order count.
600    pub production_order_count: usize,
601    /// Quality inspection count.
602    pub quality_inspection_count: usize,
603    /// Cycle count count.
604    pub cycle_count_count: usize,
605    /// BOM component count.
606    pub bom_component_count: usize,
607    /// Inventory movement count.
608    pub inventory_movement_count: usize,
609}
610
611/// Sales, KPI, and budget data snapshot.
612#[derive(Debug, Clone, Default)]
613pub struct SalesKpiBudgetsSnapshot {
614    /// Sales quotes (actual data).
615    pub sales_quotes: Vec<SalesQuote>,
616    /// Management KPIs (actual data).
617    pub kpis: Vec<ManagementKpi>,
618    /// Budgets (actual data).
619    pub budgets: Vec<Budget>,
620    /// Sales quote count.
621    pub sales_quote_count: usize,
622    /// Management KPI count.
623    pub kpi_count: usize,
624    /// Budget line count.
625    pub budget_line_count: usize,
626}
627
628/// Anomaly labels generated during injection.
629#[derive(Debug, Clone, Default)]
630pub struct AnomalyLabels {
631    /// All anomaly labels.
632    pub labels: Vec<LabeledAnomaly>,
633    /// Summary statistics.
634    pub summary: Option<AnomalySummary>,
635    /// Count by anomaly type.
636    pub by_type: HashMap<String, usize>,
637}
638
639/// Balance validation results from running balance tracker.
640#[derive(Debug, Clone, Default)]
641pub struct BalanceValidationResult {
642    /// Whether validation was performed.
643    pub validated: bool,
644    /// Whether balance sheet equation is satisfied.
645    pub is_balanced: bool,
646    /// Number of entries processed.
647    pub entries_processed: u64,
648    /// Total debits across all entries.
649    pub total_debits: rust_decimal::Decimal,
650    /// Total credits across all entries.
651    pub total_credits: rust_decimal::Decimal,
652    /// Number of accounts tracked.
653    pub accounts_tracked: usize,
654    /// Number of companies tracked.
655    pub companies_tracked: usize,
656    /// Validation errors encountered.
657    pub validation_errors: Vec<ValidationError>,
658    /// Whether any unbalanced entries were found.
659    pub has_unbalanced_entries: bool,
660}
661
662/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
663#[derive(Debug, Clone, Default)]
664pub struct TaxSnapshot {
665    /// Tax jurisdictions.
666    pub jurisdictions: Vec<TaxJurisdiction>,
667    /// Tax codes.
668    pub codes: Vec<TaxCode>,
669    /// Tax lines computed on documents.
670    pub tax_lines: Vec<TaxLine>,
671    /// Tax returns filed per period.
672    pub tax_returns: Vec<TaxReturn>,
673    /// Tax provisions.
674    pub tax_provisions: Vec<TaxProvision>,
675    /// Withholding tax records.
676    pub withholding_records: Vec<WithholdingTaxRecord>,
677    /// Tax anomaly labels.
678    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
679    /// Jurisdiction count.
680    pub jurisdiction_count: usize,
681    /// Code count.
682    pub code_count: usize,
683}
684
685/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
686#[derive(Debug, Clone, Default, Serialize, Deserialize)]
687pub struct IntercompanySnapshot {
688    /// IC matched pairs (transaction pairs between related entities).
689    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
690    /// IC journal entries generated from matched pairs (seller side).
691    pub seller_journal_entries: Vec<JournalEntry>,
692    /// IC journal entries generated from matched pairs (buyer side).
693    pub buyer_journal_entries: Vec<JournalEntry>,
694    /// Elimination entries for consolidation.
695    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
696    /// IC matched pair count.
697    pub matched_pair_count: usize,
698    /// IC elimination entry count.
699    pub elimination_entry_count: usize,
700    /// IC matching rate (0.0 to 1.0).
701    pub match_rate: f64,
702}
703
704/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
705#[derive(Debug, Clone, Default)]
706pub struct EsgSnapshot {
707    /// Emission records (scope 1, 2, 3).
708    pub emissions: Vec<EmissionRecord>,
709    /// Energy consumption records.
710    pub energy: Vec<EnergyConsumption>,
711    /// Water usage records.
712    pub water: Vec<WaterUsage>,
713    /// Waste records.
714    pub waste: Vec<WasteRecord>,
715    /// Workforce diversity metrics.
716    pub diversity: Vec<WorkforceDiversityMetric>,
717    /// Pay equity metrics.
718    pub pay_equity: Vec<PayEquityMetric>,
719    /// Safety incidents.
720    pub safety_incidents: Vec<SafetyIncident>,
721    /// Safety metrics.
722    pub safety_metrics: Vec<SafetyMetric>,
723    /// Governance metrics.
724    pub governance: Vec<GovernanceMetric>,
725    /// Supplier ESG assessments.
726    pub supplier_assessments: Vec<SupplierEsgAssessment>,
727    /// Materiality assessments.
728    pub materiality: Vec<MaterialityAssessment>,
729    /// ESG disclosures.
730    pub disclosures: Vec<EsgDisclosure>,
731    /// Climate scenarios.
732    pub climate_scenarios: Vec<ClimateScenario>,
733    /// ESG anomaly labels.
734    pub anomaly_labels: Vec<EsgAnomalyLabel>,
735    /// Total emission record count.
736    pub emission_count: usize,
737    /// Total disclosure count.
738    pub disclosure_count: usize,
739}
740
741/// Treasury data snapshot (cash management, hedging, debt, pooling).
742#[derive(Debug, Clone, Default)]
743pub struct TreasurySnapshot {
744    /// Cash positions (daily balances per account).
745    pub cash_positions: Vec<CashPosition>,
746    /// Cash forecasts.
747    pub cash_forecasts: Vec<CashForecast>,
748    /// Cash pools.
749    pub cash_pools: Vec<CashPool>,
750    /// Cash pool sweep transactions.
751    pub cash_pool_sweeps: Vec<CashPoolSweep>,
752    /// Hedging instruments.
753    pub hedging_instruments: Vec<HedgingInstrument>,
754    /// Hedge relationships (ASC 815/IFRS 9 designations).
755    pub hedge_relationships: Vec<HedgeRelationship>,
756    /// Debt instruments.
757    pub debt_instruments: Vec<DebtInstrument>,
758    /// Bank guarantees and letters of credit.
759    pub bank_guarantees: Vec<BankGuarantee>,
760    /// Intercompany netting runs.
761    pub netting_runs: Vec<NettingRun>,
762    /// Treasury anomaly labels.
763    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
764}
765
766/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
767#[derive(Debug, Clone, Default)]
768pub struct ProjectAccountingSnapshot {
769    /// Projects with WBS hierarchies.
770    pub projects: Vec<Project>,
771    /// Project cost lines (linked from source documents).
772    pub cost_lines: Vec<ProjectCostLine>,
773    /// Revenue recognition records.
774    pub revenue_records: Vec<ProjectRevenue>,
775    /// Earned value metrics.
776    pub earned_value_metrics: Vec<EarnedValueMetric>,
777    /// Change orders.
778    pub change_orders: Vec<ChangeOrder>,
779    /// Project milestones.
780    pub milestones: Vec<ProjectMilestone>,
781}
782
783/// Complete result of enhanced generation run.
784#[derive(Debug)]
785pub struct EnhancedGenerationResult {
786    /// Generated chart of accounts.
787    pub chart_of_accounts: ChartOfAccounts,
788    /// Master data snapshot.
789    pub master_data: MasterDataSnapshot,
790    /// Document flow snapshot.
791    pub document_flows: DocumentFlowSnapshot,
792    /// Subledger snapshot (linked from document flows).
793    pub subledger: SubledgerSnapshot,
794    /// OCPM event log snapshot (if OCPM generation enabled).
795    pub ocpm: OcpmSnapshot,
796    /// Audit data snapshot (if audit generation enabled).
797    pub audit: AuditSnapshot,
798    /// Banking KYC/AML data snapshot (if banking generation enabled).
799    pub banking: BankingSnapshot,
800    /// Graph export snapshot (if graph export enabled).
801    pub graph_export: GraphExportSnapshot,
802    /// S2C sourcing data snapshot (if sourcing generation enabled).
803    pub sourcing: SourcingSnapshot,
804    /// Financial reporting snapshot (financial statements + bank reconciliations).
805    pub financial_reporting: FinancialReportingSnapshot,
806    /// HR data snapshot (payroll, time entries, expenses).
807    pub hr: HrSnapshot,
808    /// Accounting standards snapshot (revenue recognition, impairment).
809    pub accounting_standards: AccountingStandardsSnapshot,
810    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
811    pub manufacturing: ManufacturingSnapshot,
812    /// Sales, KPI, and budget snapshot.
813    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
814    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
815    pub tax: TaxSnapshot,
816    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
817    pub esg: EsgSnapshot,
818    /// Treasury data snapshot (cash management, hedging, debt).
819    pub treasury: TreasurySnapshot,
820    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
821    pub project_accounting: ProjectAccountingSnapshot,
822    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
823    pub process_evolution: Vec<ProcessEvolutionEvent>,
824    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
825    pub organizational_events: Vec<OrganizationalEvent>,
826    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
827    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
828    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
829    pub intercompany: IntercompanySnapshot,
830    /// Generated journal entries.
831    pub journal_entries: Vec<JournalEntry>,
832    /// Anomaly labels (if injection enabled).
833    pub anomaly_labels: AnomalyLabels,
834    /// Balance validation results (if validation enabled).
835    pub balance_validation: BalanceValidationResult,
836    /// Data quality statistics (if injection enabled).
837    pub data_quality_stats: DataQualityStats,
838    /// Generation statistics.
839    pub statistics: EnhancedGenerationStatistics,
840    /// Data lineage graph (if tracking enabled).
841    pub lineage: Option<super::lineage::LineageGraph>,
842    /// Quality gate evaluation result.
843    pub gate_result: Option<datasynth_eval::gates::GateResult>,
844    /// Internal controls (if controls generation enabled).
845    pub internal_controls: Vec<InternalControl>,
846    /// Opening balances (if opening balance generation enabled).
847    pub opening_balances: Vec<GeneratedOpeningBalance>,
848    /// GL-to-subledger reconciliation results (if reconciliation enabled).
849    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
850    /// Counterfactual (original, mutated) JE pairs for ML training.
851    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
852    /// Fraud red-flag indicators on P2P/O2C documents.
853    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
854    /// Collusion rings (coordinated fraud networks).
855    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
856    /// Bi-temporal version chains for vendor entities.
857    pub temporal_vendor_chains:
858        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
859    /// Entity relationship graph (nodes + edges with strength scores).
860    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
861    /// Cross-process links (P2P ↔ O2C via inventory movements).
862    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
863    /// Industry-specific GL accounts and metadata.
864    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
865    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
866    pub compliance_regulations: ComplianceRegulationsSnapshot,
867}
868
869/// Enhanced statistics about a generation run.
870#[derive(Debug, Clone, Default, Serialize, Deserialize)]
871pub struct EnhancedGenerationStatistics {
872    /// Total journal entries generated.
873    pub total_entries: u64,
874    /// Total line items generated.
875    pub total_line_items: u64,
876    /// Number of accounts in CoA.
877    pub accounts_count: usize,
878    /// Number of companies.
879    pub companies_count: usize,
880    /// Period in months.
881    pub period_months: u32,
882    /// Master data counts.
883    pub vendor_count: usize,
884    pub customer_count: usize,
885    pub material_count: usize,
886    pub asset_count: usize,
887    pub employee_count: usize,
888    /// Document flow counts.
889    pub p2p_chain_count: usize,
890    pub o2c_chain_count: usize,
891    /// Subledger counts.
892    pub ap_invoice_count: usize,
893    pub ar_invoice_count: usize,
894    /// OCPM counts.
895    pub ocpm_event_count: usize,
896    pub ocpm_object_count: usize,
897    pub ocpm_case_count: usize,
898    /// Audit counts.
899    pub audit_engagement_count: usize,
900    pub audit_workpaper_count: usize,
901    pub audit_evidence_count: usize,
902    pub audit_risk_count: usize,
903    pub audit_finding_count: usize,
904    pub audit_judgment_count: usize,
905    /// Anomaly counts.
906    pub anomalies_injected: usize,
907    /// Data quality issue counts.
908    pub data_quality_issues: usize,
909    /// Banking counts.
910    pub banking_customer_count: usize,
911    pub banking_account_count: usize,
912    pub banking_transaction_count: usize,
913    pub banking_suspicious_count: usize,
914    /// Graph export counts.
915    pub graph_export_count: usize,
916    pub graph_node_count: usize,
917    pub graph_edge_count: usize,
918    /// LLM enrichment timing (milliseconds).
919    #[serde(default)]
920    pub llm_enrichment_ms: u64,
921    /// Number of vendor names enriched by LLM.
922    #[serde(default)]
923    pub llm_vendors_enriched: usize,
924    /// Diffusion enhancement timing (milliseconds).
925    #[serde(default)]
926    pub diffusion_enhancement_ms: u64,
927    /// Number of diffusion samples generated.
928    #[serde(default)]
929    pub diffusion_samples_generated: usize,
930    /// Causal generation timing (milliseconds).
931    #[serde(default)]
932    pub causal_generation_ms: u64,
933    /// Number of causal samples generated.
934    #[serde(default)]
935    pub causal_samples_generated: usize,
936    /// Whether causal validation passed.
937    #[serde(default)]
938    pub causal_validation_passed: Option<bool>,
939    /// S2C sourcing counts.
940    #[serde(default)]
941    pub sourcing_project_count: usize,
942    #[serde(default)]
943    pub rfx_event_count: usize,
944    #[serde(default)]
945    pub bid_count: usize,
946    #[serde(default)]
947    pub contract_count: usize,
948    #[serde(default)]
949    pub catalog_item_count: usize,
950    #[serde(default)]
951    pub scorecard_count: usize,
952    /// Financial reporting counts.
953    #[serde(default)]
954    pub financial_statement_count: usize,
955    #[serde(default)]
956    pub bank_reconciliation_count: usize,
957    /// HR counts.
958    #[serde(default)]
959    pub payroll_run_count: usize,
960    #[serde(default)]
961    pub time_entry_count: usize,
962    #[serde(default)]
963    pub expense_report_count: usize,
964    #[serde(default)]
965    pub benefit_enrollment_count: usize,
966    /// Accounting standards counts.
967    #[serde(default)]
968    pub revenue_contract_count: usize,
969    #[serde(default)]
970    pub impairment_test_count: usize,
971    /// Manufacturing counts.
972    #[serde(default)]
973    pub production_order_count: usize,
974    #[serde(default)]
975    pub quality_inspection_count: usize,
976    #[serde(default)]
977    pub cycle_count_count: usize,
978    #[serde(default)]
979    pub bom_component_count: usize,
980    #[serde(default)]
981    pub inventory_movement_count: usize,
982    /// Sales & reporting counts.
983    #[serde(default)]
984    pub sales_quote_count: usize,
985    #[serde(default)]
986    pub kpi_count: usize,
987    #[serde(default)]
988    pub budget_line_count: usize,
989    /// Tax counts.
990    #[serde(default)]
991    pub tax_jurisdiction_count: usize,
992    #[serde(default)]
993    pub tax_code_count: usize,
994    /// ESG counts.
995    #[serde(default)]
996    pub esg_emission_count: usize,
997    #[serde(default)]
998    pub esg_disclosure_count: usize,
999    /// Intercompany counts.
1000    #[serde(default)]
1001    pub ic_matched_pair_count: usize,
1002    #[serde(default)]
1003    pub ic_elimination_count: usize,
1004    /// Number of intercompany journal entries (seller + buyer side).
1005    #[serde(default)]
1006    pub ic_transaction_count: usize,
1007    /// Number of fixed asset subledger records.
1008    #[serde(default)]
1009    pub fa_subledger_count: usize,
1010    /// Number of inventory subledger records.
1011    #[serde(default)]
1012    pub inventory_subledger_count: usize,
1013    /// Treasury debt instrument count.
1014    #[serde(default)]
1015    pub treasury_debt_instrument_count: usize,
1016    /// Treasury hedging instrument count.
1017    #[serde(default)]
1018    pub treasury_hedging_instrument_count: usize,
1019    /// Project accounting project count.
1020    #[serde(default)]
1021    pub project_count: usize,
1022    /// Project accounting change order count.
1023    #[serde(default)]
1024    pub project_change_order_count: usize,
1025    /// Tax provision count.
1026    #[serde(default)]
1027    pub tax_provision_count: usize,
1028    /// Opening balance count.
1029    #[serde(default)]
1030    pub opening_balance_count: usize,
1031    /// Subledger reconciliation count.
1032    #[serde(default)]
1033    pub subledger_reconciliation_count: usize,
1034    /// Tax line count.
1035    #[serde(default)]
1036    pub tax_line_count: usize,
1037    /// Project cost line count.
1038    #[serde(default)]
1039    pub project_cost_line_count: usize,
1040    /// Cash position count.
1041    #[serde(default)]
1042    pub cash_position_count: usize,
1043    /// Cash forecast count.
1044    #[serde(default)]
1045    pub cash_forecast_count: usize,
1046    /// Cash pool count.
1047    #[serde(default)]
1048    pub cash_pool_count: usize,
1049    /// Process evolution event count.
1050    #[serde(default)]
1051    pub process_evolution_event_count: usize,
1052    /// Organizational event count.
1053    #[serde(default)]
1054    pub organizational_event_count: usize,
1055    /// Counterfactual pair count.
1056    #[serde(default)]
1057    pub counterfactual_pair_count: usize,
1058    /// Number of fraud red-flag indicators generated.
1059    #[serde(default)]
1060    pub red_flag_count: usize,
1061    /// Number of collusion rings generated.
1062    #[serde(default)]
1063    pub collusion_ring_count: usize,
1064    /// Number of bi-temporal vendor version chains generated.
1065    #[serde(default)]
1066    pub temporal_version_chain_count: usize,
1067    /// Number of nodes in the entity relationship graph.
1068    #[serde(default)]
1069    pub entity_relationship_node_count: usize,
1070    /// Number of edges in the entity relationship graph.
1071    #[serde(default)]
1072    pub entity_relationship_edge_count: usize,
1073    /// Number of cross-process links generated.
1074    #[serde(default)]
1075    pub cross_process_link_count: usize,
1076    /// Number of disruption events generated.
1077    #[serde(default)]
1078    pub disruption_event_count: usize,
1079    /// Number of industry-specific GL accounts generated.
1080    #[serde(default)]
1081    pub industry_gl_account_count: usize,
1082}
1083
1084/// Enhanced orchestrator with full feature integration.
1085pub struct EnhancedOrchestrator {
1086    config: GeneratorConfig,
1087    phase_config: PhaseConfig,
1088    coa: Option<Arc<ChartOfAccounts>>,
1089    master_data: MasterDataSnapshot,
1090    seed: u64,
1091    multi_progress: Option<MultiProgress>,
1092    /// Resource guard for memory, disk, and CPU monitoring
1093    resource_guard: ResourceGuard,
1094    /// Output path for disk space monitoring
1095    output_path: Option<PathBuf>,
1096    /// Copula generators for preserving correlations (from fingerprint)
1097    copula_generators: Vec<CopulaGeneratorSpec>,
1098    /// Country pack registry for localized data generation
1099    country_pack_registry: datasynth_core::CountryPackRegistry,
1100    /// Optional streaming sink for phase-by-phase output
1101    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1102}
1103
1104impl EnhancedOrchestrator {
1105    /// Create a new enhanced orchestrator.
1106    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1107        datasynth_config::validate_config(&config)?;
1108
1109        let seed = config.global.seed.unwrap_or_else(rand::random);
1110
1111        // Build resource guard from config
1112        let resource_guard = Self::build_resource_guard(&config, None);
1113
1114        // Build country pack registry from config
1115        let country_pack_registry = match &config.country_packs {
1116            Some(cp) => {
1117                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1118                    .map_err(|e| SynthError::config(e.to_string()))?
1119            }
1120            None => datasynth_core::CountryPackRegistry::builtin_only()
1121                .map_err(|e| SynthError::config(e.to_string()))?,
1122        };
1123
1124        Ok(Self {
1125            config,
1126            phase_config,
1127            coa: None,
1128            master_data: MasterDataSnapshot::default(),
1129            seed,
1130            multi_progress: None,
1131            resource_guard,
1132            output_path: None,
1133            copula_generators: Vec::new(),
1134            country_pack_registry,
1135            phase_sink: None,
1136        })
1137    }
1138
1139    /// Create with default phase config.
1140    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1141        Self::new(config, PhaseConfig::default())
1142    }
1143
1144    /// Set a streaming phase sink for real-time output.
1145    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1146        self.phase_sink = Some(sink);
1147        self
1148    }
1149
1150    /// Emit a batch of items to the phase sink (if configured).
1151    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1152        if let Some(ref sink) = self.phase_sink {
1153            for item in items {
1154                if let Ok(value) = serde_json::to_value(item) {
1155                    if let Err(e) = sink.emit(phase, type_name, &value) {
1156                        warn!(
1157                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1158                        );
1159                    }
1160                }
1161            }
1162            if let Err(e) = sink.phase_complete(phase) {
1163                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1164            }
1165        }
1166    }
1167
1168    /// Enable/disable progress bars.
1169    pub fn with_progress(mut self, show: bool) -> Self {
1170        self.phase_config.show_progress = show;
1171        if show {
1172            self.multi_progress = Some(MultiProgress::new());
1173        }
1174        self
1175    }
1176
1177    /// Set the output path for disk space monitoring.
1178    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1179        let path = path.into();
1180        self.output_path = Some(path.clone());
1181        // Rebuild resource guard with the output path
1182        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1183        self
1184    }
1185
1186    /// Access the country pack registry.
1187    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1188        &self.country_pack_registry
1189    }
1190
1191    /// Look up a country pack by country code string.
1192    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1193        self.country_pack_registry.get_by_str(country)
1194    }
1195
1196    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1197    /// company, defaulting to `"US"` if no companies are configured.
1198    fn primary_country_code(&self) -> &str {
1199        self.config
1200            .companies
1201            .first()
1202            .map(|c| c.country.as_str())
1203            .unwrap_or("US")
1204    }
1205
1206    /// Resolve the country pack for the primary (first) company.
1207    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1208        self.country_pack_for(self.primary_country_code())
1209    }
1210
1211    /// Resolve the CoA framework from config/country-pack.
1212    fn resolve_coa_framework(&self) -> CoAFramework {
1213        if self.config.accounting_standards.enabled {
1214            match self.config.accounting_standards.framework {
1215                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1216                    return CoAFramework::FrenchPcg;
1217                }
1218                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1219                    return CoAFramework::GermanSkr04;
1220                }
1221                _ => {}
1222            }
1223        }
1224        // Fallback: derive from country pack
1225        let pack = self.primary_pack();
1226        match pack.accounting.framework.as_str() {
1227            "french_gaap" => CoAFramework::FrenchPcg,
1228            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1229            _ => CoAFramework::UsGaap,
1230        }
1231    }
1232
1233    /// Check if copula generators are available.
1234    ///
1235    /// Returns true if the orchestrator has copula generators for preserving
1236    /// correlations (typically from fingerprint-based generation).
1237    pub fn has_copulas(&self) -> bool {
1238        !self.copula_generators.is_empty()
1239    }
1240
1241    /// Get the copula generators.
1242    ///
1243    /// Returns a reference to the copula generators for use during generation.
1244    /// These can be used to generate correlated samples that preserve the
1245    /// statistical relationships from the source data.
1246    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1247        &self.copula_generators
1248    }
1249
1250    /// Get a mutable reference to the copula generators.
1251    ///
1252    /// Allows generators to sample from copulas during data generation.
1253    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1254        &mut self.copula_generators
1255    }
1256
1257    /// Sample correlated values from a named copula.
1258    ///
1259    /// Returns None if the copula doesn't exist.
1260    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1261        self.copula_generators
1262            .iter_mut()
1263            .find(|c| c.name == copula_name)
1264            .map(|c| c.generator.sample())
1265    }
1266
1267    /// Create an orchestrator from a fingerprint file.
1268    ///
1269    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1270    /// and creates an orchestrator configured to generate data matching
1271    /// the statistical properties of the original data.
1272    ///
1273    /// # Arguments
1274    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1275    /// * `phase_config` - Phase configuration for generation
1276    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1277    ///
1278    /// # Example
1279    /// ```no_run
1280    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1281    /// use std::path::Path;
1282    ///
1283    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1284    ///     Path::new("fingerprint.dsf"),
1285    ///     PhaseConfig::default(),
1286    ///     1.0,
1287    /// ).unwrap();
1288    /// ```
1289    pub fn from_fingerprint(
1290        fingerprint_path: &std::path::Path,
1291        phase_config: PhaseConfig,
1292        scale: f64,
1293    ) -> SynthResult<Self> {
1294        info!("Loading fingerprint from: {}", fingerprint_path.display());
1295
1296        // Read the fingerprint
1297        let reader = FingerprintReader::new();
1298        let fingerprint = reader
1299            .read_from_file(fingerprint_path)
1300            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1301
1302        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1303    }
1304
1305    /// Create an orchestrator from a loaded fingerprint.
1306    ///
1307    /// # Arguments
1308    /// * `fingerprint` - The loaded fingerprint
1309    /// * `phase_config` - Phase configuration for generation
1310    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1311    pub fn from_fingerprint_data(
1312        fingerprint: Fingerprint,
1313        phase_config: PhaseConfig,
1314        scale: f64,
1315    ) -> SynthResult<Self> {
1316        info!(
1317            "Synthesizing config from fingerprint (version: {}, tables: {})",
1318            fingerprint.manifest.version,
1319            fingerprint.schema.tables.len()
1320        );
1321
1322        // Generate a seed for the synthesis
1323        let seed: u64 = rand::random();
1324
1325        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1326        let options = SynthesisOptions {
1327            scale,
1328            seed: Some(seed),
1329            preserve_correlations: true,
1330            inject_anomalies: true,
1331        };
1332        let synthesizer = ConfigSynthesizer::with_options(options);
1333
1334        // Synthesize full result including copula generators
1335        let synthesis_result = synthesizer
1336            .synthesize_full(&fingerprint, seed)
1337            .map_err(|e| {
1338                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1339            })?;
1340
1341        // Start with a base config from the fingerprint's industry if available
1342        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1343            Self::base_config_for_industry(industry)
1344        } else {
1345            Self::base_config_for_industry("manufacturing")
1346        };
1347
1348        // Apply the synthesized patches
1349        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1350
1351        // Log synthesis results
1352        info!(
1353            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1354            fingerprint.schema.tables.len(),
1355            scale,
1356            synthesis_result.copula_generators.len()
1357        );
1358
1359        if !synthesis_result.copula_generators.is_empty() {
1360            for spec in &synthesis_result.copula_generators {
1361                info!(
1362                    "  Copula '{}' for table '{}': {} columns",
1363                    spec.name,
1364                    spec.table,
1365                    spec.columns.len()
1366                );
1367            }
1368        }
1369
1370        // Create the orchestrator with the synthesized config
1371        let mut orchestrator = Self::new(config, phase_config)?;
1372
1373        // Store copula generators for use during generation
1374        orchestrator.copula_generators = synthesis_result.copula_generators;
1375
1376        Ok(orchestrator)
1377    }
1378
1379    /// Create a base config for a given industry.
1380    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1381        use datasynth_config::presets::create_preset;
1382        use datasynth_config::TransactionVolume;
1383        use datasynth_core::models::{CoAComplexity, IndustrySector};
1384
1385        let sector = match industry.to_lowercase().as_str() {
1386            "manufacturing" => IndustrySector::Manufacturing,
1387            "retail" => IndustrySector::Retail,
1388            "financial" | "financial_services" => IndustrySector::FinancialServices,
1389            "healthcare" => IndustrySector::Healthcare,
1390            "technology" | "tech" => IndustrySector::Technology,
1391            _ => IndustrySector::Manufacturing,
1392        };
1393
1394        // Create a preset with reasonable defaults
1395        create_preset(
1396            sector,
1397            1,  // company count
1398            12, // period months
1399            CoAComplexity::Medium,
1400            TransactionVolume::TenK,
1401        )
1402    }
1403
1404    /// Apply a config patch to a GeneratorConfig.
1405    fn apply_config_patch(
1406        mut config: GeneratorConfig,
1407        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1408    ) -> GeneratorConfig {
1409        use datasynth_fingerprint::synthesis::ConfigValue;
1410
1411        for (key, value) in patch.values() {
1412            match (key.as_str(), value) {
1413                // Transaction count is handled via TransactionVolume enum on companies
1414                // Log it but cannot directly set it (would need to modify company volumes)
1415                ("transactions.count", ConfigValue::Integer(n)) => {
1416                    info!(
1417                        "Fingerprint suggests {} transactions (apply via company volumes)",
1418                        n
1419                    );
1420                }
1421                ("global.period_months", ConfigValue::Integer(n)) => {
1422                    config.global.period_months = (*n).clamp(1, 120) as u32;
1423                }
1424                ("global.start_date", ConfigValue::String(s)) => {
1425                    config.global.start_date = s.clone();
1426                }
1427                ("global.seed", ConfigValue::Integer(n)) => {
1428                    config.global.seed = Some(*n as u64);
1429                }
1430                ("fraud.enabled", ConfigValue::Bool(b)) => {
1431                    config.fraud.enabled = *b;
1432                }
1433                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1434                    config.fraud.fraud_rate = *f;
1435                }
1436                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1437                    config.data_quality.enabled = *b;
1438                }
1439                // Handle anomaly injection paths (mapped to fraud config)
1440                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1441                    config.fraud.enabled = *b;
1442                }
1443                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1444                    config.fraud.fraud_rate = *f;
1445                }
1446                _ => {
1447                    debug!("Ignoring unknown config patch key: {}", key);
1448                }
1449            }
1450        }
1451
1452        config
1453    }
1454
1455    /// Build a resource guard from the configuration.
1456    fn build_resource_guard(
1457        config: &GeneratorConfig,
1458        output_path: Option<PathBuf>,
1459    ) -> ResourceGuard {
1460        let mut builder = ResourceGuardBuilder::new();
1461
1462        // Configure memory limit if set
1463        if config.global.memory_limit_mb > 0 {
1464            builder = builder.memory_limit(config.global.memory_limit_mb);
1465        }
1466
1467        // Configure disk monitoring for output path
1468        if let Some(path) = output_path {
1469            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1470        }
1471
1472        // Use conservative degradation settings for production safety
1473        builder = builder.conservative();
1474
1475        builder.build()
1476    }
1477
1478    /// Check resources (memory, disk, CPU) and return degradation level.
1479    ///
1480    /// Returns an error if hard limits are exceeded.
1481    /// Returns Ok(DegradationLevel) indicating current resource state.
1482    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1483        self.resource_guard.check()
1484    }
1485
1486    /// Check resources with logging.
1487    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1488        let level = self.resource_guard.check()?;
1489
1490        if level != DegradationLevel::Normal {
1491            warn!(
1492                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1493                phase,
1494                level,
1495                self.resource_guard.current_memory_mb(),
1496                self.resource_guard.available_disk_mb()
1497            );
1498        }
1499
1500        Ok(level)
1501    }
1502
1503    /// Get current degradation actions based on resource state.
1504    fn get_degradation_actions(&self) -> DegradationActions {
1505        self.resource_guard.get_actions()
1506    }
1507
1508    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1509    fn check_memory_limit(&self) -> SynthResult<()> {
1510        self.check_resources()?;
1511        Ok(())
1512    }
1513
1514    /// Run the complete generation workflow.
1515    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1516        info!("Starting enhanced generation workflow");
1517        info!(
1518            "Config: industry={:?}, period_months={}, companies={}",
1519            self.config.global.industry,
1520            self.config.global.period_months,
1521            self.config.companies.len()
1522        );
1523
1524        // Initial resource check before starting
1525        let initial_level = self.check_resources_with_log("initial")?;
1526        if initial_level == DegradationLevel::Emergency {
1527            return Err(SynthError::resource(
1528                "Insufficient resources to start generation",
1529            ));
1530        }
1531
1532        let mut stats = EnhancedGenerationStatistics {
1533            companies_count: self.config.companies.len(),
1534            period_months: self.config.global.period_months,
1535            ..Default::default()
1536        };
1537
1538        // Phase 1: Chart of Accounts
1539        let coa = self.phase_chart_of_accounts(&mut stats)?;
1540
1541        // Phase 2: Master Data
1542        self.phase_master_data(&mut stats)?;
1543
1544        // Emit master data to stream sink
1545        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1546        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1547        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1548
1549        // Phase 3: Document Flows + Subledger Linking
1550        let (mut document_flows, subledger, fa_journal_entries) =
1551            self.phase_document_flows(&mut stats)?;
1552
1553        // Emit document flows to stream sink
1554        self.emit_phase_items(
1555            "document_flows",
1556            "PurchaseOrder",
1557            &document_flows.purchase_orders,
1558        );
1559        self.emit_phase_items(
1560            "document_flows",
1561            "GoodsReceipt",
1562            &document_flows.goods_receipts,
1563        );
1564        self.emit_phase_items(
1565            "document_flows",
1566            "VendorInvoice",
1567            &document_flows.vendor_invoices,
1568        );
1569        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1570        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1571
1572        // Phase 3b: Opening Balances (before JE generation)
1573        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1574
1575        // Note: Opening balances are exported as balance/opening_balances.json but are not
1576        // converted to journal entries. Converting to JEs requires richer type information
1577        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
1578        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
1579        // A future enhancement could store (Decimal, AccountType) in the balances map.
1580
1581        // Phase 4: Journal Entries
1582        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1583
1584        // Phase 4b: Append FA acquisition journal entries to main entries
1585        if !fa_journal_entries.is_empty() {
1586            debug!(
1587                "Appending {} FA acquisition JEs to main entries",
1588                fa_journal_entries.len()
1589            );
1590            entries.extend(fa_journal_entries);
1591        }
1592
1593        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1594        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1595
1596        // Get current degradation actions for optional phases
1597        let actions = self.get_degradation_actions();
1598
1599        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1600        let sourcing = self.phase_sourcing_data(&mut stats)?;
1601
1602        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
1603        if !sourcing.contracts.is_empty() {
1604            let mut linked_count = 0usize;
1605            for chain in &mut document_flows.p2p_chains {
1606                if chain.purchase_order.contract_id.is_none() {
1607                    if let Some(contract) = sourcing
1608                        .contracts
1609                        .iter()
1610                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
1611                    {
1612                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
1613                        linked_count += 1;
1614                    }
1615                }
1616            }
1617            if linked_count > 0 {
1618                debug!(
1619                    "Linked {} purchase orders to S2C contracts by vendor match",
1620                    linked_count
1621                );
1622            }
1623        }
1624
1625        // Phase 5b: Intercompany Transactions + Matching + Eliminations
1626        let intercompany = self.phase_intercompany(&mut stats)?;
1627
1628        // Phase 5c: Append IC journal entries to main entries
1629        if !intercompany.seller_journal_entries.is_empty()
1630            || !intercompany.buyer_journal_entries.is_empty()
1631        {
1632            let ic_je_count = intercompany.seller_journal_entries.len()
1633                + intercompany.buyer_journal_entries.len();
1634            entries.extend(intercompany.seller_journal_entries.iter().cloned());
1635            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
1636            debug!(
1637                "Appended {} IC journal entries to main entries",
1638                ic_je_count
1639            );
1640        }
1641
1642        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
1643        let hr = self.phase_hr_data(&mut stats)?;
1644
1645        // Phase 6b: Generate JEs from payroll runs
1646        if !hr.payroll_runs.is_empty() {
1647            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
1648            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
1649            entries.extend(payroll_jes);
1650        }
1651
1652        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
1653        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
1654
1655        // Phase 7a: Generate JEs from production orders
1656        if !manufacturing_snap.production_orders.is_empty() {
1657            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
1658            debug!("Generated {} JEs from production orders", mfg_jes.len());
1659            entries.extend(mfg_jes);
1660        }
1661
1662        // Update final entry/line-item stats after all JE-generating phases
1663        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
1664        if !entries.is_empty() {
1665            stats.total_entries = entries.len() as u64;
1666            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
1667            debug!(
1668                "Final entry count: {}, line items: {} (after all JE-generating phases)",
1669                stats.total_entries, stats.total_line_items
1670            );
1671        }
1672
1673        // Phase 7b: Apply internal controls to journal entries
1674        if self.config.internal_controls.enabled && !entries.is_empty() {
1675            info!("Phase 7b: Applying internal controls to journal entries");
1676            let control_config = ControlGeneratorConfig {
1677                exception_rate: self.config.internal_controls.exception_rate,
1678                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
1679                enable_sox_marking: true,
1680                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
1681                    self.config.internal_controls.sox_materiality_threshold,
1682                )
1683                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
1684            };
1685            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
1686            for entry in &mut entries {
1687                control_gen.apply_controls(entry, &coa);
1688            }
1689            let with_controls = entries
1690                .iter()
1691                .filter(|e| !e.header.control_ids.is_empty())
1692                .count();
1693            info!(
1694                "Applied controls to {} entries ({} with control IDs assigned)",
1695                entries.len(),
1696                with_controls
1697            );
1698        }
1699
1700        // Emit journal entries to stream sink (after all JE-generating phases)
1701        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
1702
1703        // Phase 8: Anomaly Injection (after all JE-generating phases)
1704        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
1705
1706        // Emit anomaly labels to stream sink
1707        self.emit_phase_items(
1708            "anomaly_injection",
1709            "LabeledAnomaly",
1710            &anomaly_labels.labels,
1711        );
1712
1713        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
1714        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
1715
1716        // Emit red flags to stream sink
1717        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
1718
1719        // Phase 26b: Collusion Ring Generation (after red flags)
1720        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
1721
1722        // Emit collusion rings to stream sink
1723        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
1724
1725        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
1726        let balance_validation = self.phase_balance_validation(&entries)?;
1727
1728        // Phase 9b: GL-to-Subledger Reconciliation
1729        let subledger_reconciliation =
1730            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
1731
1732        // Phase 10: Data Quality Injection
1733        let data_quality_stats =
1734            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
1735
1736        // Phase 11: Audit Data
1737        let audit = self.phase_audit_data(&entries, &mut stats)?;
1738
1739        // Phase 12: Banking KYC/AML Data
1740        let banking = self.phase_banking_data(&mut stats)?;
1741
1742        // Phase 13: Graph Export
1743        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
1744
1745        // Phase 14: LLM Enrichment
1746        self.phase_llm_enrichment(&mut stats);
1747
1748        // Phase 15: Diffusion Enhancement
1749        self.phase_diffusion_enhancement(&mut stats);
1750
1751        // Phase 16: Causal Overlay
1752        self.phase_causal_overlay(&mut stats);
1753
1754        // Phase 17: Bank Reconciliation + Financial Statements
1755        let financial_reporting =
1756            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;
1757
1758        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
1759        let accounting_standards = self.phase_accounting_standards(&mut stats)?;
1760
1761        // Phase 18b: OCPM Events (after all process data is available)
1762        let ocpm = self.phase_ocpm_events(
1763            &document_flows,
1764            &sourcing,
1765            &hr,
1766            &manufacturing_snap,
1767            &banking,
1768            &audit,
1769            &financial_reporting,
1770            &mut stats,
1771        )?;
1772
1773        // Emit OCPM events to stream sink
1774        if let Some(ref event_log) = ocpm.event_log {
1775            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
1776        }
1777
1778        // Phase 19: Sales Quotes, Management KPIs, Budgets
1779        let sales_kpi_budgets =
1780            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
1781
1782        // Phase 20: Tax Generation
1783        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;
1784
1785        // Phase 21: ESG Data Generation
1786        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;
1787
1788        // Phase 22: Treasury Data Generation
1789        let treasury =
1790            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
1791
1792        // Phase 23: Project Accounting Data Generation
1793        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
1794
1795        // Phase 24: Process Evolution + Organizational Events
1796        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
1797
1798        // Phase 24b: Disruption Events
1799        let disruption_events = self.phase_disruption_events(&mut stats)?;
1800
1801        // Phase 27: Bi-Temporal Vendor Version Chains
1802        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
1803
1804        // Phase 28: Entity Relationship Graph + Cross-Process Links
1805        let (entity_relationship_graph, cross_process_links) =
1806            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
1807
1808        // Phase 29: Industry-specific GL accounts
1809        let industry_output = self.phase_industry_data(&mut stats);
1810
1811        // Phase: Compliance regulations (must run before hypergraph so it can be included)
1812        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
1813
1814        // Phase 19b: Hypergraph Export (after all data is available)
1815        self.phase_hypergraph_export(
1816            &coa,
1817            &entries,
1818            &document_flows,
1819            &sourcing,
1820            &hr,
1821            &manufacturing_snap,
1822            &banking,
1823            &audit,
1824            &financial_reporting,
1825            &ocpm,
1826            &compliance_regulations,
1827            &mut stats,
1828        )?;
1829
1830        // Phase 10c: Additional graph builders (approval, entity, banking)
1831        // These run after all data is available since they need banking/IC data.
1832        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
1833            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
1834        }
1835
1836        // Log informational messages for config sections not yet fully wired
1837        if self.config.streaming.enabled {
1838            info!("Note: streaming config is enabled but batch mode does not use it");
1839        }
1840        if self.config.vendor_network.enabled {
1841            debug!("Vendor network config available; relationship graph generation is partial");
1842        }
1843        if self.config.customer_segmentation.enabled {
1844            debug!("Customer segmentation config available; segment-aware generation is partial");
1845        }
1846
1847        // Log final resource statistics
1848        let resource_stats = self.resource_guard.stats();
1849        info!(
1850            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
1851            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
1852            resource_stats.disk.estimated_bytes_written,
1853            resource_stats.degradation_level
1854        );
1855
1856        // Flush any remaining stream sink data
1857        if let Some(ref sink) = self.phase_sink {
1858            if let Err(e) = sink.flush() {
1859                warn!("Stream sink flush failed: {e}");
1860            }
1861        }
1862
1863        // Build data lineage graph
1864        let lineage = self.build_lineage_graph();
1865
1866        // Evaluate quality gates if enabled in config
1867        let gate_result = if self.config.quality_gates.enabled {
1868            let profile_name = &self.config.quality_gates.profile;
1869            match datasynth_eval::gates::get_profile(profile_name) {
1870                Some(profile) => {
1871                    // Build an evaluation populated with actual generation metrics.
1872                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
1873
1874                    // Populate balance sheet evaluation from balance validation results
1875                    if balance_validation.validated {
1876                        eval.coherence.balance =
1877                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
1878                                equation_balanced: balance_validation.is_balanced,
1879                                max_imbalance: (balance_validation.total_debits
1880                                    - balance_validation.total_credits)
1881                                    .abs(),
1882                                periods_evaluated: 1,
1883                                periods_imbalanced: if balance_validation.is_balanced {
1884                                    0
1885                                } else {
1886                                    1
1887                                },
1888                                period_results: Vec::new(),
1889                                companies_evaluated: self.config.companies.len(),
1890                            });
1891                    }
1892
1893                    // Set coherence passes based on balance validation
1894                    eval.coherence.passes = balance_validation.is_balanced;
1895                    if !balance_validation.is_balanced {
1896                        eval.coherence
1897                            .failures
1898                            .push("Balance sheet equation not satisfied".to_string());
1899                    }
1900
1901                    // Set statistical score based on entry count (basic sanity)
1902                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
1903                    eval.statistical.passes = !entries.is_empty();
1904
1905                    // Set quality score from data quality stats
1906                    eval.quality.overall_score = 0.9; // Default high for generated data
1907                    eval.quality.passes = true;
1908
1909                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
1910                    info!(
1911                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
1912                        profile_name, result.gates_passed, result.gates_total, result.summary
1913                    );
1914                    Some(result)
1915                }
1916                None => {
1917                    warn!(
1918                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
1919                        profile_name
1920                    );
1921                    None
1922                }
1923            }
1924        } else {
1925            None
1926        };
1927
1928        // Generate internal controls if enabled
1929        let internal_controls = if self.config.internal_controls.enabled {
1930            InternalControl::standard_controls()
1931        } else {
1932            Vec::new()
1933        };
1934
1935        Ok(EnhancedGenerationResult {
1936            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
1937            master_data: std::mem::take(&mut self.master_data),
1938            document_flows,
1939            subledger,
1940            ocpm,
1941            audit,
1942            banking,
1943            graph_export,
1944            sourcing,
1945            financial_reporting,
1946            hr,
1947            accounting_standards,
1948            manufacturing: manufacturing_snap,
1949            sales_kpi_budgets,
1950            tax,
1951            esg: esg_snap,
1952            treasury,
1953            project_accounting,
1954            process_evolution,
1955            organizational_events,
1956            disruption_events,
1957            intercompany,
1958            journal_entries: entries,
1959            anomaly_labels,
1960            balance_validation,
1961            data_quality_stats,
1962            statistics: stats,
1963            lineage: Some(lineage),
1964            gate_result,
1965            internal_controls,
1966            opening_balances,
1967            subledger_reconciliation,
1968            counterfactual_pairs,
1969            red_flags,
1970            collusion_rings,
1971            temporal_vendor_chains,
1972            entity_relationship_graph,
1973            cross_process_links,
1974            industry_output,
1975            compliance_regulations,
1976        })
1977    }
1978
1979    // ========================================================================
1980    // Generation Phase Methods
1981    // ========================================================================
1982
1983    /// Phase 1: Generate Chart of Accounts and update statistics.
1984    fn phase_chart_of_accounts(
1985        &mut self,
1986        stats: &mut EnhancedGenerationStatistics,
1987    ) -> SynthResult<Arc<ChartOfAccounts>> {
1988        info!("Phase 1: Generating Chart of Accounts");
1989        let coa = self.generate_coa()?;
1990        stats.accounts_count = coa.account_count();
1991        info!(
1992            "Chart of Accounts generated: {} accounts",
1993            stats.accounts_count
1994        );
1995        self.check_resources_with_log("post-coa")?;
1996        Ok(coa)
1997    }
1998
1999    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
2000    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2001        if self.phase_config.generate_master_data {
2002            info!("Phase 2: Generating Master Data");
2003            self.generate_master_data()?;
2004            stats.vendor_count = self.master_data.vendors.len();
2005            stats.customer_count = self.master_data.customers.len();
2006            stats.material_count = self.master_data.materials.len();
2007            stats.asset_count = self.master_data.assets.len();
2008            stats.employee_count = self.master_data.employees.len();
2009            info!(
2010                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2011                stats.vendor_count, stats.customer_count, stats.material_count,
2012                stats.asset_count, stats.employee_count
2013            );
2014            self.check_resources_with_log("post-master-data")?;
2015        } else {
2016            debug!("Phase 2: Skipped (master data generation disabled)");
2017        }
2018        Ok(())
2019    }
2020
2021    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
2022    fn phase_document_flows(
2023        &mut self,
2024        stats: &mut EnhancedGenerationStatistics,
2025    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2026        let mut document_flows = DocumentFlowSnapshot::default();
2027        let mut subledger = SubledgerSnapshot::default();
2028
2029        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2030            info!("Phase 3: Generating Document Flows");
2031            self.generate_document_flows(&mut document_flows)?;
2032            stats.p2p_chain_count = document_flows.p2p_chains.len();
2033            stats.o2c_chain_count = document_flows.o2c_chains.len();
2034            info!(
2035                "Document flows generated: {} P2P chains, {} O2C chains",
2036                stats.p2p_chain_count, stats.o2c_chain_count
2037            );
2038
2039            // Phase 3b: Link document flows to subledgers (for data coherence)
2040            debug!("Phase 3b: Linking document flows to subledgers");
2041            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2042            stats.ap_invoice_count = subledger.ap_invoices.len();
2043            stats.ar_invoice_count = subledger.ar_invoices.len();
2044            debug!(
2045                "Subledgers linked: {} AP invoices, {} AR invoices",
2046                stats.ap_invoice_count, stats.ar_invoice_count
2047            );
2048
2049            self.check_resources_with_log("post-document-flows")?;
2050        } else {
2051            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2052        }
2053
2054        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
2055        let mut fa_journal_entries = Vec::new();
2056        if !self.master_data.assets.is_empty() {
2057            debug!("Generating FA subledger records");
2058            let company_code = self
2059                .config
2060                .companies
2061                .first()
2062                .map(|c| c.code.as_str())
2063                .unwrap_or("1000");
2064            let currency = self
2065                .config
2066                .companies
2067                .first()
2068                .map(|c| c.currency.as_str())
2069                .unwrap_or("USD");
2070
2071            let mut fa_gen = datasynth_generators::FAGenerator::new(
2072                datasynth_generators::FAGeneratorConfig::default(),
2073                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2074            );
2075
2076            for asset in &self.master_data.assets {
2077                let (record, je) = fa_gen.generate_asset_acquisition(
2078                    company_code,
2079                    &format!("{:?}", asset.asset_class),
2080                    &asset.description,
2081                    asset.acquisition_date,
2082                    currency,
2083                    asset.cost_center.as_deref(),
2084                );
2085                subledger.fa_records.push(record);
2086                fa_journal_entries.push(je);
2087            }
2088
2089            stats.fa_subledger_count = subledger.fa_records.len();
2090            debug!(
2091                "FA subledger records generated: {} (with {} acquisition JEs)",
2092                stats.fa_subledger_count,
2093                fa_journal_entries.len()
2094            );
2095        }
2096
2097        // Generate Inventory subledger records from master data materials
2098        if !self.master_data.materials.is_empty() {
2099            debug!("Generating Inventory subledger records");
2100            let first_company = self.config.companies.first();
2101            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2102            let inv_currency = first_company
2103                .map(|c| c.currency.clone())
2104                .unwrap_or_else(|| "USD".to_string());
2105
2106            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2107                datasynth_generators::InventoryGeneratorConfig::default(),
2108                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2109                inv_currency.clone(),
2110            );
2111
2112            for (i, material) in self.master_data.materials.iter().enumerate() {
2113                let plant = format!("PLANT{:02}", (i % 3) + 1);
2114                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2115                let initial_qty = rust_decimal::Decimal::from(
2116                    material
2117                        .safety_stock
2118                        .to_string()
2119                        .parse::<i64>()
2120                        .unwrap_or(100),
2121                );
2122
2123                let position = inv_gen.generate_position(
2124                    company_code,
2125                    &plant,
2126                    &storage_loc,
2127                    &material.material_id,
2128                    &material.description,
2129                    initial_qty,
2130                    Some(material.standard_cost),
2131                    &inv_currency,
2132                );
2133                subledger.inventory_positions.push(position);
2134            }
2135
2136            stats.inventory_subledger_count = subledger.inventory_positions.len();
2137            debug!(
2138                "Inventory subledger records generated: {}",
2139                stats.inventory_subledger_count
2140            );
2141        }
2142
2143        Ok((document_flows, subledger, fa_journal_entries))
2144    }
2145
2146    /// Phase 3c: Generate OCPM events from document flows.
2147    #[allow(clippy::too_many_arguments)]
2148    fn phase_ocpm_events(
2149        &mut self,
2150        document_flows: &DocumentFlowSnapshot,
2151        sourcing: &SourcingSnapshot,
2152        hr: &HrSnapshot,
2153        manufacturing: &ManufacturingSnapshot,
2154        banking: &BankingSnapshot,
2155        audit: &AuditSnapshot,
2156        financial_reporting: &FinancialReportingSnapshot,
2157        stats: &mut EnhancedGenerationStatistics,
2158    ) -> SynthResult<OcpmSnapshot> {
2159        if self.phase_config.generate_ocpm_events {
2160            info!("Phase 3c: Generating OCPM Events");
2161            let ocpm_snapshot = self.generate_ocpm_events(
2162                document_flows,
2163                sourcing,
2164                hr,
2165                manufacturing,
2166                banking,
2167                audit,
2168                financial_reporting,
2169            )?;
2170            stats.ocpm_event_count = ocpm_snapshot.event_count;
2171            stats.ocpm_object_count = ocpm_snapshot.object_count;
2172            stats.ocpm_case_count = ocpm_snapshot.case_count;
2173            info!(
2174                "OCPM events generated: {} events, {} objects, {} cases",
2175                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2176            );
2177            self.check_resources_with_log("post-ocpm")?;
2178            Ok(ocpm_snapshot)
2179        } else {
2180            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2181            Ok(OcpmSnapshot::default())
2182        }
2183    }
2184
2185    /// Phase 4: Generate journal entries from document flows and standalone generation.
2186    fn phase_journal_entries(
2187        &mut self,
2188        coa: &Arc<ChartOfAccounts>,
2189        document_flows: &DocumentFlowSnapshot,
2190        _stats: &mut EnhancedGenerationStatistics,
2191    ) -> SynthResult<Vec<JournalEntry>> {
2192        let mut entries = Vec::new();
2193
2194        // Phase 4a: Generate JEs from document flows (for data coherence)
2195        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2196            debug!("Phase 4a: Generating JEs from document flows");
2197            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2198            debug!("Generated {} JEs from document flows", flow_entries.len());
2199            entries.extend(flow_entries);
2200        }
2201
2202        // Phase 4b: Generate standalone journal entries
2203        if self.phase_config.generate_journal_entries {
2204            info!("Phase 4: Generating Journal Entries");
2205            let je_entries = self.generate_journal_entries(coa)?;
2206            info!("Generated {} standalone journal entries", je_entries.len());
2207            entries.extend(je_entries);
2208        } else {
2209            debug!("Phase 4: Skipped (journal entry generation disabled)");
2210        }
2211
2212        if !entries.is_empty() {
2213            // Note: stats.total_entries/total_line_items are set in generate()
2214            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
2215            self.check_resources_with_log("post-journal-entries")?;
2216        }
2217
2218        Ok(entries)
2219    }
2220
2221    /// Phase 5: Inject anomalies into journal entries.
2222    fn phase_anomaly_injection(
2223        &mut self,
2224        entries: &mut [JournalEntry],
2225        actions: &DegradationActions,
2226        stats: &mut EnhancedGenerationStatistics,
2227    ) -> SynthResult<AnomalyLabels> {
2228        if self.phase_config.inject_anomalies
2229            && !entries.is_empty()
2230            && !actions.skip_anomaly_injection
2231        {
2232            info!("Phase 5: Injecting Anomalies");
2233            let result = self.inject_anomalies(entries)?;
2234            stats.anomalies_injected = result.labels.len();
2235            info!("Injected {} anomalies", stats.anomalies_injected);
2236            self.check_resources_with_log("post-anomaly-injection")?;
2237            Ok(result)
2238        } else if actions.skip_anomaly_injection {
2239            warn!("Phase 5: Skipped due to resource degradation");
2240            Ok(AnomalyLabels::default())
2241        } else {
2242            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2243            Ok(AnomalyLabels::default())
2244        }
2245    }
2246
2247    /// Phase 6: Validate balance sheet equation on journal entries.
2248    fn phase_balance_validation(
2249        &mut self,
2250        entries: &[JournalEntry],
2251    ) -> SynthResult<BalanceValidationResult> {
2252        if self.phase_config.validate_balances && !entries.is_empty() {
2253            debug!("Phase 6: Validating Balances");
2254            let balance_validation = self.validate_journal_entries(entries)?;
2255            if balance_validation.is_balanced {
2256                debug!("Balance validation passed");
2257            } else {
2258                warn!(
2259                    "Balance validation found {} errors",
2260                    balance_validation.validation_errors.len()
2261                );
2262            }
2263            Ok(balance_validation)
2264        } else {
2265            Ok(BalanceValidationResult::default())
2266        }
2267    }
2268
2269    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
2270    fn phase_data_quality_injection(
2271        &mut self,
2272        entries: &mut [JournalEntry],
2273        actions: &DegradationActions,
2274        stats: &mut EnhancedGenerationStatistics,
2275    ) -> SynthResult<DataQualityStats> {
2276        if self.phase_config.inject_data_quality
2277            && !entries.is_empty()
2278            && !actions.skip_data_quality
2279        {
2280            info!("Phase 7: Injecting Data Quality Variations");
2281            let dq_stats = self.inject_data_quality(entries)?;
2282            stats.data_quality_issues = dq_stats.records_with_issues;
2283            info!("Injected {} data quality issues", stats.data_quality_issues);
2284            self.check_resources_with_log("post-data-quality")?;
2285            Ok(dq_stats)
2286        } else if actions.skip_data_quality {
2287            warn!("Phase 7: Skipped due to resource degradation");
2288            Ok(DataQualityStats::default())
2289        } else {
2290            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2291            Ok(DataQualityStats::default())
2292        }
2293    }
2294
2295    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2296    fn phase_audit_data(
2297        &mut self,
2298        entries: &[JournalEntry],
2299        stats: &mut EnhancedGenerationStatistics,
2300    ) -> SynthResult<AuditSnapshot> {
2301        if self.phase_config.generate_audit {
2302            info!("Phase 8: Generating Audit Data");
2303            let audit_snapshot = self.generate_audit_data(entries)?;
2304            stats.audit_engagement_count = audit_snapshot.engagements.len();
2305            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2306            stats.audit_evidence_count = audit_snapshot.evidence.len();
2307            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2308            stats.audit_finding_count = audit_snapshot.findings.len();
2309            stats.audit_judgment_count = audit_snapshot.judgments.len();
2310            info!(
2311                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2312                stats.audit_engagement_count, stats.audit_workpaper_count,
2313                stats.audit_evidence_count, stats.audit_risk_count,
2314                stats.audit_finding_count, stats.audit_judgment_count
2315            );
2316            self.check_resources_with_log("post-audit")?;
2317            Ok(audit_snapshot)
2318        } else {
2319            debug!("Phase 8: Skipped (audit generation disabled)");
2320            Ok(AuditSnapshot::default())
2321        }
2322    }
2323
2324    /// Phase 9: Generate banking KYC/AML data.
2325    fn phase_banking_data(
2326        &mut self,
2327        stats: &mut EnhancedGenerationStatistics,
2328    ) -> SynthResult<BankingSnapshot> {
2329        if self.phase_config.generate_banking && self.config.banking.enabled {
2330            info!("Phase 9: Generating Banking KYC/AML Data");
2331            let banking_snapshot = self.generate_banking_data()?;
2332            stats.banking_customer_count = banking_snapshot.customers.len();
2333            stats.banking_account_count = banking_snapshot.accounts.len();
2334            stats.banking_transaction_count = banking_snapshot.transactions.len();
2335            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2336            info!(
2337                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2338                stats.banking_customer_count, stats.banking_account_count,
2339                stats.banking_transaction_count, stats.banking_suspicious_count
2340            );
2341            self.check_resources_with_log("post-banking")?;
2342            Ok(banking_snapshot)
2343        } else {
2344            debug!("Phase 9: Skipped (banking generation disabled)");
2345            Ok(BankingSnapshot::default())
2346        }
2347    }
2348
2349    /// Phase 10: Export accounting network graphs for ML training.
2350    fn phase_graph_export(
2351        &mut self,
2352        entries: &[JournalEntry],
2353        coa: &Arc<ChartOfAccounts>,
2354        stats: &mut EnhancedGenerationStatistics,
2355    ) -> SynthResult<GraphExportSnapshot> {
2356        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2357            && !entries.is_empty()
2358        {
2359            info!("Phase 10: Exporting Accounting Network Graphs");
2360            match self.export_graphs(entries, coa, stats) {
2361                Ok(snapshot) => {
2362                    info!(
2363                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2364                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2365                    );
2366                    Ok(snapshot)
2367                }
2368                Err(e) => {
2369                    warn!("Phase 10: Graph export failed: {}", e);
2370                    Ok(GraphExportSnapshot::default())
2371                }
2372            }
2373        } else {
2374            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2375            Ok(GraphExportSnapshot::default())
2376        }
2377    }
2378
2379    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2380    #[allow(clippy::too_many_arguments)]
2381    fn phase_hypergraph_export(
2382        &self,
2383        coa: &Arc<ChartOfAccounts>,
2384        entries: &[JournalEntry],
2385        document_flows: &DocumentFlowSnapshot,
2386        sourcing: &SourcingSnapshot,
2387        hr: &HrSnapshot,
2388        manufacturing: &ManufacturingSnapshot,
2389        banking: &BankingSnapshot,
2390        audit: &AuditSnapshot,
2391        financial_reporting: &FinancialReportingSnapshot,
2392        ocpm: &OcpmSnapshot,
2393        compliance: &ComplianceRegulationsSnapshot,
2394        stats: &mut EnhancedGenerationStatistics,
2395    ) -> SynthResult<()> {
2396        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2397            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2398            match self.export_hypergraph(
2399                coa,
2400                entries,
2401                document_flows,
2402                sourcing,
2403                hr,
2404                manufacturing,
2405                banking,
2406                audit,
2407                financial_reporting,
2408                ocpm,
2409                compliance,
2410                stats,
2411            ) {
2412                Ok(info) => {
2413                    info!(
2414                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2415                        info.node_count, info.edge_count, info.hyperedge_count
2416                    );
2417                }
2418                Err(e) => {
2419                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2420                }
2421            }
2422        } else {
2423            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2424        }
2425        Ok(())
2426    }
2427
2428    /// Phase 11: LLM Enrichment.
2429    ///
2430    /// Uses an LLM provider (mock by default) to enrich vendor names with
2431    /// realistic, context-aware names. This phase is non-blocking: failures
2432    /// log a warning but do not stop the generation pipeline.
2433    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2434        if !self.config.llm.enabled {
2435            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2436            return;
2437        }
2438
2439        info!("Phase 11: Starting LLM Enrichment");
2440        let start = std::time::Instant::now();
2441
2442        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2443            let provider = Arc::new(MockLlmProvider::new(self.seed));
2444            let enricher = VendorLlmEnricher::new(provider);
2445
2446            let industry = format!("{:?}", self.config.global.industry);
2447            let max_enrichments = self
2448                .config
2449                .llm
2450                .max_vendor_enrichments
2451                .min(self.master_data.vendors.len());
2452
2453            let mut enriched_count = 0usize;
2454            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2455                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2456                    Ok(name) => {
2457                        vendor.name = name;
2458                        enriched_count += 1;
2459                    }
2460                    Err(e) => {
2461                        warn!(
2462                            "LLM vendor enrichment failed for {}: {}",
2463                            vendor.vendor_id, e
2464                        );
2465                    }
2466                }
2467            }
2468
2469            enriched_count
2470        }));
2471
2472        match result {
2473            Ok(enriched_count) => {
2474                stats.llm_vendors_enriched = enriched_count;
2475                let elapsed = start.elapsed();
2476                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2477                info!(
2478                    "Phase 11 complete: {} vendors enriched in {}ms",
2479                    enriched_count, stats.llm_enrichment_ms
2480                );
2481            }
2482            Err(_) => {
2483                let elapsed = start.elapsed();
2484                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2485                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2486            }
2487        }
2488    }
2489
2490    /// Phase 12: Diffusion Enhancement.
2491    ///
2492    /// Generates a sample set using the statistical diffusion backend to
2493    /// demonstrate distribution-matching data generation. This phase is
2494    /// non-blocking: failures log a warning but do not stop the pipeline.
2495    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2496        if !self.config.diffusion.enabled {
2497            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2498            return;
2499        }
2500
2501        info!("Phase 12: Starting Diffusion Enhancement");
2502        let start = std::time::Instant::now();
2503
2504        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2505            // Target distribution: transaction amounts (log-normal-like)
2506            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2507            let stds = vec![2000.0, 1.5, 1.0];
2508
2509            let diffusion_config = DiffusionConfig {
2510                n_steps: self.config.diffusion.n_steps,
2511                seed: self.seed,
2512                ..Default::default()
2513            };
2514
2515            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2516
2517            let n_samples = self.config.diffusion.sample_size;
2518            let n_features = 3; // amount, line_items, approval_level
2519            let samples = backend.generate(n_samples, n_features, self.seed);
2520
2521            samples.len()
2522        }));
2523
2524        match result {
2525            Ok(sample_count) => {
2526                stats.diffusion_samples_generated = sample_count;
2527                let elapsed = start.elapsed();
2528                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2529                info!(
2530                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2531                    sample_count, stats.diffusion_enhancement_ms
2532                );
2533            }
2534            Err(_) => {
2535                let elapsed = start.elapsed();
2536                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2537                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2538            }
2539        }
2540    }
2541
2542    /// Phase 13: Causal Overlay.
2543    ///
2544    /// Builds a structural causal model from a built-in template (e.g.,
2545    /// fraud_detection) and generates causal samples. Optionally validates
2546    /// that the output respects the causal structure. This phase is
2547    /// non-blocking: failures log a warning but do not stop the pipeline.
2548    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2549        if !self.config.causal.enabled {
2550            debug!("Phase 13: Skipped (causal generation disabled)");
2551            return;
2552        }
2553
2554        info!("Phase 13: Starting Causal Overlay");
2555        let start = std::time::Instant::now();
2556
2557        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2558            // Select template based on config
2559            let graph = match self.config.causal.template.as_str() {
2560                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2561                _ => CausalGraph::fraud_detection_template(),
2562            };
2563
2564            let scm = StructuralCausalModel::new(graph.clone())
2565                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
2566
2567            let n_samples = self.config.causal.sample_size;
2568            let samples = scm
2569                .generate(n_samples, self.seed)
2570                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
2571
2572            // Optionally validate causal structure
2573            let validation_passed = if self.config.causal.validate {
2574                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2575                if report.valid {
2576                    info!(
2577                        "Causal validation passed: all {} checks OK",
2578                        report.checks.len()
2579                    );
2580                } else {
2581                    warn!(
2582                        "Causal validation: {} violations detected: {:?}",
2583                        report.violations.len(),
2584                        report.violations
2585                    );
2586                }
2587                Some(report.valid)
2588            } else {
2589                None
2590            };
2591
2592            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2593        }));
2594
2595        match result {
2596            Ok(Ok((sample_count, validation_passed))) => {
2597                stats.causal_samples_generated = sample_count;
2598                stats.causal_validation_passed = validation_passed;
2599                let elapsed = start.elapsed();
2600                stats.causal_generation_ms = elapsed.as_millis() as u64;
2601                info!(
2602                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2603                    sample_count, stats.causal_generation_ms, validation_passed,
2604                );
2605            }
2606            Ok(Err(e)) => {
2607                let elapsed = start.elapsed();
2608                stats.causal_generation_ms = elapsed.as_millis() as u64;
2609                warn!("Phase 13: Causal generation failed: {}", e);
2610            }
2611            Err(_) => {
2612                let elapsed = start.elapsed();
2613                stats.causal_generation_ms = elapsed.as_millis() as u64;
2614                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2615            }
2616        }
2617    }
2618
2619    /// Phase 14: Generate S2C sourcing data.
2620    fn phase_sourcing_data(
2621        &mut self,
2622        stats: &mut EnhancedGenerationStatistics,
2623    ) -> SynthResult<SourcingSnapshot> {
2624        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2625            debug!("Phase 14: Skipped (sourcing generation disabled)");
2626            return Ok(SourcingSnapshot::default());
2627        }
2628
2629        info!("Phase 14: Generating S2C Sourcing Data");
2630        let seed = self.seed;
2631
2632        // Gather vendor data from master data
2633        let vendor_ids: Vec<String> = self
2634            .master_data
2635            .vendors
2636            .iter()
2637            .map(|v| v.vendor_id.clone())
2638            .collect();
2639        if vendor_ids.is_empty() {
2640            debug!("Phase 14: Skipped (no vendors available)");
2641            return Ok(SourcingSnapshot::default());
2642        }
2643
2644        let categories: Vec<(String, String)> = vec![
2645            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2646            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2647            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2648            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2649            ("CAT-LOG".to_string(), "Logistics".to_string()),
2650        ];
2651        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2652            .iter()
2653            .map(|(id, name)| {
2654                (
2655                    id.clone(),
2656                    name.clone(),
2657                    rust_decimal::Decimal::from(100_000),
2658                )
2659            })
2660            .collect();
2661
2662        let company_code = self
2663            .config
2664            .companies
2665            .first()
2666            .map(|c| c.code.as_str())
2667            .unwrap_or("1000");
2668        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2669            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2670        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2671        let fiscal_year = start_date.year() as u16;
2672        let owner_ids: Vec<String> = self
2673            .master_data
2674            .employees
2675            .iter()
2676            .take(5)
2677            .map(|e| e.employee_id.clone())
2678            .collect();
2679        let owner_id = owner_ids
2680            .first()
2681            .map(std::string::String::as_str)
2682            .unwrap_or("BUYER-001");
2683
2684        // Step 1: Spend Analysis
2685        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2686        let spend_analyses =
2687            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2688
2689        // Step 2: Sourcing Projects
2690        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2691        let sourcing_projects = if owner_ids.is_empty() {
2692            Vec::new()
2693        } else {
2694            project_gen.generate(
2695                company_code,
2696                &categories_with_spend,
2697                &owner_ids,
2698                start_date,
2699                self.config.global.period_months,
2700            )
2701        };
2702        stats.sourcing_project_count = sourcing_projects.len();
2703
2704        // Step 3: Qualifications
2705        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2706        let mut qual_gen = QualificationGenerator::new(seed + 2);
2707        let qualifications = qual_gen.generate(
2708            company_code,
2709            &qual_vendor_ids,
2710            sourcing_projects.first().map(|p| p.project_id.as_str()),
2711            owner_id,
2712            start_date,
2713        );
2714
2715        // Step 4: RFx Events
2716        let mut rfx_gen = RfxGenerator::new(seed + 3);
2717        let rfx_events: Vec<RfxEvent> = sourcing_projects
2718            .iter()
2719            .map(|proj| {
2720                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2721                rfx_gen.generate(
2722                    company_code,
2723                    &proj.project_id,
2724                    &proj.category_id,
2725                    &qualified_vids,
2726                    owner_id,
2727                    start_date,
2728                    50000.0,
2729                )
2730            })
2731            .collect();
2732        stats.rfx_event_count = rfx_events.len();
2733
2734        // Step 5: Bids
2735        let mut bid_gen = BidGenerator::new(seed + 4);
2736        let mut all_bids = Vec::new();
2737        for rfx in &rfx_events {
2738            let bidder_count = vendor_ids.len().clamp(2, 5);
2739            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2740            let bids = bid_gen.generate(rfx, &responding, start_date);
2741            all_bids.extend(bids);
2742        }
2743        stats.bid_count = all_bids.len();
2744
2745        // Step 6: Bid Evaluations
2746        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2747        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2748            .iter()
2749            .map(|rfx| {
2750                let rfx_bids: Vec<SupplierBid> = all_bids
2751                    .iter()
2752                    .filter(|b| b.rfx_id == rfx.rfx_id)
2753                    .cloned()
2754                    .collect();
2755                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2756            })
2757            .collect();
2758
2759        // Step 7: Contracts from winning bids
2760        let mut contract_gen = ContractGenerator::new(seed + 6);
2761        let contracts: Vec<ProcurementContract> = bid_evaluations
2762            .iter()
2763            .zip(rfx_events.iter())
2764            .filter_map(|(eval, rfx)| {
2765                eval.ranked_bids.first().and_then(|winner| {
2766                    all_bids
2767                        .iter()
2768                        .find(|b| b.bid_id == winner.bid_id)
2769                        .map(|winning_bid| {
2770                            contract_gen.generate_from_bid(
2771                                winning_bid,
2772                                Some(&rfx.sourcing_project_id),
2773                                &rfx.category_id,
2774                                owner_id,
2775                                start_date,
2776                            )
2777                        })
2778                })
2779            })
2780            .collect();
2781        stats.contract_count = contracts.len();
2782
2783        // Step 8: Catalog Items
2784        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2785        let catalog_items = catalog_gen.generate(&contracts);
2786        stats.catalog_item_count = catalog_items.len();
2787
2788        // Step 9: Scorecards
2789        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2790        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2791            .iter()
2792            .fold(
2793                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2794                |mut acc, c| {
2795                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2796                    acc
2797                },
2798            )
2799            .into_iter()
2800            .collect();
2801        let scorecards = scorecard_gen.generate(
2802            company_code,
2803            &vendor_contracts,
2804            start_date,
2805            end_date,
2806            owner_id,
2807        );
2808        stats.scorecard_count = scorecards.len();
2809
2810        // Back-populate cross-references on sourcing projects (Task 35)
2811        // Link each project to its RFx events, contracts, and spend analyses
2812        let mut sourcing_projects = sourcing_projects;
2813        for project in &mut sourcing_projects {
2814            // Link RFx events generated for this project
2815            project.rfx_ids = rfx_events
2816                .iter()
2817                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2818                .map(|rfx| rfx.rfx_id.clone())
2819                .collect();
2820
2821            // Link contract awarded from this project's RFx
2822            project.contract_id = contracts
2823                .iter()
2824                .find(|c| {
2825                    c.sourcing_project_id
2826                        .as_deref()
2827                        .is_some_and(|sp| sp == project.project_id)
2828                })
2829                .map(|c| c.contract_id.clone());
2830
2831            // Link spend analysis for matching category (use category_id as the reference)
2832            project.spend_analysis_id = spend_analyses
2833                .iter()
2834                .find(|sa| sa.category_id == project.category_id)
2835                .map(|sa| sa.category_id.clone());
2836        }
2837
2838        info!(
2839            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2840            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2841            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2842        );
2843        self.check_resources_with_log("post-sourcing")?;
2844
2845        Ok(SourcingSnapshot {
2846            spend_analyses,
2847            sourcing_projects,
2848            qualifications,
2849            rfx_events,
2850            bids: all_bids,
2851            bid_evaluations,
2852            contracts,
2853            catalog_items,
2854            scorecards,
2855        })
2856    }
2857
2858    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2859    fn phase_intercompany(
2860        &mut self,
2861        stats: &mut EnhancedGenerationStatistics,
2862    ) -> SynthResult<IntercompanySnapshot> {
2863        // Skip if intercompany is disabled in config
2864        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2865            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2866            return Ok(IntercompanySnapshot::default());
2867        }
2868
2869        // Intercompany requires at least 2 companies
2870        if self.config.companies.len() < 2 {
2871            debug!(
2872                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2873                self.config.companies.len()
2874            );
2875            return Ok(IntercompanySnapshot::default());
2876        }
2877
2878        info!("Phase 14b: Generating Intercompany Transactions");
2879
2880        let seed = self.seed;
2881        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2882            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2883        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2884
2885        // Build ownership structure from company configs
2886        // First company is treated as the parent, remaining are subsidiaries
2887        let parent_code = self.config.companies[0].code.clone();
2888        let mut ownership_structure =
2889            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2890
2891        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2892            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2893                format!("REL{:03}", i + 1),
2894                parent_code.clone(),
2895                company.code.clone(),
2896                rust_decimal::Decimal::from(100), // Default 100% ownership
2897                start_date,
2898            );
2899            ownership_structure.add_relationship(relationship);
2900        }
2901
2902        // Convert config transfer pricing method to core model enum
2903        let tp_method = match self.config.intercompany.transfer_pricing_method {
2904            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2905                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2906            }
2907            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2908                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2909            }
2910            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2911                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2912            }
2913            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2914                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2915            }
2916            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2917                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2918            }
2919        };
2920
2921        // Build IC generator config from schema config
2922        let ic_currency = self
2923            .config
2924            .companies
2925            .first()
2926            .map(|c| c.currency.clone())
2927            .unwrap_or_else(|| "USD".to_string());
2928        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2929            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2930            transfer_pricing_method: tp_method,
2931            markup_percent: rust_decimal::Decimal::from_f64_retain(
2932                self.config.intercompany.markup_percent,
2933            )
2934            .unwrap_or(rust_decimal::Decimal::from(5)),
2935            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2936            default_currency: ic_currency,
2937            ..Default::default()
2938        };
2939
2940        // Create IC generator
2941        let mut ic_generator = datasynth_generators::ICGenerator::new(
2942            ic_gen_config,
2943            ownership_structure.clone(),
2944            seed + 50,
2945        );
2946
2947        // Generate IC transactions for the period
2948        // Use ~3 transactions per day as a reasonable default
2949        let transactions_per_day = 3;
2950        let matched_pairs = ic_generator.generate_transactions_for_period(
2951            start_date,
2952            end_date,
2953            transactions_per_day,
2954        );
2955
2956        // Generate journal entries from matched pairs
2957        let mut seller_entries = Vec::new();
2958        let mut buyer_entries = Vec::new();
2959        let fiscal_year = start_date.year();
2960
2961        for pair in &matched_pairs {
2962            let fiscal_period = pair.posting_date.month();
2963            let (seller_je, buyer_je) =
2964                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2965            seller_entries.push(seller_je);
2966            buyer_entries.push(buyer_je);
2967        }
2968
2969        // Run matching engine
2970        let matching_config = datasynth_generators::ICMatchingConfig {
2971            base_currency: self
2972                .config
2973                .companies
2974                .first()
2975                .map(|c| c.currency.clone())
2976                .unwrap_or_else(|| "USD".to_string()),
2977            ..Default::default()
2978        };
2979        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2980        matching_engine.load_matched_pairs(&matched_pairs);
2981        let matching_result = matching_engine.run_matching(end_date);
2982
2983        // Generate elimination entries if configured
2984        let mut elimination_entries = Vec::new();
2985        if self.config.intercompany.generate_eliminations {
2986            let elim_config = datasynth_generators::EliminationConfig {
2987                consolidation_entity: "GROUP".to_string(),
2988                base_currency: self
2989                    .config
2990                    .companies
2991                    .first()
2992                    .map(|c| c.currency.clone())
2993                    .unwrap_or_else(|| "USD".to_string()),
2994                ..Default::default()
2995            };
2996
2997            let mut elim_generator =
2998                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2999
3000            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
3001            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
3002                matching_result
3003                    .matched_balances
3004                    .iter()
3005                    .chain(matching_result.unmatched_balances.iter())
3006                    .cloned()
3007                    .collect();
3008
3009            let journal = elim_generator.generate_eliminations(
3010                &fiscal_period,
3011                end_date,
3012                &all_balances,
3013                &matched_pairs,
3014                &std::collections::HashMap::new(), // investment amounts (simplified)
3015                &std::collections::HashMap::new(), // equity amounts (simplified)
3016            );
3017
3018            elimination_entries = journal.entries.clone();
3019        }
3020
3021        let matched_pair_count = matched_pairs.len();
3022        let elimination_entry_count = elimination_entries.len();
3023        let match_rate = matching_result.match_rate;
3024
3025        stats.ic_matched_pair_count = matched_pair_count;
3026        stats.ic_elimination_count = elimination_entry_count;
3027        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
3028
3029        info!(
3030            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
3031            matched_pair_count,
3032            stats.ic_transaction_count,
3033            seller_entries.len(),
3034            buyer_entries.len(),
3035            elimination_entry_count,
3036            match_rate * 100.0
3037        );
3038        self.check_resources_with_log("post-intercompany")?;
3039
3040        Ok(IntercompanySnapshot {
3041            matched_pairs,
3042            seller_journal_entries: seller_entries,
3043            buyer_journal_entries: buyer_entries,
3044            elimination_entries,
3045            matched_pair_count,
3046            elimination_entry_count,
3047            match_rate,
3048        })
3049    }
3050
3051    /// Phase 15: Generate bank reconciliations and financial statements.
3052    fn phase_financial_reporting(
3053        &mut self,
3054        document_flows: &DocumentFlowSnapshot,
3055        journal_entries: &[JournalEntry],
3056        coa: &Arc<ChartOfAccounts>,
3057        stats: &mut EnhancedGenerationStatistics,
3058    ) -> SynthResult<FinancialReportingSnapshot> {
3059        let fs_enabled = self.phase_config.generate_financial_statements
3060            || self.config.financial_reporting.enabled;
3061        let br_enabled = self.phase_config.generate_bank_reconciliation;
3062
3063        if !fs_enabled && !br_enabled {
3064            debug!("Phase 15: Skipped (financial reporting disabled)");
3065            return Ok(FinancialReportingSnapshot::default());
3066        }
3067
3068        info!("Phase 15: Generating Financial Reporting Data");
3069
3070        let seed = self.seed;
3071        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3072            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3073
3074        let mut financial_statements = Vec::new();
3075        let mut bank_reconciliations = Vec::new();
3076        let mut trial_balances = Vec::new();
3077
3078        // Generate financial statements from JE-derived trial balances.
3079        //
3080        // When journal entries are available, we use cumulative trial balances for
3081        // balance sheet accounts and current-period trial balances for income
3082        // statement accounts. We also track prior-period trial balances so the
3083        // generator can produce comparative amounts, and we build a proper
3084        // cash flow statement from working capital changes rather than random data.
3085        if fs_enabled {
3086            let company_code = self
3087                .config
3088                .companies
3089                .first()
3090                .map(|c| c.code.as_str())
3091                .unwrap_or("1000");
3092            let currency = self
3093                .config
3094                .companies
3095                .first()
3096                .map(|c| c.currency.as_str())
3097                .unwrap_or("USD");
3098            let has_journal_entries = !journal_entries.is_empty();
3099
3100            // Use FinancialStatementGenerator for balance sheet and income statement,
3101            // but build cash flow ourselves from TB data when JEs are available.
3102            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
3103
3104            // Track prior-period cumulative TB for comparative amounts and cash flow
3105            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
3106                None;
3107
3108            // Generate one set of statements per period
3109            for period in 0..self.config.global.period_months {
3110                let period_start = start_date + chrono::Months::new(period);
3111                let period_end =
3112                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3113                let fiscal_year = period_end.year() as u16;
3114                let fiscal_period = period_end.month() as u8;
3115
3116                if has_journal_entries {
3117                    // Build cumulative trial balance from actual JEs for coherent
3118                    // balance sheet (cumulative) and income statement (current period)
3119                    let tb_entries = Self::build_cumulative_trial_balance(
3120                        journal_entries,
3121                        coa,
3122                        company_code,
3123                        start_date,
3124                        period_end,
3125                        fiscal_year,
3126                        fiscal_period,
3127                    );
3128
3129                    // Generate balance sheet and income statement via the generator,
3130                    // passing prior-period TB for comparative amounts
3131                    let prior_ref = prior_cumulative_tb.as_deref();
3132                    let stmts = fs_gen.generate(
3133                        company_code,
3134                        currency,
3135                        &tb_entries,
3136                        period_start,
3137                        period_end,
3138                        fiscal_year,
3139                        fiscal_period,
3140                        prior_ref,
3141                        "SYS-AUTOCLOSE",
3142                    );
3143
3144                    // Replace the generator's random cash flow with our TB-derived one
3145                    for stmt in stmts {
3146                        if stmt.statement_type == StatementType::CashFlowStatement {
3147                            // Build a coherent cash flow from trial balance changes
3148                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
3149                            let cf_items = Self::build_cash_flow_from_trial_balances(
3150                                &tb_entries,
3151                                prior_ref,
3152                                net_income,
3153                            );
3154                            financial_statements.push(FinancialStatement {
3155                                cash_flow_items: cf_items,
3156                                ..stmt
3157                            });
3158                        } else {
3159                            financial_statements.push(stmt);
3160                        }
3161                    }
3162
3163                    // Store current TB in snapshot for output
3164                    trial_balances.push(PeriodTrialBalance {
3165                        fiscal_year,
3166                        fiscal_period,
3167                        period_start,
3168                        period_end,
3169                        entries: tb_entries.clone(),
3170                    });
3171
3172                    // Store current TB as prior for next period
3173                    prior_cumulative_tb = Some(tb_entries);
3174                } else {
3175                    // Fallback: no JEs available, use single-period TB from entries
3176                    // (which will be empty, producing zero-valued statements)
3177                    let tb_entries = Self::build_trial_balance_from_entries(
3178                        journal_entries,
3179                        coa,
3180                        company_code,
3181                        fiscal_year,
3182                        fiscal_period,
3183                    );
3184
3185                    let stmts = fs_gen.generate(
3186                        company_code,
3187                        currency,
3188                        &tb_entries,
3189                        period_start,
3190                        period_end,
3191                        fiscal_year,
3192                        fiscal_period,
3193                        None,
3194                        "SYS-AUTOCLOSE",
3195                    );
3196                    financial_statements.extend(stmts);
3197
3198                    // Store trial balance even in fallback path
3199                    if !tb_entries.is_empty() {
3200                        trial_balances.push(PeriodTrialBalance {
3201                            fiscal_year,
3202                            fiscal_period,
3203                            period_start,
3204                            period_end,
3205                            entries: tb_entries,
3206                        });
3207                    }
3208                }
3209            }
3210            stats.financial_statement_count = financial_statements.len();
3211            info!(
3212                "Financial statements generated: {} statements (JE-derived: {})",
3213                stats.financial_statement_count, has_journal_entries
3214            );
3215        }
3216
3217        // Generate bank reconciliations from payment data
3218        if br_enabled && !document_flows.payments.is_empty() {
3219            let employee_ids: Vec<String> = self
3220                .master_data
3221                .employees
3222                .iter()
3223                .map(|e| e.employee_id.clone())
3224                .collect();
3225            let mut br_gen =
3226                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
3227
3228            // Group payments by company code and period
3229            for company in &self.config.companies {
3230                let company_payments: Vec<PaymentReference> = document_flows
3231                    .payments
3232                    .iter()
3233                    .filter(|p| p.header.company_code == company.code)
3234                    .map(|p| PaymentReference {
3235                        id: p.header.document_id.clone(),
3236                        amount: if p.is_vendor { p.amount } else { -p.amount },
3237                        date: p.header.document_date,
3238                        reference: p
3239                            .check_number
3240                            .clone()
3241                            .or_else(|| p.wire_reference.clone())
3242                            .unwrap_or_else(|| p.header.document_id.clone()),
3243                    })
3244                    .collect();
3245
3246                if company_payments.is_empty() {
3247                    continue;
3248                }
3249
3250                let bank_account_id = format!("{}-MAIN", company.code);
3251
3252                // Generate one reconciliation per period
3253                for period in 0..self.config.global.period_months {
3254                    let period_start = start_date + chrono::Months::new(period);
3255                    let period_end =
3256                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3257
3258                    let period_payments: Vec<PaymentReference> = company_payments
3259                        .iter()
3260                        .filter(|p| p.date >= period_start && p.date <= period_end)
3261                        .cloned()
3262                        .collect();
3263
3264                    let recon = br_gen.generate(
3265                        &company.code,
3266                        &bank_account_id,
3267                        period_start,
3268                        period_end,
3269                        &company.currency,
3270                        &period_payments,
3271                    );
3272                    bank_reconciliations.push(recon);
3273                }
3274            }
3275            info!(
3276                "Bank reconciliations generated: {} reconciliations",
3277                bank_reconciliations.len()
3278            );
3279        }
3280
3281        stats.bank_reconciliation_count = bank_reconciliations.len();
3282        self.check_resources_with_log("post-financial-reporting")?;
3283
3284        if !trial_balances.is_empty() {
3285            info!(
3286                "Period-close trial balances captured: {} periods",
3287                trial_balances.len()
3288            );
3289        }
3290
3291        Ok(FinancialReportingSnapshot {
3292            financial_statements,
3293            bank_reconciliations,
3294            trial_balances,
3295        })
3296    }
3297
3298    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3299    ///
3300    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3301    /// posted in the journal entries flows through to the trial balance, using the real
3302    /// GL account numbers from the CoA.
3303    fn build_trial_balance_from_entries(
3304        journal_entries: &[JournalEntry],
3305        coa: &ChartOfAccounts,
3306        company_code: &str,
3307        fiscal_year: u16,
3308        fiscal_period: u8,
3309    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3310        use rust_decimal::Decimal;
3311
3312        // Accumulate total debits and credits per GL account
3313        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3314        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3315
3316        for je in journal_entries {
3317            // Filter to matching company, fiscal year, and period
3318            if je.header.company_code != company_code
3319                || je.header.fiscal_year != fiscal_year
3320                || je.header.fiscal_period != fiscal_period
3321            {
3322                continue;
3323            }
3324
3325            for line in &je.lines {
3326                let acct = &line.gl_account;
3327                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3328                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3329            }
3330        }
3331
3332        // Build a TrialBalanceEntry for each account that had activity
3333        let mut all_accounts: Vec<&String> = account_debits
3334            .keys()
3335            .chain(account_credits.keys())
3336            .collect::<std::collections::HashSet<_>>()
3337            .into_iter()
3338            .collect();
3339        all_accounts.sort();
3340
3341        let mut entries = Vec::new();
3342
3343        for acct_number in all_accounts {
3344            let debit = account_debits
3345                .get(acct_number)
3346                .copied()
3347                .unwrap_or(Decimal::ZERO);
3348            let credit = account_credits
3349                .get(acct_number)
3350                .copied()
3351                .unwrap_or(Decimal::ZERO);
3352
3353            if debit.is_zero() && credit.is_zero() {
3354                continue;
3355            }
3356
3357            // Look up account name from CoA, fall back to "Account {code}"
3358            let account_name = coa
3359                .get_account(acct_number)
3360                .map(|gl| gl.short_description.clone())
3361                .unwrap_or_else(|| format!("Account {acct_number}"));
3362
3363            // Map account code prefix to the category strings expected by
3364            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3365            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3366            // OperatingExpenses).
3367            let category = Self::category_from_account_code(acct_number);
3368
3369            entries.push(datasynth_generators::TrialBalanceEntry {
3370                account_code: acct_number.clone(),
3371                account_name,
3372                category,
3373                debit_balance: debit,
3374                credit_balance: credit,
3375            });
3376        }
3377
3378        entries
3379    }
3380
3381    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3382    /// (and including) the given period end date.
3383    ///
3384    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3385    /// while income statement accounts (revenue, expenses) show only the current period.
3386    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3387    fn build_cumulative_trial_balance(
3388        journal_entries: &[JournalEntry],
3389        coa: &ChartOfAccounts,
3390        company_code: &str,
3391        start_date: NaiveDate,
3392        period_end: NaiveDate,
3393        fiscal_year: u16,
3394        fiscal_period: u8,
3395    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3396        use rust_decimal::Decimal;
3397
3398        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3399        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3400        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3401
3402        // Accumulate debits/credits for income statement accounts (current period only)
3403        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3404        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3405
3406        for je in journal_entries {
3407            if je.header.company_code != company_code {
3408                continue;
3409            }
3410
3411            for line in &je.lines {
3412                let acct = &line.gl_account;
3413                let category = Self::category_from_account_code(acct);
3414                let is_bs_account = matches!(
3415                    category.as_str(),
3416                    "Cash"
3417                        | "Receivables"
3418                        | "Inventory"
3419                        | "FixedAssets"
3420                        | "Payables"
3421                        | "AccruedLiabilities"
3422                        | "LongTermDebt"
3423                        | "Equity"
3424                );
3425
3426                if is_bs_account {
3427                    // Balance sheet: accumulate from start through period_end
3428                    if je.header.document_date <= period_end
3429                        && je.header.document_date >= start_date
3430                    {
3431                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3432                            line.debit_amount;
3433                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3434                            line.credit_amount;
3435                    }
3436                } else {
3437                    // Income statement: current period only
3438                    if je.header.fiscal_year == fiscal_year
3439                        && je.header.fiscal_period == fiscal_period
3440                    {
3441                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3442                            line.debit_amount;
3443                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3444                            line.credit_amount;
3445                    }
3446                }
3447            }
3448        }
3449
3450        // Merge all accounts
3451        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3452        all_accounts.extend(bs_debits.keys().cloned());
3453        all_accounts.extend(bs_credits.keys().cloned());
3454        all_accounts.extend(is_debits.keys().cloned());
3455        all_accounts.extend(is_credits.keys().cloned());
3456
3457        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3458        sorted_accounts.sort();
3459
3460        let mut entries = Vec::new();
3461
3462        for acct_number in &sorted_accounts {
3463            let category = Self::category_from_account_code(acct_number);
3464            let is_bs_account = matches!(
3465                category.as_str(),
3466                "Cash"
3467                    | "Receivables"
3468                    | "Inventory"
3469                    | "FixedAssets"
3470                    | "Payables"
3471                    | "AccruedLiabilities"
3472                    | "LongTermDebt"
3473                    | "Equity"
3474            );
3475
3476            let (debit, credit) = if is_bs_account {
3477                (
3478                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3479                    bs_credits
3480                        .get(acct_number)
3481                        .copied()
3482                        .unwrap_or(Decimal::ZERO),
3483                )
3484            } else {
3485                (
3486                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3487                    is_credits
3488                        .get(acct_number)
3489                        .copied()
3490                        .unwrap_or(Decimal::ZERO),
3491                )
3492            };
3493
3494            if debit.is_zero() && credit.is_zero() {
3495                continue;
3496            }
3497
3498            let account_name = coa
3499                .get_account(acct_number)
3500                .map(|gl| gl.short_description.clone())
3501                .unwrap_or_else(|| format!("Account {acct_number}"));
3502
3503            entries.push(datasynth_generators::TrialBalanceEntry {
3504                account_code: acct_number.clone(),
3505                account_name,
3506                category,
3507                debit_balance: debit,
3508                credit_balance: credit,
3509            });
3510        }
3511
3512        entries
3513    }
3514
3515    /// Build a JE-derived cash flow statement using the indirect method.
3516    ///
3517    /// Compares current and prior cumulative trial balances to derive working capital
3518    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3519    fn build_cash_flow_from_trial_balances(
3520        current_tb: &[datasynth_generators::TrialBalanceEntry],
3521        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3522        net_income: rust_decimal::Decimal,
3523    ) -> Vec<CashFlowItem> {
3524        use rust_decimal::Decimal;
3525
3526        // Helper: aggregate a TB by category and return net (debit - credit)
3527        let aggregate =
3528            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3529                let mut map: HashMap<String, Decimal> = HashMap::new();
3530                for entry in tb {
3531                    let net = entry.debit_balance - entry.credit_balance;
3532                    *map.entry(entry.category.clone()).or_default() += net;
3533                }
3534                map
3535            };
3536
3537        let current = aggregate(current_tb);
3538        let prior = prior_tb.map(aggregate);
3539
3540        // Get balance for a category, defaulting to zero
3541        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3542            *map.get(key).unwrap_or(&Decimal::ZERO)
3543        };
3544
3545        // Compute change: current - prior (or current if no prior)
3546        let change = |key: &str| -> Decimal {
3547            let curr = get(&current, key);
3548            match &prior {
3549                Some(p) => curr - get(p, key),
3550                None => curr,
3551            }
3552        };
3553
3554        // Operating activities (indirect method)
3555        // Depreciation add-back: approximate from FixedAssets decrease
3556        let fixed_asset_change = change("FixedAssets");
3557        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3558            -fixed_asset_change
3559        } else {
3560            Decimal::ZERO
3561        };
3562
3563        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3564        let ar_change = change("Receivables");
3565        let inventory_change = change("Inventory");
3566        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3567        let ap_change = change("Payables");
3568        let accrued_change = change("AccruedLiabilities");
3569
3570        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3571            + (-ap_change)
3572            + (-accrued_change);
3573
3574        // Investing activities
3575        let capex = if fixed_asset_change > Decimal::ZERO {
3576            -fixed_asset_change
3577        } else {
3578            Decimal::ZERO
3579        };
3580        let investing_cf = capex;
3581
3582        // Financing activities
3583        let debt_change = -change("LongTermDebt");
3584        let equity_change = -change("Equity");
3585        let financing_cf = debt_change + equity_change;
3586
3587        let net_change = operating_cf + investing_cf + financing_cf;
3588
3589        vec![
3590            CashFlowItem {
3591                item_code: "CF-NI".to_string(),
3592                label: "Net Income".to_string(),
3593                category: CashFlowCategory::Operating,
3594                amount: net_income,
3595                amount_prior: None,
3596                sort_order: 1,
3597                is_total: false,
3598            },
3599            CashFlowItem {
3600                item_code: "CF-DEP".to_string(),
3601                label: "Depreciation & Amortization".to_string(),
3602                category: CashFlowCategory::Operating,
3603                amount: depreciation_addback,
3604                amount_prior: None,
3605                sort_order: 2,
3606                is_total: false,
3607            },
3608            CashFlowItem {
3609                item_code: "CF-AR".to_string(),
3610                label: "Change in Accounts Receivable".to_string(),
3611                category: CashFlowCategory::Operating,
3612                amount: -ar_change,
3613                amount_prior: None,
3614                sort_order: 3,
3615                is_total: false,
3616            },
3617            CashFlowItem {
3618                item_code: "CF-AP".to_string(),
3619                label: "Change in Accounts Payable".to_string(),
3620                category: CashFlowCategory::Operating,
3621                amount: -ap_change,
3622                amount_prior: None,
3623                sort_order: 4,
3624                is_total: false,
3625            },
3626            CashFlowItem {
3627                item_code: "CF-INV".to_string(),
3628                label: "Change in Inventory".to_string(),
3629                category: CashFlowCategory::Operating,
3630                amount: -inventory_change,
3631                amount_prior: None,
3632                sort_order: 5,
3633                is_total: false,
3634            },
3635            CashFlowItem {
3636                item_code: "CF-OP".to_string(),
3637                label: "Net Cash from Operating Activities".to_string(),
3638                category: CashFlowCategory::Operating,
3639                amount: operating_cf,
3640                amount_prior: None,
3641                sort_order: 6,
3642                is_total: true,
3643            },
3644            CashFlowItem {
3645                item_code: "CF-CAPEX".to_string(),
3646                label: "Capital Expenditures".to_string(),
3647                category: CashFlowCategory::Investing,
3648                amount: capex,
3649                amount_prior: None,
3650                sort_order: 7,
3651                is_total: false,
3652            },
3653            CashFlowItem {
3654                item_code: "CF-INV-T".to_string(),
3655                label: "Net Cash from Investing Activities".to_string(),
3656                category: CashFlowCategory::Investing,
3657                amount: investing_cf,
3658                amount_prior: None,
3659                sort_order: 8,
3660                is_total: true,
3661            },
3662            CashFlowItem {
3663                item_code: "CF-DEBT".to_string(),
3664                label: "Net Borrowings / (Repayments)".to_string(),
3665                category: CashFlowCategory::Financing,
3666                amount: debt_change,
3667                amount_prior: None,
3668                sort_order: 9,
3669                is_total: false,
3670            },
3671            CashFlowItem {
3672                item_code: "CF-EQ".to_string(),
3673                label: "Equity Changes".to_string(),
3674                category: CashFlowCategory::Financing,
3675                amount: equity_change,
3676                amount_prior: None,
3677                sort_order: 10,
3678                is_total: false,
3679            },
3680            CashFlowItem {
3681                item_code: "CF-FIN-T".to_string(),
3682                label: "Net Cash from Financing Activities".to_string(),
3683                category: CashFlowCategory::Financing,
3684                amount: financing_cf,
3685                amount_prior: None,
3686                sort_order: 11,
3687                is_total: true,
3688            },
3689            CashFlowItem {
3690                item_code: "CF-NET".to_string(),
3691                label: "Net Change in Cash".to_string(),
3692                category: CashFlowCategory::Operating,
3693                amount: net_change,
3694                amount_prior: None,
3695                sort_order: 12,
3696                is_total: true,
3697            },
3698        ]
3699    }
3700
3701    /// Calculate net income from a set of trial balance entries.
3702    ///
3703    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3704    fn calculate_net_income_from_tb(
3705        tb: &[datasynth_generators::TrialBalanceEntry],
3706    ) -> rust_decimal::Decimal {
3707        use rust_decimal::Decimal;
3708
3709        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3710        for entry in tb {
3711            let net = entry.debit_balance - entry.credit_balance;
3712            *aggregated.entry(entry.category.clone()).or_default() += net;
3713        }
3714
3715        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3716        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3717        let opex = *aggregated
3718            .get("OperatingExpenses")
3719            .unwrap_or(&Decimal::ZERO);
3720        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3721        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3722
3723        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3724        // other_income is typically negative (credit), other_expenses is typically positive
3725        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3726        let tax_rate = Decimal::new(25, 2); // 0.25
3727        let tax = operating_income * tax_rate;
3728        operating_income - tax
3729    }
3730
3731    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
3732    ///
3733    /// Uses the first two digits of the account code to classify into the categories
3734    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
3735    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
3736    /// OperatingExpenses, OtherIncome, OtherExpenses.
3737    fn category_from_account_code(code: &str) -> String {
3738        let prefix: String = code.chars().take(2).collect();
3739        match prefix.as_str() {
3740            "10" => "Cash",
3741            "11" => "Receivables",
3742            "12" | "13" | "14" => "Inventory",
3743            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
3744            "20" => "Payables",
3745            "21" | "22" | "23" | "24" => "AccruedLiabilities",
3746            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
3747            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
3748            "40" | "41" | "42" | "43" | "44" => "Revenue",
3749            "50" | "51" | "52" => "CostOfSales",
3750            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
3751                "OperatingExpenses"
3752            }
3753            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
3754            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
3755            _ => "OperatingExpenses",
3756        }
3757        .to_string()
3758    }
3759
3760    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3761    fn phase_hr_data(
3762        &mut self,
3763        stats: &mut EnhancedGenerationStatistics,
3764    ) -> SynthResult<HrSnapshot> {
3765        if !self.config.hr.enabled {
3766            debug!("Phase 16: Skipped (HR generation disabled)");
3767            return Ok(HrSnapshot::default());
3768        }
3769
3770        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3771
3772        let seed = self.seed;
3773        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3774            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3775        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3776        let company_code = self
3777            .config
3778            .companies
3779            .first()
3780            .map(|c| c.code.as_str())
3781            .unwrap_or("1000");
3782        let currency = self
3783            .config
3784            .companies
3785            .first()
3786            .map(|c| c.currency.as_str())
3787            .unwrap_or("USD");
3788
3789        let employee_ids: Vec<String> = self
3790            .master_data
3791            .employees
3792            .iter()
3793            .map(|e| e.employee_id.clone())
3794            .collect();
3795
3796        if employee_ids.is_empty() {
3797            debug!("Phase 16: Skipped (no employees available)");
3798            return Ok(HrSnapshot::default());
3799        }
3800
3801        // Extract cost-center pool from master data employees for cross-reference
3802        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3803        let cost_center_ids: Vec<String> = self
3804            .master_data
3805            .employees
3806            .iter()
3807            .filter_map(|e| e.cost_center.clone())
3808            .collect::<std::collections::HashSet<_>>()
3809            .into_iter()
3810            .collect();
3811
3812        let mut snapshot = HrSnapshot::default();
3813
3814        // Generate payroll runs (one per month)
3815        if self.config.hr.payroll.enabled {
3816            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3817                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3818
3819            // Look up country pack for payroll deductions and labels
3820            let payroll_pack = self.primary_pack();
3821
3822            // Store the pack on the generator so generate() resolves
3823            // localized deduction rates and labels from it.
3824            payroll_gen.set_country_pack(payroll_pack.clone());
3825
3826            let employees_with_salary: Vec<(
3827                String,
3828                rust_decimal::Decimal,
3829                Option<String>,
3830                Option<String>,
3831            )> = self
3832                .master_data
3833                .employees
3834                .iter()
3835                .map(|e| {
3836                    (
3837                        e.employee_id.clone(),
3838                        rust_decimal::Decimal::from(5000), // Default monthly salary
3839                        e.cost_center.clone(),
3840                        e.department_id.clone(),
3841                    )
3842                })
3843                .collect();
3844
3845            for month in 0..self.config.global.period_months {
3846                let period_start = start_date + chrono::Months::new(month);
3847                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3848                let (run, items) = payroll_gen.generate(
3849                    company_code,
3850                    &employees_with_salary,
3851                    period_start,
3852                    period_end,
3853                    currency,
3854                );
3855                snapshot.payroll_runs.push(run);
3856                snapshot.payroll_run_count += 1;
3857                snapshot.payroll_line_item_count += items.len();
3858                snapshot.payroll_line_items.extend(items);
3859            }
3860        }
3861
3862        // Generate time entries
3863        if self.config.hr.time_attendance.enabled {
3864            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3865                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3866            let entries = time_gen.generate(
3867                &employee_ids,
3868                start_date,
3869                end_date,
3870                &self.config.hr.time_attendance,
3871            );
3872            snapshot.time_entry_count = entries.len();
3873            snapshot.time_entries = entries;
3874        }
3875
3876        // Generate expense reports
3877        if self.config.hr.expenses.enabled {
3878            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3879                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3880            expense_gen.set_country_pack(self.primary_pack().clone());
3881            let company_currency = self
3882                .config
3883                .companies
3884                .first()
3885                .map(|c| c.currency.as_str())
3886                .unwrap_or("USD");
3887            let reports = expense_gen.generate_with_currency(
3888                &employee_ids,
3889                start_date,
3890                end_date,
3891                &self.config.hr.expenses,
3892                company_currency,
3893            );
3894            snapshot.expense_report_count = reports.len();
3895            snapshot.expense_reports = reports;
3896        }
3897
3898        // Generate benefit enrollments (gated on payroll, since benefits require employees)
3899        if self.config.hr.payroll.enabled {
3900            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
3901            let employee_pairs: Vec<(String, String)> = self
3902                .master_data
3903                .employees
3904                .iter()
3905                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
3906                .collect();
3907            let enrollments =
3908                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
3909            snapshot.benefit_enrollment_count = enrollments.len();
3910            snapshot.benefit_enrollments = enrollments;
3911        }
3912
3913        stats.payroll_run_count = snapshot.payroll_run_count;
3914        stats.time_entry_count = snapshot.time_entry_count;
3915        stats.expense_report_count = snapshot.expense_report_count;
3916        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
3917
3918        info!(
3919            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments",
3920            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3921            snapshot.time_entry_count, snapshot.expense_report_count,
3922            snapshot.benefit_enrollment_count
3923        );
3924        self.check_resources_with_log("post-hr")?;
3925
3926        Ok(snapshot)
3927    }
3928
3929    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
3930    fn phase_accounting_standards(
3931        &mut self,
3932        stats: &mut EnhancedGenerationStatistics,
3933    ) -> SynthResult<AccountingStandardsSnapshot> {
3934        if !self.phase_config.generate_accounting_standards
3935            || !self.config.accounting_standards.enabled
3936        {
3937            debug!("Phase 17: Skipped (accounting standards generation disabled)");
3938            return Ok(AccountingStandardsSnapshot::default());
3939        }
3940        info!("Phase 17: Generating Accounting Standards Data");
3941
3942        let seed = self.seed;
3943        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3944            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3945        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3946        let company_code = self
3947            .config
3948            .companies
3949            .first()
3950            .map(|c| c.code.as_str())
3951            .unwrap_or("1000");
3952        let currency = self
3953            .config
3954            .companies
3955            .first()
3956            .map(|c| c.currency.as_str())
3957            .unwrap_or("USD");
3958
3959        // Convert config framework to standards framework.
3960        // If the user explicitly set a framework in the YAML config, use that.
3961        // Otherwise, fall back to the country pack's accounting.framework field,
3962        // and if that is also absent or unrecognised, default to US GAAP.
3963        let framework = match self.config.accounting_standards.framework {
3964            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3965                datasynth_standards::framework::AccountingFramework::UsGaap
3966            }
3967            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3968                datasynth_standards::framework::AccountingFramework::Ifrs
3969            }
3970            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3971                datasynth_standards::framework::AccountingFramework::DualReporting
3972            }
3973            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3974                datasynth_standards::framework::AccountingFramework::FrenchGaap
3975            }
3976            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3977                datasynth_standards::framework::AccountingFramework::GermanGaap
3978            }
3979            None => {
3980                // Derive framework from the primary company's country pack
3981                let pack = self.primary_pack();
3982                let pack_fw = pack.accounting.framework.as_str();
3983                match pack_fw {
3984                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3985                    "dual_reporting" => {
3986                        datasynth_standards::framework::AccountingFramework::DualReporting
3987                    }
3988                    "french_gaap" => {
3989                        datasynth_standards::framework::AccountingFramework::FrenchGaap
3990                    }
3991                    "german_gaap" | "hgb" => {
3992                        datasynth_standards::framework::AccountingFramework::GermanGaap
3993                    }
3994                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
3995                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3996                }
3997            }
3998        };
3999
4000        let mut snapshot = AccountingStandardsSnapshot::default();
4001
4002        // Revenue recognition
4003        if self.config.accounting_standards.revenue_recognition.enabled {
4004            let customer_ids: Vec<String> = self
4005                .master_data
4006                .customers
4007                .iter()
4008                .map(|c| c.customer_id.clone())
4009                .collect();
4010
4011            if !customer_ids.is_empty() {
4012                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
4013                let contracts = rev_gen.generate(
4014                    company_code,
4015                    &customer_ids,
4016                    start_date,
4017                    end_date,
4018                    currency,
4019                    &self.config.accounting_standards.revenue_recognition,
4020                    framework,
4021                );
4022                snapshot.revenue_contract_count = contracts.len();
4023                snapshot.contracts = contracts;
4024            }
4025        }
4026
4027        // Impairment testing
4028        if self.config.accounting_standards.impairment.enabled {
4029            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
4030                .master_data
4031                .assets
4032                .iter()
4033                .map(|a| {
4034                    (
4035                        a.asset_id.clone(),
4036                        a.description.clone(),
4037                        a.acquisition_cost,
4038                    )
4039                })
4040                .collect();
4041
4042            if !asset_data.is_empty() {
4043                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
4044                let tests = imp_gen.generate(
4045                    company_code,
4046                    &asset_data,
4047                    end_date,
4048                    &self.config.accounting_standards.impairment,
4049                    framework,
4050                );
4051                snapshot.impairment_test_count = tests.len();
4052                snapshot.impairment_tests = tests;
4053            }
4054        }
4055
4056        stats.revenue_contract_count = snapshot.revenue_contract_count;
4057        stats.impairment_test_count = snapshot.impairment_test_count;
4058
4059        info!(
4060            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
4061            snapshot.revenue_contract_count, snapshot.impairment_test_count
4062        );
4063        self.check_resources_with_log("post-accounting-standards")?;
4064
4065        Ok(snapshot)
4066    }
4067
4068    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
4069    fn phase_manufacturing(
4070        &mut self,
4071        stats: &mut EnhancedGenerationStatistics,
4072    ) -> SynthResult<ManufacturingSnapshot> {
4073        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
4074            debug!("Phase 18: Skipped (manufacturing generation disabled)");
4075            return Ok(ManufacturingSnapshot::default());
4076        }
4077        info!("Phase 18: Generating Manufacturing Data");
4078
4079        let seed = self.seed;
4080        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4081            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4082        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4083        let company_code = self
4084            .config
4085            .companies
4086            .first()
4087            .map(|c| c.code.as_str())
4088            .unwrap_or("1000");
4089
4090        let material_data: Vec<(String, String)> = self
4091            .master_data
4092            .materials
4093            .iter()
4094            .map(|m| (m.material_id.clone(), m.description.clone()))
4095            .collect();
4096
4097        if material_data.is_empty() {
4098            debug!("Phase 18: Skipped (no materials available)");
4099            return Ok(ManufacturingSnapshot::default());
4100        }
4101
4102        let mut snapshot = ManufacturingSnapshot::default();
4103
4104        // Generate production orders
4105        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
4106        let production_orders = prod_gen.generate(
4107            company_code,
4108            &material_data,
4109            start_date,
4110            end_date,
4111            &self.config.manufacturing.production_orders,
4112            &self.config.manufacturing.costing,
4113            &self.config.manufacturing.routing,
4114        );
4115        snapshot.production_order_count = production_orders.len();
4116
4117        // Generate quality inspections from production orders
4118        let inspection_data: Vec<(String, String, String)> = production_orders
4119            .iter()
4120            .map(|po| {
4121                (
4122                    po.order_id.clone(),
4123                    po.material_id.clone(),
4124                    po.material_description.clone(),
4125                )
4126            })
4127            .collect();
4128
4129        snapshot.production_orders = production_orders;
4130
4131        if !inspection_data.is_empty() {
4132            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
4133            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
4134            snapshot.quality_inspection_count = inspections.len();
4135            snapshot.quality_inspections = inspections;
4136        }
4137
4138        // Generate cycle counts (one per month)
4139        let storage_locations: Vec<(String, String)> = material_data
4140            .iter()
4141            .enumerate()
4142            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
4143            .collect();
4144
4145        let employee_ids: Vec<String> = self
4146            .master_data
4147            .employees
4148            .iter()
4149            .map(|e| e.employee_id.clone())
4150            .collect();
4151        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
4152            .with_employee_pool(employee_ids);
4153        let mut cycle_count_total = 0usize;
4154        for month in 0..self.config.global.period_months {
4155            let count_date = start_date + chrono::Months::new(month);
4156            let items_per_count = storage_locations.len().clamp(10, 50);
4157            let cc = cc_gen.generate(
4158                company_code,
4159                &storage_locations,
4160                count_date,
4161                items_per_count,
4162            );
4163            snapshot.cycle_counts.push(cc);
4164            cycle_count_total += 1;
4165        }
4166        snapshot.cycle_count_count = cycle_count_total;
4167
4168        // Generate BOM components
4169        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
4170        let bom_components = bom_gen.generate(company_code, &material_data);
4171        snapshot.bom_component_count = bom_components.len();
4172        snapshot.bom_components = bom_components;
4173
4174        // Generate inventory movements
4175        let currency = self
4176            .config
4177            .companies
4178            .first()
4179            .map(|c| c.currency.as_str())
4180            .unwrap_or("USD");
4181        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
4182        let inventory_movements = inv_mov_gen.generate(
4183            company_code,
4184            &material_data,
4185            start_date,
4186            end_date,
4187            2,
4188            currency,
4189        );
4190        snapshot.inventory_movement_count = inventory_movements.len();
4191        snapshot.inventory_movements = inventory_movements;
4192
4193        stats.production_order_count = snapshot.production_order_count;
4194        stats.quality_inspection_count = snapshot.quality_inspection_count;
4195        stats.cycle_count_count = snapshot.cycle_count_count;
4196        stats.bom_component_count = snapshot.bom_component_count;
4197        stats.inventory_movement_count = snapshot.inventory_movement_count;
4198
4199        info!(
4200            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
4201            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
4202            snapshot.bom_component_count, snapshot.inventory_movement_count
4203        );
4204        self.check_resources_with_log("post-manufacturing")?;
4205
4206        Ok(snapshot)
4207    }
4208
4209    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
4210    fn phase_sales_kpi_budgets(
4211        &mut self,
4212        coa: &Arc<ChartOfAccounts>,
4213        financial_reporting: &FinancialReportingSnapshot,
4214        stats: &mut EnhancedGenerationStatistics,
4215    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
4216        if !self.phase_config.generate_sales_kpi_budgets {
4217            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
4218            return Ok(SalesKpiBudgetsSnapshot::default());
4219        }
4220        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
4221
4222        let seed = self.seed;
4223        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4224            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4225        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4226        let company_code = self
4227            .config
4228            .companies
4229            .first()
4230            .map(|c| c.code.as_str())
4231            .unwrap_or("1000");
4232
4233        let mut snapshot = SalesKpiBudgetsSnapshot::default();
4234
4235        // Sales Quotes
4236        if self.config.sales_quotes.enabled {
4237            let customer_data: Vec<(String, String)> = self
4238                .master_data
4239                .customers
4240                .iter()
4241                .map(|c| (c.customer_id.clone(), c.name.clone()))
4242                .collect();
4243            let material_data: Vec<(String, String)> = self
4244                .master_data
4245                .materials
4246                .iter()
4247                .map(|m| (m.material_id.clone(), m.description.clone()))
4248                .collect();
4249
4250            if !customer_data.is_empty() && !material_data.is_empty() {
4251                let employee_ids: Vec<String> = self
4252                    .master_data
4253                    .employees
4254                    .iter()
4255                    .map(|e| e.employee_id.clone())
4256                    .collect();
4257                let customer_ids: Vec<String> = self
4258                    .master_data
4259                    .customers
4260                    .iter()
4261                    .map(|c| c.customer_id.clone())
4262                    .collect();
4263                let company_currency = self
4264                    .config
4265                    .companies
4266                    .first()
4267                    .map(|c| c.currency.as_str())
4268                    .unwrap_or("USD");
4269
4270                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
4271                    .with_pools(employee_ids, customer_ids);
4272                let quotes = quote_gen.generate_with_currency(
4273                    company_code,
4274                    &customer_data,
4275                    &material_data,
4276                    start_date,
4277                    end_date,
4278                    &self.config.sales_quotes,
4279                    company_currency,
4280                );
4281                snapshot.sales_quote_count = quotes.len();
4282                snapshot.sales_quotes = quotes;
4283            }
4284        }
4285
4286        // Management KPIs
4287        if self.config.financial_reporting.management_kpis.enabled {
4288            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
4289            let mut kpis = kpi_gen.generate(
4290                company_code,
4291                start_date,
4292                end_date,
4293                &self.config.financial_reporting.management_kpis,
4294            );
4295
4296            // Override financial KPIs with actual data from financial statements
4297            {
4298                use rust_decimal::Decimal;
4299
4300                if let Some(income_stmt) =
4301                    financial_reporting.financial_statements.iter().find(|fs| {
4302                        fs.statement_type == StatementType::IncomeStatement
4303                            && fs.company_code == company_code
4304                    })
4305                {
4306                    // Extract revenue and COGS from income statement line items
4307                    let total_revenue: Decimal = income_stmt
4308                        .line_items
4309                        .iter()
4310                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
4311                        .map(|li| li.amount)
4312                        .sum();
4313                    let total_cogs: Decimal = income_stmt
4314                        .line_items
4315                        .iter()
4316                        .filter(|li| {
4317                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4318                                && !li.is_total
4319                        })
4320                        .map(|li| li.amount.abs())
4321                        .sum();
4322                    let total_opex: Decimal = income_stmt
4323                        .line_items
4324                        .iter()
4325                        .filter(|li| {
4326                            li.section.contains("Expense")
4327                                && !li.is_total
4328                                && !li.section.contains("Cost")
4329                        })
4330                        .map(|li| li.amount.abs())
4331                        .sum();
4332
4333                    if total_revenue > Decimal::ZERO {
4334                        let hundred = Decimal::from(100);
4335                        let gross_margin_pct =
4336                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4337                        let operating_income = total_revenue - total_cogs - total_opex;
4338                        let op_margin_pct =
4339                            (operating_income * hundred / total_revenue).round_dp(2);
4340
4341                        // Override gross margin and operating margin KPIs
4342                        for kpi in &mut kpis {
4343                            if kpi.name == "Gross Margin" {
4344                                kpi.value = gross_margin_pct;
4345                            } else if kpi.name == "Operating Margin" {
4346                                kpi.value = op_margin_pct;
4347                            }
4348                        }
4349                    }
4350                }
4351
4352                // Override Current Ratio from balance sheet
4353                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4354                    fs.statement_type == StatementType::BalanceSheet
4355                        && fs.company_code == company_code
4356                }) {
4357                    let current_assets: Decimal = bs
4358                        .line_items
4359                        .iter()
4360                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4361                        .map(|li| li.amount)
4362                        .sum();
4363                    let current_liabilities: Decimal = bs
4364                        .line_items
4365                        .iter()
4366                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4367                        .map(|li| li.amount.abs())
4368                        .sum();
4369
4370                    if current_liabilities > Decimal::ZERO {
4371                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4372                        for kpi in &mut kpis {
4373                            if kpi.name == "Current Ratio" {
4374                                kpi.value = current_ratio;
4375                            }
4376                        }
4377                    }
4378                }
4379            }
4380
4381            snapshot.kpi_count = kpis.len();
4382            snapshot.kpis = kpis;
4383        }
4384
4385        // Budgets
4386        if self.config.financial_reporting.budgets.enabled {
4387            let account_data: Vec<(String, String)> = coa
4388                .accounts
4389                .iter()
4390                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4391                .collect();
4392
4393            if !account_data.is_empty() {
4394                let fiscal_year = start_date.year() as u32;
4395                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4396                let budget = budget_gen.generate(
4397                    company_code,
4398                    fiscal_year,
4399                    &account_data,
4400                    &self.config.financial_reporting.budgets,
4401                );
4402                snapshot.budget_line_count = budget.line_items.len();
4403                snapshot.budgets.push(budget);
4404            }
4405        }
4406
4407        stats.sales_quote_count = snapshot.sales_quote_count;
4408        stats.kpi_count = snapshot.kpi_count;
4409        stats.budget_line_count = snapshot.budget_line_count;
4410
4411        info!(
4412            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4413            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4414        );
4415        self.check_resources_with_log("post-sales-kpi-budgets")?;
4416
4417        Ok(snapshot)
4418    }
4419
4420    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4421    fn phase_tax_generation(
4422        &mut self,
4423        document_flows: &DocumentFlowSnapshot,
4424        stats: &mut EnhancedGenerationStatistics,
4425    ) -> SynthResult<TaxSnapshot> {
4426        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4427            debug!("Phase 20: Skipped (tax generation disabled)");
4428            return Ok(TaxSnapshot::default());
4429        }
4430        info!("Phase 20: Generating Tax Data");
4431
4432        let seed = self.seed;
4433        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4434            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4435        let fiscal_year = start_date.year();
4436        let company_code = self
4437            .config
4438            .companies
4439            .first()
4440            .map(|c| c.code.as_str())
4441            .unwrap_or("1000");
4442
4443        let mut gen =
4444            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4445
4446        let pack = self.primary_pack().clone();
4447        let (jurisdictions, codes) =
4448            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4449
4450        // Generate tax provisions for each company
4451        let mut provisions = Vec::new();
4452        if self.config.tax.provisions.enabled {
4453            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4454            for company in &self.config.companies {
4455                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4456                let statutory_rate = rust_decimal::Decimal::new(
4457                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
4458                    2,
4459                );
4460                let provision = provision_gen.generate(
4461                    &company.code,
4462                    start_date,
4463                    pre_tax_income,
4464                    statutory_rate,
4465                );
4466                provisions.push(provision);
4467            }
4468        }
4469
4470        // Generate tax lines from document invoices
4471        let mut tax_lines = Vec::new();
4472        if !codes.is_empty() {
4473            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4474                datasynth_generators::TaxLineGeneratorConfig::default(),
4475                codes.clone(),
4476                seed + 72,
4477            );
4478
4479            // Tax lines from vendor invoices (input tax)
4480            // Use the first company's country as buyer country
4481            let buyer_country = self
4482                .config
4483                .companies
4484                .first()
4485                .map(|c| c.country.as_str())
4486                .unwrap_or("US");
4487            for vi in &document_flows.vendor_invoices {
4488                let lines = tax_line_gen.generate_for_document(
4489                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4490                    &vi.header.document_id,
4491                    buyer_country, // seller approx same country
4492                    buyer_country,
4493                    vi.payable_amount,
4494                    vi.header.document_date,
4495                    None,
4496                );
4497                tax_lines.extend(lines);
4498            }
4499
4500            // Tax lines from customer invoices (output tax)
4501            for ci in &document_flows.customer_invoices {
4502                let lines = tax_line_gen.generate_for_document(
4503                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4504                    &ci.header.document_id,
4505                    buyer_country, // seller is the company
4506                    buyer_country,
4507                    ci.total_gross_amount,
4508                    ci.header.document_date,
4509                    None,
4510                );
4511                tax_lines.extend(lines);
4512            }
4513        }
4514
4515        let snapshot = TaxSnapshot {
4516            jurisdiction_count: jurisdictions.len(),
4517            code_count: codes.len(),
4518            jurisdictions,
4519            codes,
4520            tax_provisions: provisions,
4521            tax_lines,
4522            tax_returns: Vec::new(),
4523            withholding_records: Vec::new(),
4524            tax_anomaly_labels: Vec::new(),
4525        };
4526
4527        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4528        stats.tax_code_count = snapshot.code_count;
4529        stats.tax_provision_count = snapshot.tax_provisions.len();
4530        stats.tax_line_count = snapshot.tax_lines.len();
4531
4532        info!(
4533            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4534            snapshot.jurisdiction_count,
4535            snapshot.code_count,
4536            snapshot.tax_provisions.len()
4537        );
4538        self.check_resources_with_log("post-tax")?;
4539
4540        Ok(snapshot)
4541    }
4542
4543    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4544    fn phase_esg_generation(
4545        &mut self,
4546        document_flows: &DocumentFlowSnapshot,
4547        stats: &mut EnhancedGenerationStatistics,
4548    ) -> SynthResult<EsgSnapshot> {
4549        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4550            debug!("Phase 21: Skipped (ESG generation disabled)");
4551            return Ok(EsgSnapshot::default());
4552        }
4553        info!("Phase 21: Generating ESG Data");
4554
4555        let seed = self.seed;
4556        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4557            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4558        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4559        let entity_id = self
4560            .config
4561            .companies
4562            .first()
4563            .map(|c| c.code.as_str())
4564            .unwrap_or("1000");
4565
4566        let esg_cfg = &self.config.esg;
4567        let mut snapshot = EsgSnapshot::default();
4568
4569        // Energy consumption (feeds into scope 1 & 2 emissions)
4570        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4571            esg_cfg.environmental.energy.clone(),
4572            seed + 80,
4573        );
4574        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4575
4576        // Water usage
4577        let facility_count = esg_cfg.environmental.energy.facility_count;
4578        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4579        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4580
4581        // Waste
4582        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4583            seed + 82,
4584            esg_cfg.environmental.waste.diversion_target,
4585            facility_count,
4586        );
4587        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4588
4589        // Emissions (scope 1, 2, 3)
4590        let mut emission_gen =
4591            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4592
4593        // Build EnergyInput from energy_records
4594        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4595            .iter()
4596            .map(|e| datasynth_generators::EnergyInput {
4597                facility_id: e.facility_id.clone(),
4598                energy_type: match e.energy_source {
4599                    EnergySourceType::NaturalGas => {
4600                        datasynth_generators::EnergyInputType::NaturalGas
4601                    }
4602                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4603                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4604                    _ => datasynth_generators::EnergyInputType::Electricity,
4605                },
4606                consumption_kwh: e.consumption_kwh,
4607                period: e.period,
4608            })
4609            .collect();
4610
4611        let mut emissions = Vec::new();
4612        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4613        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4614
4615        // Scope 3: use vendor spend data from actual payments
4616        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4617            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4618            for payment in &document_flows.payments {
4619                if payment.is_vendor {
4620                    *totals
4621                        .entry(payment.business_partner_id.clone())
4622                        .or_default() += payment.amount;
4623                }
4624            }
4625            totals
4626        };
4627        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4628            .master_data
4629            .vendors
4630            .iter()
4631            .map(|v| {
4632                let spend = vendor_payment_totals
4633                    .get(&v.vendor_id)
4634                    .copied()
4635                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4636                datasynth_generators::VendorSpendInput {
4637                    vendor_id: v.vendor_id.clone(),
4638                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4639                    spend,
4640                    country: v.country.clone(),
4641                }
4642            })
4643            .collect();
4644        if !vendor_spend.is_empty() {
4645            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4646                entity_id,
4647                &vendor_spend,
4648                start_date,
4649                end_date,
4650            ));
4651        }
4652
4653        // Business travel & commuting (scope 3)
4654        let headcount = self.master_data.employees.len() as u32;
4655        if headcount > 0 {
4656            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4657            emissions.extend(emission_gen.generate_scope3_business_travel(
4658                entity_id,
4659                travel_spend,
4660                start_date,
4661            ));
4662            emissions
4663                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4664        }
4665
4666        snapshot.emission_count = emissions.len();
4667        snapshot.emissions = emissions;
4668        snapshot.energy = energy_records;
4669
4670        // Social: Workforce diversity, pay equity, safety
4671        let mut workforce_gen =
4672            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4673        let total_headcount = headcount.max(100);
4674        snapshot.diversity =
4675            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4676        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4677        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4678            entity_id,
4679            facility_count,
4680            start_date,
4681            end_date,
4682        );
4683
4684        // Compute safety metrics
4685        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4686        let safety_metric = workforce_gen.compute_safety_metrics(
4687            entity_id,
4688            &snapshot.safety_incidents,
4689            total_hours,
4690            start_date,
4691        );
4692        snapshot.safety_metrics = vec![safety_metric];
4693
4694        // Governance
4695        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4696            seed + 85,
4697            esg_cfg.governance.board_size,
4698            esg_cfg.governance.independence_target,
4699        );
4700        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4701
4702        // Supplier ESG assessments
4703        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4704            esg_cfg.supply_chain_esg.clone(),
4705            seed + 86,
4706        );
4707        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4708            .master_data
4709            .vendors
4710            .iter()
4711            .map(|v| datasynth_generators::VendorInput {
4712                vendor_id: v.vendor_id.clone(),
4713                country: v.country.clone(),
4714                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4715                quality_score: None,
4716            })
4717            .collect();
4718        snapshot.supplier_assessments =
4719            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4720
4721        // Disclosures
4722        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4723            seed + 87,
4724            esg_cfg.reporting.clone(),
4725            esg_cfg.climate_scenarios.clone(),
4726        );
4727        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4728        snapshot.disclosures = disclosure_gen.generate_disclosures(
4729            entity_id,
4730            &snapshot.materiality,
4731            start_date,
4732            end_date,
4733        );
4734        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4735        snapshot.disclosure_count = snapshot.disclosures.len();
4736
4737        // Anomaly injection
4738        if esg_cfg.anomaly_rate > 0.0 {
4739            let mut anomaly_injector =
4740                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4741            let mut labels = Vec::new();
4742            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4743            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4744            labels.extend(
4745                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4746            );
4747            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4748            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4749            snapshot.anomaly_labels = labels;
4750        }
4751
4752        stats.esg_emission_count = snapshot.emission_count;
4753        stats.esg_disclosure_count = snapshot.disclosure_count;
4754
4755        info!(
4756            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4757            snapshot.emission_count,
4758            snapshot.disclosure_count,
4759            snapshot.supplier_assessments.len()
4760        );
4761        self.check_resources_with_log("post-esg")?;
4762
4763        Ok(snapshot)
4764    }
4765
4766    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
4767    fn phase_treasury_data(
4768        &mut self,
4769        document_flows: &DocumentFlowSnapshot,
4770        subledger: &SubledgerSnapshot,
4771        intercompany: &IntercompanySnapshot,
4772        stats: &mut EnhancedGenerationStatistics,
4773    ) -> SynthResult<TreasurySnapshot> {
4774        if !self.config.treasury.enabled {
4775            debug!("Phase 22: Skipped (treasury generation disabled)");
4776            return Ok(TreasurySnapshot::default());
4777        }
4778        info!("Phase 22: Generating Treasury Data");
4779
4780        let seed = self.seed;
4781        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4782            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4783        let currency = self
4784            .config
4785            .companies
4786            .first()
4787            .map(|c| c.currency.as_str())
4788            .unwrap_or("USD");
4789        let entity_id = self
4790            .config
4791            .companies
4792            .first()
4793            .map(|c| c.code.as_str())
4794            .unwrap_or("1000");
4795
4796        let mut snapshot = TreasurySnapshot::default();
4797
4798        // Generate debt instruments
4799        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4800            self.config.treasury.debt.clone(),
4801            seed + 90,
4802        );
4803        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4804
4805        // Generate hedging instruments (IR swaps for floating-rate debt)
4806        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4807            self.config.treasury.hedging.clone(),
4808            seed + 91,
4809        );
4810        for debt in &snapshot.debt_instruments {
4811            if debt.rate_type == InterestRateType::Variable {
4812                let swap = hedge_gen.generate_ir_swap(
4813                    currency,
4814                    debt.principal,
4815                    debt.origination_date,
4816                    debt.maturity_date,
4817                );
4818                snapshot.hedging_instruments.push(swap);
4819            }
4820        }
4821
4822        // Build FX exposures from foreign-currency payments and generate
4823        // FX forwards + hedge relationship designations via generate() API.
4824        {
4825            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4826            for payment in &document_flows.payments {
4827                if payment.currency != currency {
4828                    let entry = fx_map
4829                        .entry(payment.currency.clone())
4830                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4831                    entry.0 += payment.amount;
4832                    // Use the latest settlement date among grouped payments
4833                    if payment.header.document_date > entry.1 {
4834                        entry.1 = payment.header.document_date;
4835                    }
4836                }
4837            }
4838            if !fx_map.is_empty() {
4839                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4840                    .into_iter()
4841                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4842                        datasynth_generators::treasury::FxExposure {
4843                            currency_pair: format!("{foreign_ccy}/{currency}"),
4844                            foreign_currency: foreign_ccy,
4845                            net_amount,
4846                            settlement_date,
4847                            description: "AP payment FX exposure".to_string(),
4848                        }
4849                    })
4850                    .collect();
4851                let (fx_instruments, fx_relationships) =
4852                    hedge_gen.generate(start_date, &fx_exposures);
4853                snapshot.hedging_instruments.extend(fx_instruments);
4854                snapshot.hedge_relationships.extend(fx_relationships);
4855            }
4856        }
4857
4858        // Inject anomalies if configured
4859        if self.config.treasury.anomaly_rate > 0.0 {
4860            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4861                seed + 92,
4862                self.config.treasury.anomaly_rate,
4863            );
4864            let mut labels = Vec::new();
4865            labels.extend(
4866                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4867            );
4868            snapshot.treasury_anomaly_labels = labels;
4869        }
4870
4871        // Generate cash positions from payment flows
4872        if self.config.treasury.cash_positioning.enabled {
4873            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4874
4875            // AP payments as outflows
4876            for payment in &document_flows.payments {
4877                cash_flows.push(datasynth_generators::treasury::CashFlow {
4878                    date: payment.header.document_date,
4879                    account_id: format!("{entity_id}-MAIN"),
4880                    amount: payment.amount,
4881                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4882                });
4883            }
4884
4885            // Customer receipts (from O2C chains) as inflows
4886            for chain in &document_flows.o2c_chains {
4887                if let Some(ref receipt) = chain.customer_receipt {
4888                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4889                        date: receipt.header.document_date,
4890                        account_id: format!("{entity_id}-MAIN"),
4891                        amount: receipt.amount,
4892                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4893                    });
4894                }
4895                // Remainder receipts (follow-up to partial payments)
4896                for receipt in &chain.remainder_receipts {
4897                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4898                        date: receipt.header.document_date,
4899                        account_id: format!("{entity_id}-MAIN"),
4900                        amount: receipt.amount,
4901                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4902                    });
4903                }
4904            }
4905
4906            if !cash_flows.is_empty() {
4907                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4908                    self.config.treasury.cash_positioning.clone(),
4909                    seed + 93,
4910                );
4911                let account_id = format!("{entity_id}-MAIN");
4912                snapshot.cash_positions = cash_gen.generate(
4913                    entity_id,
4914                    &account_id,
4915                    currency,
4916                    &cash_flows,
4917                    start_date,
4918                    start_date + chrono::Months::new(self.config.global.period_months),
4919                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4920                );
4921            }
4922        }
4923
4924        // Generate cash forecasts from AR/AP aging
4925        if self.config.treasury.cash_forecasting.enabled {
4926            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4927
4928            // Build AR aging items from subledger AR invoices
4929            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
4930                .ar_invoices
4931                .iter()
4932                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
4933                .map(|inv| {
4934                    let days_past_due = if inv.due_date < end_date {
4935                        (end_date - inv.due_date).num_days().max(0) as u32
4936                    } else {
4937                        0
4938                    };
4939                    datasynth_generators::treasury::ArAgingItem {
4940                        expected_date: inv.due_date,
4941                        amount: inv.amount_remaining,
4942                        days_past_due,
4943                        document_id: inv.invoice_number.clone(),
4944                    }
4945                })
4946                .collect();
4947
4948            // Build AP aging items from subledger AP invoices
4949            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
4950                .ap_invoices
4951                .iter()
4952                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
4953                .map(|inv| datasynth_generators::treasury::ApAgingItem {
4954                    payment_date: inv.due_date,
4955                    amount: inv.amount_remaining,
4956                    document_id: inv.invoice_number.clone(),
4957                })
4958                .collect();
4959
4960            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
4961                self.config.treasury.cash_forecasting.clone(),
4962                seed + 94,
4963            );
4964            let forecast = forecast_gen.generate(
4965                entity_id,
4966                currency,
4967                end_date,
4968                &ar_items,
4969                &ap_items,
4970                &[], // scheduled disbursements - empty for now
4971            );
4972            snapshot.cash_forecasts.push(forecast);
4973        }
4974
4975        // Generate cash pools and sweeps
4976        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
4977            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4978            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
4979                self.config.treasury.cash_pooling.clone(),
4980                seed + 95,
4981            );
4982
4983            // Create a pool from available accounts
4984            let account_ids: Vec<String> = snapshot
4985                .cash_positions
4986                .iter()
4987                .map(|cp| cp.bank_account_id.clone())
4988                .collect::<std::collections::HashSet<_>>()
4989                .into_iter()
4990                .collect();
4991
4992            if let Some(pool) =
4993                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
4994            {
4995                // Generate sweeps - build participant balances from last cash position per account
4996                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4997                for cp in &snapshot.cash_positions {
4998                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
4999                }
5000
5001                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
5002                    latest_balances
5003                        .into_iter()
5004                        .filter(|(id, _)| pool.participant_accounts.contains(id))
5005                        .map(
5006                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
5007                                account_id: id,
5008                                balance,
5009                            },
5010                        )
5011                        .collect();
5012
5013                let sweeps =
5014                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
5015                snapshot.cash_pool_sweeps = sweeps;
5016                snapshot.cash_pools.push(pool);
5017            }
5018        }
5019
5020        // Generate bank guarantees
5021        if self.config.treasury.bank_guarantees.enabled {
5022            let vendor_names: Vec<String> = self
5023                .master_data
5024                .vendors
5025                .iter()
5026                .map(|v| v.name.clone())
5027                .collect();
5028            if !vendor_names.is_empty() {
5029                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
5030                    self.config.treasury.bank_guarantees.clone(),
5031                    seed + 96,
5032                );
5033                snapshot.bank_guarantees =
5034                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
5035            }
5036        }
5037
5038        // Generate netting runs from intercompany matched pairs
5039        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
5040            let entity_ids: Vec<String> = self
5041                .config
5042                .companies
5043                .iter()
5044                .map(|c| c.code.clone())
5045                .collect();
5046            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
5047                .matched_pairs
5048                .iter()
5049                .map(|mp| {
5050                    (
5051                        mp.seller_company.clone(),
5052                        mp.buyer_company.clone(),
5053                        mp.amount,
5054                    )
5055                })
5056                .collect();
5057            if entity_ids.len() >= 2 {
5058                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
5059                    self.config.treasury.netting.clone(),
5060                    seed + 97,
5061                );
5062                snapshot.netting_runs = netting_gen.generate(
5063                    &entity_ids,
5064                    currency,
5065                    start_date,
5066                    self.config.global.period_months,
5067                    &ic_amounts,
5068                );
5069            }
5070        }
5071
5072        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
5073        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
5074        stats.cash_position_count = snapshot.cash_positions.len();
5075        stats.cash_forecast_count = snapshot.cash_forecasts.len();
5076        stats.cash_pool_count = snapshot.cash_pools.len();
5077
5078        info!(
5079            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
5080            snapshot.debt_instruments.len(),
5081            snapshot.hedging_instruments.len(),
5082            snapshot.cash_positions.len(),
5083            snapshot.cash_forecasts.len(),
5084            snapshot.cash_pools.len(),
5085            snapshot.bank_guarantees.len(),
5086            snapshot.netting_runs.len(),
5087        );
5088        self.check_resources_with_log("post-treasury")?;
5089
5090        Ok(snapshot)
5091    }
5092
5093    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
5094    fn phase_project_accounting(
5095        &mut self,
5096        document_flows: &DocumentFlowSnapshot,
5097        hr: &HrSnapshot,
5098        stats: &mut EnhancedGenerationStatistics,
5099    ) -> SynthResult<ProjectAccountingSnapshot> {
5100        if !self.config.project_accounting.enabled {
5101            debug!("Phase 23: Skipped (project accounting disabled)");
5102            return Ok(ProjectAccountingSnapshot::default());
5103        }
5104        info!("Phase 23: Generating Project Accounting Data");
5105
5106        let seed = self.seed;
5107        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5108            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5109        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5110        let company_code = self
5111            .config
5112            .companies
5113            .first()
5114            .map(|c| c.code.as_str())
5115            .unwrap_or("1000");
5116
5117        let mut snapshot = ProjectAccountingSnapshot::default();
5118
5119        // Generate projects with WBS hierarchies
5120        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
5121            self.config.project_accounting.clone(),
5122            seed + 95,
5123        );
5124        let pool = project_gen.generate(company_code, start_date, end_date);
5125        snapshot.projects = pool.projects.clone();
5126
5127        // Link source documents to projects for cost allocation
5128        {
5129            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
5130                Vec::new();
5131
5132            // Time entries
5133            for te in &hr.time_entries {
5134                let total_hours = te.hours_regular + te.hours_overtime;
5135                if total_hours > 0.0 {
5136                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5137                        id: te.entry_id.clone(),
5138                        entity_id: company_code.to_string(),
5139                        date: te.date,
5140                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
5141                            .unwrap_or(rust_decimal::Decimal::ZERO),
5142                        source_type: CostSourceType::TimeEntry,
5143                        hours: Some(
5144                            rust_decimal::Decimal::from_f64_retain(total_hours)
5145                                .unwrap_or(rust_decimal::Decimal::ZERO),
5146                        ),
5147                    });
5148                }
5149            }
5150
5151            // Expense reports
5152            for er in &hr.expense_reports {
5153                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5154                    id: er.report_id.clone(),
5155                    entity_id: company_code.to_string(),
5156                    date: er.submission_date,
5157                    amount: er.total_amount,
5158                    source_type: CostSourceType::ExpenseReport,
5159                    hours: None,
5160                });
5161            }
5162
5163            // Purchase orders
5164            for po in &document_flows.purchase_orders {
5165                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5166                    id: po.header.document_id.clone(),
5167                    entity_id: company_code.to_string(),
5168                    date: po.header.document_date,
5169                    amount: po.total_net_amount,
5170                    source_type: CostSourceType::PurchaseOrder,
5171                    hours: None,
5172                });
5173            }
5174
5175            // Vendor invoices
5176            for vi in &document_flows.vendor_invoices {
5177                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5178                    id: vi.header.document_id.clone(),
5179                    entity_id: company_code.to_string(),
5180                    date: vi.header.document_date,
5181                    amount: vi.payable_amount,
5182                    source_type: CostSourceType::VendorInvoice,
5183                    hours: None,
5184                });
5185            }
5186
5187            if !source_docs.is_empty() && !pool.projects.is_empty() {
5188                let mut cost_gen =
5189                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
5190                        self.config.project_accounting.cost_allocation.clone(),
5191                        seed + 99,
5192                    );
5193                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
5194            }
5195        }
5196
5197        // Generate change orders
5198        if self.config.project_accounting.change_orders.enabled {
5199            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
5200                self.config.project_accounting.change_orders.clone(),
5201                seed + 96,
5202            );
5203            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
5204        }
5205
5206        // Generate milestones
5207        if self.config.project_accounting.milestones.enabled {
5208            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
5209                self.config.project_accounting.milestones.clone(),
5210                seed + 97,
5211            );
5212            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
5213        }
5214
5215        // Generate earned value metrics (needs cost lines, so only if we have projects)
5216        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
5217            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
5218                self.config.project_accounting.earned_value.clone(),
5219                seed + 98,
5220            );
5221            snapshot.earned_value_metrics =
5222                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
5223        }
5224
5225        stats.project_count = snapshot.projects.len();
5226        stats.project_change_order_count = snapshot.change_orders.len();
5227        stats.project_cost_line_count = snapshot.cost_lines.len();
5228
5229        info!(
5230            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
5231            snapshot.projects.len(),
5232            snapshot.change_orders.len(),
5233            snapshot.milestones.len(),
5234            snapshot.earned_value_metrics.len()
5235        );
5236        self.check_resources_with_log("post-project-accounting")?;
5237
5238        Ok(snapshot)
5239    }
5240
5241    /// Phase 24: Generate process evolution and organizational events.
5242    fn phase_evolution_events(
5243        &mut self,
5244        stats: &mut EnhancedGenerationStatistics,
5245    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
5246        if !self.phase_config.generate_evolution_events {
5247            debug!("Phase 24: Skipped (evolution events disabled)");
5248            return Ok((Vec::new(), Vec::new()));
5249        }
5250        info!("Phase 24: Generating Process Evolution + Organizational Events");
5251
5252        let seed = self.seed;
5253        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5254            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5255        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5256
5257        // Process evolution events
5258        let mut proc_gen =
5259            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
5260                seed + 100,
5261            );
5262        let process_events = proc_gen.generate_events(start_date, end_date);
5263
5264        // Organizational events
5265        let company_codes: Vec<String> = self
5266            .config
5267            .companies
5268            .iter()
5269            .map(|c| c.code.clone())
5270            .collect();
5271        let mut org_gen =
5272            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
5273                seed + 101,
5274            );
5275        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
5276
5277        stats.process_evolution_event_count = process_events.len();
5278        stats.organizational_event_count = org_events.len();
5279
5280        info!(
5281            "Evolution events generated: {} process evolution, {} organizational",
5282            process_events.len(),
5283            org_events.len()
5284        );
5285        self.check_resources_with_log("post-evolution-events")?;
5286
5287        Ok((process_events, org_events))
5288    }
5289
5290    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
5291    /// data recovery, and regulatory changes).
5292    fn phase_disruption_events(
5293        &self,
5294        stats: &mut EnhancedGenerationStatistics,
5295    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
5296        if !self.config.organizational_events.enabled {
5297            debug!("Phase 24b: Skipped (organizational events disabled)");
5298            return Ok(Vec::new());
5299        }
5300        info!("Phase 24b: Generating Disruption Events");
5301
5302        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5303            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5304        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5305
5306        let company_codes: Vec<String> = self
5307            .config
5308            .companies
5309            .iter()
5310            .map(|c| c.code.clone())
5311            .collect();
5312
5313        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
5314        let events = gen.generate(start_date, end_date, &company_codes);
5315
5316        stats.disruption_event_count = events.len();
5317        info!("Disruption events generated: {} events", events.len());
5318        self.check_resources_with_log("post-disruption-events")?;
5319
5320        Ok(events)
5321    }
5322
5323    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
5324    ///
5325    /// Produces paired examples where each pair contains the original clean JE
5326    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
5327    /// split transaction). Useful for training anomaly detection models with
5328    /// known ground truth.
5329    fn phase_counterfactuals(
5330        &self,
5331        journal_entries: &[JournalEntry],
5332        stats: &mut EnhancedGenerationStatistics,
5333    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
5334        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
5335            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
5336            return Ok(Vec::new());
5337        }
5338        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
5339
5340        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
5341
5342        let mut gen = CounterfactualGenerator::new(self.seed + 110);
5343
5344        // Rotating set of specs to produce diverse mutation types
5345        let specs = [
5346            CounterfactualSpec::ScaleAmount { factor: 2.5 },
5347            CounterfactualSpec::ShiftDate { days: -14 },
5348            CounterfactualSpec::SelfApprove,
5349            CounterfactualSpec::SplitTransaction { split_count: 3 },
5350        ];
5351
5352        let pairs: Vec<_> = journal_entries
5353            .iter()
5354            .enumerate()
5355            .map(|(i, je)| {
5356                let spec = &specs[i % specs.len()];
5357                gen.generate(je, spec)
5358            })
5359            .collect();
5360
5361        stats.counterfactual_pair_count = pairs.len();
5362        info!(
5363            "Counterfactual pairs generated: {} pairs from {} journal entries",
5364            pairs.len(),
5365            journal_entries.len()
5366        );
5367        self.check_resources_with_log("post-counterfactuals")?;
5368
5369        Ok(pairs)
5370    }
5371
5372    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
5373    ///
5374    /// Uses the anomaly labels (from Phase 8) to determine which documents are
5375    /// fraudulent, then generates probabilistic red flags on all chain documents.
5376    /// Non-fraud documents also receive red flags at a lower rate (false positives)
5377    /// to produce realistic ML training data.
5378    fn phase_red_flags(
5379        &self,
5380        anomaly_labels: &AnomalyLabels,
5381        document_flows: &DocumentFlowSnapshot,
5382        stats: &mut EnhancedGenerationStatistics,
5383    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
5384        if !self.config.fraud.enabled {
5385            debug!("Phase 26: Skipped (fraud generation disabled)");
5386            return Ok(Vec::new());
5387        }
5388        info!("Phase 26: Generating Fraud Red-Flag Indicators");
5389
5390        use datasynth_generators::fraud::RedFlagGenerator;
5391
5392        let generator = RedFlagGenerator::new();
5393        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
5394
5395        // Build a set of document IDs that are known-fraudulent from anomaly labels.
5396        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
5397            .labels
5398            .iter()
5399            .filter(|label| label.anomaly_type.is_intentional())
5400            .map(|label| label.document_id.as_str())
5401            .collect();
5402
5403        let mut flags = Vec::new();
5404
5405        // Iterate P2P chains: use the purchase order document ID as the chain key.
5406        for chain in &document_flows.p2p_chains {
5407            let doc_id = &chain.purchase_order.header.document_id;
5408            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5409            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5410        }
5411
5412        // Iterate O2C chains: use the sales order document ID as the chain key.
5413        for chain in &document_flows.o2c_chains {
5414            let doc_id = &chain.sales_order.header.document_id;
5415            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5416            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5417        }
5418
5419        stats.red_flag_count = flags.len();
5420        info!(
5421            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
5422            flags.len(),
5423            document_flows.p2p_chains.len(),
5424            document_flows.o2c_chains.len(),
5425            fraud_doc_ids.len()
5426        );
5427        self.check_resources_with_log("post-red-flags")?;
5428
5429        Ok(flags)
5430    }
5431
5432    /// Phase 26b: Generate collusion rings from employee/vendor pools.
5433    ///
5434    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
5435    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
5436    /// advance them over the simulation period.
5437    fn phase_collusion_rings(
5438        &mut self,
5439        stats: &mut EnhancedGenerationStatistics,
5440    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
5441        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
5442            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
5443            return Ok(Vec::new());
5444        }
5445        info!("Phase 26b: Generating Collusion Rings");
5446
5447        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5448            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5449        let months = self.config.global.period_months;
5450
5451        let employee_ids: Vec<String> = self
5452            .master_data
5453            .employees
5454            .iter()
5455            .map(|e| e.employee_id.clone())
5456            .collect();
5457        let vendor_ids: Vec<String> = self
5458            .master_data
5459            .vendors
5460            .iter()
5461            .map(|v| v.vendor_id.clone())
5462            .collect();
5463
5464        let mut generator =
5465            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
5466        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
5467
5468        stats.collusion_ring_count = rings.len();
5469        info!(
5470            "Collusion rings generated: {} rings, total members: {}",
5471            rings.len(),
5472            rings
5473                .iter()
5474                .map(datasynth_generators::fraud::CollusionRing::size)
5475                .sum::<usize>()
5476        );
5477        self.check_resources_with_log("post-collusion-rings")?;
5478
5479        Ok(rings)
5480    }
5481
5482    /// Phase 27: Generate bi-temporal version chains for vendor entities.
5483    ///
5484    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
5485    /// master data changes over time, supporting bi-temporal audit queries.
5486    fn phase_temporal_attributes(
5487        &mut self,
5488        stats: &mut EnhancedGenerationStatistics,
5489    ) -> SynthResult<
5490        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
5491    > {
5492        if !self.config.temporal_attributes.enabled {
5493            debug!("Phase 27: Skipped (temporal attributes disabled)");
5494            return Ok(Vec::new());
5495        }
5496        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
5497
5498        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5499            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5500
5501        // Build a TemporalAttributeConfig from the user's config.
5502        // Since Phase 27 is already gated on temporal_attributes.enabled,
5503        // default to enabling version chains so users get actual mutations.
5504        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
5505            || self.config.temporal_attributes.enabled;
5506        let temporal_config = {
5507            let ta = &self.config.temporal_attributes;
5508            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
5509                .enabled(ta.enabled)
5510                .closed_probability(ta.valid_time.closed_probability)
5511                .avg_validity_days(ta.valid_time.avg_validity_days)
5512                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
5513                .with_version_chains(if generate_version_chains {
5514                    ta.avg_versions_per_entity
5515                } else {
5516                    1.0
5517                })
5518                .build()
5519        };
5520        // Apply backdating settings if configured
5521        let temporal_config = if self
5522            .config
5523            .temporal_attributes
5524            .transaction_time
5525            .allow_backdating
5526        {
5527            let mut c = temporal_config;
5528            c.transaction_time.allow_backdating = true;
5529            c.transaction_time.backdating_probability = self
5530                .config
5531                .temporal_attributes
5532                .transaction_time
5533                .backdating_probability;
5534            c.transaction_time.max_backdate_days = self
5535                .config
5536                .temporal_attributes
5537                .transaction_time
5538                .max_backdate_days;
5539            c
5540        } else {
5541            temporal_config
5542        };
5543        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
5544            temporal_config,
5545            self.seed + 130,
5546            start_date,
5547        );
5548
5549        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
5550            self.seed + 130,
5551            datasynth_core::GeneratorType::Vendor,
5552        );
5553
5554        let chains: Vec<_> = self
5555            .master_data
5556            .vendors
5557            .iter()
5558            .map(|vendor| {
5559                let id = uuid_factory.next();
5560                gen.generate_version_chain(vendor.clone(), id)
5561            })
5562            .collect();
5563
5564        stats.temporal_version_chain_count = chains.len();
5565        info!("Temporal version chains generated: {} chains", chains.len());
5566        self.check_resources_with_log("post-temporal-attributes")?;
5567
5568        Ok(chains)
5569    }
5570
5571    /// Phase 28: Build entity relationship graph and cross-process links.
5572    ///
5573    /// Part 1 (gated on `relationship_strength.enabled`): builds an
5574    /// `EntityGraph` from master-data vendor/customer entities and
5575    /// journal-entry-derived transaction summaries.
5576    ///
5577    /// Part 2 (gated on `cross_process_links.enabled`): extracts
5578    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
5579    /// generates inventory-movement cross-process links.
5580    fn phase_entity_relationships(
5581        &self,
5582        journal_entries: &[JournalEntry],
5583        document_flows: &DocumentFlowSnapshot,
5584        stats: &mut EnhancedGenerationStatistics,
5585    ) -> SynthResult<(
5586        Option<datasynth_core::models::EntityGraph>,
5587        Vec<datasynth_core::models::CrossProcessLink>,
5588    )> {
5589        use datasynth_generators::relationships::{
5590            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
5591            TransactionSummary,
5592        };
5593
5594        let rs_enabled = self.config.relationship_strength.enabled;
5595        let cpl_enabled = self.config.cross_process_links.enabled
5596            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
5597
5598        if !rs_enabled && !cpl_enabled {
5599            debug!(
5600                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
5601            );
5602            return Ok((None, Vec::new()));
5603        }
5604
5605        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
5606
5607        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5608            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5609
5610        let company_code = self
5611            .config
5612            .companies
5613            .first()
5614            .map(|c| c.code.as_str())
5615            .unwrap_or("1000");
5616
5617        // Build the generator with matching config flags
5618        let gen_config = EntityGraphConfig {
5619            enabled: rs_enabled,
5620            cross_process: datasynth_generators::relationships::CrossProcessConfig {
5621                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
5622                enable_return_flows: false,
5623                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
5624                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
5625                // Use higher link rate for small datasets to avoid probabilistic empty results
5626                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
5627                    1.0
5628                } else {
5629                    0.30
5630                },
5631                ..Default::default()
5632            },
5633            strength_config: datasynth_generators::relationships::StrengthConfig {
5634                transaction_volume_weight: self
5635                    .config
5636                    .relationship_strength
5637                    .calculation
5638                    .transaction_volume_weight,
5639                transaction_count_weight: self
5640                    .config
5641                    .relationship_strength
5642                    .calculation
5643                    .transaction_count_weight,
5644                duration_weight: self
5645                    .config
5646                    .relationship_strength
5647                    .calculation
5648                    .relationship_duration_weight,
5649                recency_weight: self.config.relationship_strength.calculation.recency_weight,
5650                mutual_connections_weight: self
5651                    .config
5652                    .relationship_strength
5653                    .calculation
5654                    .mutual_connections_weight,
5655                recency_half_life_days: self
5656                    .config
5657                    .relationship_strength
5658                    .calculation
5659                    .recency_half_life_days,
5660            },
5661            ..Default::default()
5662        };
5663
5664        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
5665
5666        // --- Part 1: Entity Relationship Graph ---
5667        let entity_graph = if rs_enabled {
5668            // Build EntitySummary lists from master data
5669            let vendor_summaries: Vec<EntitySummary> = self
5670                .master_data
5671                .vendors
5672                .iter()
5673                .map(|v| {
5674                    EntitySummary::new(
5675                        &v.vendor_id,
5676                        &v.name,
5677                        datasynth_core::models::GraphEntityType::Vendor,
5678                        start_date,
5679                    )
5680                })
5681                .collect();
5682
5683            let customer_summaries: Vec<EntitySummary> = self
5684                .master_data
5685                .customers
5686                .iter()
5687                .map(|c| {
5688                    EntitySummary::new(
5689                        &c.customer_id,
5690                        &c.name,
5691                        datasynth_core::models::GraphEntityType::Customer,
5692                        start_date,
5693                    )
5694                })
5695                .collect();
5696
5697            // Build transaction summaries from journal entries.
5698            // Key = (company_code, trading_partner) for entries that have a
5699            // trading partner.  This captures intercompany flows and any JE
5700            // whose line items carry a trading_partner reference.
5701            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
5702                std::collections::HashMap::new();
5703
5704            for je in journal_entries {
5705                let cc = je.header.company_code.clone();
5706                let posting_date = je.header.posting_date;
5707                for line in &je.lines {
5708                    if let Some(ref tp) = line.trading_partner {
5709                        let amount = if line.debit_amount > line.credit_amount {
5710                            line.debit_amount
5711                        } else {
5712                            line.credit_amount
5713                        };
5714                        let entry = txn_summaries
5715                            .entry((cc.clone(), tp.clone()))
5716                            .or_insert_with(|| TransactionSummary {
5717                                total_volume: rust_decimal::Decimal::ZERO,
5718                                transaction_count: 0,
5719                                first_transaction_date: posting_date,
5720                                last_transaction_date: posting_date,
5721                                related_entities: std::collections::HashSet::new(),
5722                            });
5723                        entry.total_volume += amount;
5724                        entry.transaction_count += 1;
5725                        if posting_date < entry.first_transaction_date {
5726                            entry.first_transaction_date = posting_date;
5727                        }
5728                        if posting_date > entry.last_transaction_date {
5729                            entry.last_transaction_date = posting_date;
5730                        }
5731                        entry.related_entities.insert(cc.clone());
5732                    }
5733                }
5734            }
5735
5736            // Also extract transaction relationships from document flow chains.
5737            // P2P chains: Company → Vendor relationships
5738            for chain in &document_flows.p2p_chains {
5739                let cc = chain.purchase_order.header.company_code.clone();
5740                let vendor_id = chain.purchase_order.vendor_id.clone();
5741                let po_date = chain.purchase_order.header.document_date;
5742                let amount = chain.purchase_order.total_net_amount;
5743
5744                let entry = txn_summaries
5745                    .entry((cc.clone(), vendor_id))
5746                    .or_insert_with(|| TransactionSummary {
5747                        total_volume: rust_decimal::Decimal::ZERO,
5748                        transaction_count: 0,
5749                        first_transaction_date: po_date,
5750                        last_transaction_date: po_date,
5751                        related_entities: std::collections::HashSet::new(),
5752                    });
5753                entry.total_volume += amount;
5754                entry.transaction_count += 1;
5755                if po_date < entry.first_transaction_date {
5756                    entry.first_transaction_date = po_date;
5757                }
5758                if po_date > entry.last_transaction_date {
5759                    entry.last_transaction_date = po_date;
5760                }
5761                entry.related_entities.insert(cc);
5762            }
5763
5764            // O2C chains: Company → Customer relationships
5765            for chain in &document_flows.o2c_chains {
5766                let cc = chain.sales_order.header.company_code.clone();
5767                let customer_id = chain.sales_order.customer_id.clone();
5768                let so_date = chain.sales_order.header.document_date;
5769                let amount = chain.sales_order.total_net_amount;
5770
5771                let entry = txn_summaries
5772                    .entry((cc.clone(), customer_id))
5773                    .or_insert_with(|| TransactionSummary {
5774                        total_volume: rust_decimal::Decimal::ZERO,
5775                        transaction_count: 0,
5776                        first_transaction_date: so_date,
5777                        last_transaction_date: so_date,
5778                        related_entities: std::collections::HashSet::new(),
5779                    });
5780                entry.total_volume += amount;
5781                entry.transaction_count += 1;
5782                if so_date < entry.first_transaction_date {
5783                    entry.first_transaction_date = so_date;
5784                }
5785                if so_date > entry.last_transaction_date {
5786                    entry.last_transaction_date = so_date;
5787                }
5788                entry.related_entities.insert(cc);
5789            }
5790
5791            let as_of_date = journal_entries
5792                .last()
5793                .map(|je| je.header.posting_date)
5794                .unwrap_or(start_date);
5795
5796            let graph = gen.generate_entity_graph(
5797                company_code,
5798                as_of_date,
5799                &vendor_summaries,
5800                &customer_summaries,
5801                &txn_summaries,
5802            );
5803
5804            info!(
5805                "Entity relationship graph: {} nodes, {} edges",
5806                graph.nodes.len(),
5807                graph.edges.len()
5808            );
5809            stats.entity_relationship_node_count = graph.nodes.len();
5810            stats.entity_relationship_edge_count = graph.edges.len();
5811            Some(graph)
5812        } else {
5813            None
5814        };
5815
5816        // --- Part 2: Cross-Process Links ---
5817        let cross_process_links = if cpl_enabled {
5818            // Build GoodsReceiptRef from P2P chains
5819            let gr_refs: Vec<GoodsReceiptRef> = document_flows
5820                .p2p_chains
5821                .iter()
5822                .flat_map(|chain| {
5823                    let vendor_id = chain.purchase_order.vendor_id.clone();
5824                    let cc = chain.purchase_order.header.company_code.clone();
5825                    chain.goods_receipts.iter().flat_map(move |gr| {
5826                        gr.items.iter().filter_map({
5827                            let doc_id = gr.header.document_id.clone();
5828                            let v_id = vendor_id.clone();
5829                            let company = cc.clone();
5830                            let receipt_date = gr.header.document_date;
5831                            move |item| {
5832                                item.base
5833                                    .material_id
5834                                    .as_ref()
5835                                    .map(|mat_id| GoodsReceiptRef {
5836                                        document_id: doc_id.clone(),
5837                                        material_id: mat_id.clone(),
5838                                        quantity: item.base.quantity,
5839                                        receipt_date,
5840                                        vendor_id: v_id.clone(),
5841                                        company_code: company.clone(),
5842                                    })
5843                            }
5844                        })
5845                    })
5846                })
5847                .collect();
5848
5849            // Build DeliveryRef from O2C chains
5850            let del_refs: Vec<DeliveryRef> = document_flows
5851                .o2c_chains
5852                .iter()
5853                .flat_map(|chain| {
5854                    let customer_id = chain.sales_order.customer_id.clone();
5855                    let cc = chain.sales_order.header.company_code.clone();
5856                    chain.deliveries.iter().flat_map(move |del| {
5857                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
5858                        del.items.iter().filter_map({
5859                            let doc_id = del.header.document_id.clone();
5860                            let c_id = customer_id.clone();
5861                            let company = cc.clone();
5862                            move |item| {
5863                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
5864                                    document_id: doc_id.clone(),
5865                                    material_id: mat_id.clone(),
5866                                    quantity: item.base.quantity,
5867                                    delivery_date,
5868                                    customer_id: c_id.clone(),
5869                                    company_code: company.clone(),
5870                                })
5871                            }
5872                        })
5873                    })
5874                })
5875                .collect();
5876
5877            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
5878            info!("Cross-process links generated: {} links", links.len());
5879            stats.cross_process_link_count = links.len();
5880            links
5881        } else {
5882            Vec::new()
5883        };
5884
5885        self.check_resources_with_log("post-entity-relationships")?;
5886        Ok((entity_graph, cross_process_links))
5887    }
5888
5889    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
5890    fn phase_industry_data(
5891        &self,
5892        stats: &mut EnhancedGenerationStatistics,
5893    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
5894        if !self.config.industry_specific.enabled {
5895            return None;
5896        }
5897        info!("Phase 29: Generating industry-specific data");
5898        let output = datasynth_generators::industry::factory::generate_industry_output(
5899            self.config.global.industry,
5900        );
5901        stats.industry_gl_account_count = output.gl_accounts.len();
5902        info!(
5903            "Industry data generated: {} GL accounts for {:?}",
5904            output.gl_accounts.len(),
5905            self.config.global.industry
5906        );
5907        Some(output)
5908    }
5909
5910    /// Phase 3b: Generate opening balances for each company.
5911    fn phase_opening_balances(
5912        &mut self,
5913        coa: &Arc<ChartOfAccounts>,
5914        stats: &mut EnhancedGenerationStatistics,
5915    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
5916        if !self.config.balance.generate_opening_balances {
5917            debug!("Phase 3b: Skipped (opening balance generation disabled)");
5918            return Ok(Vec::new());
5919        }
5920        info!("Phase 3b: Generating Opening Balances");
5921
5922        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5923            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5924        let fiscal_year = start_date.year();
5925
5926        let industry = match self.config.global.industry {
5927            IndustrySector::Manufacturing => IndustryType::Manufacturing,
5928            IndustrySector::Retail => IndustryType::Retail,
5929            IndustrySector::FinancialServices => IndustryType::Financial,
5930            IndustrySector::Healthcare => IndustryType::Healthcare,
5931            IndustrySector::Technology => IndustryType::Technology,
5932            _ => IndustryType::Manufacturing,
5933        };
5934
5935        let config = datasynth_generators::OpeningBalanceConfig {
5936            industry,
5937            ..Default::default()
5938        };
5939        let mut gen =
5940            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
5941
5942        let mut results = Vec::new();
5943        for company in &self.config.companies {
5944            let spec = OpeningBalanceSpec::new(
5945                company.code.clone(),
5946                start_date,
5947                fiscal_year,
5948                company.currency.clone(),
5949                rust_decimal::Decimal::new(10_000_000, 0),
5950                industry,
5951            );
5952            let ob = gen.generate(&spec, coa, start_date, &company.code);
5953            results.push(ob);
5954        }
5955
5956        stats.opening_balance_count = results.len();
5957        info!("Opening balances generated: {} companies", results.len());
5958        self.check_resources_with_log("post-opening-balances")?;
5959
5960        Ok(results)
5961    }
5962
5963    /// Phase 9b: Reconcile GL control accounts to subledger balances.
5964    fn phase_subledger_reconciliation(
5965        &mut self,
5966        subledger: &SubledgerSnapshot,
5967        entries: &[JournalEntry],
5968        stats: &mut EnhancedGenerationStatistics,
5969    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
5970        if !self.config.balance.reconcile_subledgers {
5971            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
5972            return Ok(Vec::new());
5973        }
5974        info!("Phase 9b: Reconciling GL to subledger balances");
5975
5976        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5977            .map(|d| d + chrono::Months::new(self.config.global.period_months))
5978            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5979
5980        // Build GL balance map from journal entries using a balance tracker
5981        let tracker_config = BalanceTrackerConfig {
5982            validate_on_each_entry: false,
5983            track_history: false,
5984            fail_on_validation_error: false,
5985            ..Default::default()
5986        };
5987        let recon_currency = self
5988            .config
5989            .companies
5990            .first()
5991            .map(|c| c.currency.clone())
5992            .unwrap_or_else(|| "USD".to_string());
5993        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
5994        let validation_errors = tracker.apply_entries(entries);
5995        if !validation_errors.is_empty() {
5996            warn!(
5997                error_count = validation_errors.len(),
5998                "Balance tracker encountered validation errors during subledger reconciliation"
5999            );
6000            for err in &validation_errors {
6001                debug!("Balance validation error: {:?}", err);
6002            }
6003        }
6004
6005        let mut engine = datasynth_generators::ReconciliationEngine::new(
6006            datasynth_generators::ReconciliationConfig::default(),
6007        );
6008
6009        let mut results = Vec::new();
6010        let company_code = self
6011            .config
6012            .companies
6013            .first()
6014            .map(|c| c.code.as_str())
6015            .unwrap_or("1000");
6016
6017        // Reconcile AR
6018        if !subledger.ar_invoices.is_empty() {
6019            let gl_balance = tracker
6020                .get_account_balance(
6021                    company_code,
6022                    datasynth_core::accounts::control_accounts::AR_CONTROL,
6023                )
6024                .map(|b| b.closing_balance)
6025                .unwrap_or_default();
6026            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
6027            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
6028        }
6029
6030        // Reconcile AP
6031        if !subledger.ap_invoices.is_empty() {
6032            let gl_balance = tracker
6033                .get_account_balance(
6034                    company_code,
6035                    datasynth_core::accounts::control_accounts::AP_CONTROL,
6036                )
6037                .map(|b| b.closing_balance)
6038                .unwrap_or_default();
6039            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
6040            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
6041        }
6042
6043        // Reconcile FA
6044        if !subledger.fa_records.is_empty() {
6045            let gl_asset_balance = tracker
6046                .get_account_balance(
6047                    company_code,
6048                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
6049                )
6050                .map(|b| b.closing_balance)
6051                .unwrap_or_default();
6052            let gl_accum_depr_balance = tracker
6053                .get_account_balance(
6054                    company_code,
6055                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
6056                )
6057                .map(|b| b.closing_balance)
6058                .unwrap_or_default();
6059            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
6060                subledger.fa_records.iter().collect();
6061            let (asset_recon, depr_recon) = engine.reconcile_fa(
6062                company_code,
6063                end_date,
6064                gl_asset_balance,
6065                gl_accum_depr_balance,
6066                &fa_refs,
6067            );
6068            results.push(asset_recon);
6069            results.push(depr_recon);
6070        }
6071
6072        // Reconcile Inventory
6073        if !subledger.inventory_positions.is_empty() {
6074            let gl_balance = tracker
6075                .get_account_balance(
6076                    company_code,
6077                    datasynth_core::accounts::control_accounts::INVENTORY,
6078                )
6079                .map(|b| b.closing_balance)
6080                .unwrap_or_default();
6081            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
6082                subledger.inventory_positions.iter().collect();
6083            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
6084        }
6085
6086        stats.subledger_reconciliation_count = results.len();
6087        info!(
6088            "Subledger reconciliation complete: {} reconciliations",
6089            results.len()
6090        );
6091        self.check_resources_with_log("post-subledger-reconciliation")?;
6092
6093        Ok(results)
6094    }
6095
6096    /// Generate the chart of accounts.
6097    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
6098        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
6099
6100        let coa_framework = self.resolve_coa_framework();
6101
6102        let mut gen = ChartOfAccountsGenerator::new(
6103            self.config.chart_of_accounts.complexity,
6104            self.config.global.industry,
6105            self.seed,
6106        )
6107        .with_coa_framework(coa_framework);
6108
6109        let coa = Arc::new(gen.generate());
6110        self.coa = Some(Arc::clone(&coa));
6111
6112        if let Some(pb) = pb {
6113            pb.finish_with_message("Chart of Accounts complete");
6114        }
6115
6116        Ok(coa)
6117    }
6118
6119    /// Generate master data entities.
6120    fn generate_master_data(&mut self) -> SynthResult<()> {
6121        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6122            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6123        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6124
6125        let total = self.config.companies.len() as u64 * 5; // 5 entity types
6126        let pb = self.create_progress_bar(total, "Generating Master Data");
6127
6128        // Resolve country pack once for all companies (uses primary company's country)
6129        let pack = self.primary_pack().clone();
6130
6131        // Capture config values needed inside the parallel closure
6132        let vendors_per_company = self.phase_config.vendors_per_company;
6133        let customers_per_company = self.phase_config.customers_per_company;
6134        let materials_per_company = self.phase_config.materials_per_company;
6135        let assets_per_company = self.phase_config.assets_per_company;
6136        let coa_framework = self.resolve_coa_framework();
6137
6138        // Generate all master data in parallel across companies.
6139        // Each company's data is independent, making this embarrassingly parallel.
6140        let per_company_results: Vec<_> = self
6141            .config
6142            .companies
6143            .par_iter()
6144            .enumerate()
6145            .map(|(i, company)| {
6146                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
6147                let pack = pack.clone();
6148
6149                // Generate vendors (offset counter so IDs are globally unique across companies)
6150                let mut vendor_gen = VendorGenerator::new(company_seed);
6151                vendor_gen.set_country_pack(pack.clone());
6152                vendor_gen.set_coa_framework(coa_framework);
6153                vendor_gen.set_counter_offset(i * vendors_per_company);
6154                let vendor_pool =
6155                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
6156
6157                // Generate customers (offset counter so IDs are globally unique across companies)
6158                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
6159                customer_gen.set_country_pack(pack.clone());
6160                customer_gen.set_coa_framework(coa_framework);
6161                customer_gen.set_counter_offset(i * customers_per_company);
6162                let customer_pool = customer_gen.generate_customer_pool(
6163                    customers_per_company,
6164                    &company.code,
6165                    start_date,
6166                );
6167
6168                // Generate materials (offset counter so IDs are globally unique across companies)
6169                let mut material_gen = MaterialGenerator::new(company_seed + 200);
6170                material_gen.set_country_pack(pack.clone());
6171                material_gen.set_counter_offset(i * materials_per_company);
6172                let material_pool = material_gen.generate_material_pool(
6173                    materials_per_company,
6174                    &company.code,
6175                    start_date,
6176                );
6177
6178                // Generate fixed assets
6179                let mut asset_gen = AssetGenerator::new(company_seed + 300);
6180                let asset_pool = asset_gen.generate_asset_pool(
6181                    assets_per_company,
6182                    &company.code,
6183                    (start_date, end_date),
6184                );
6185
6186                // Generate employees
6187                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
6188                employee_gen.set_country_pack(pack);
6189                let employee_pool =
6190                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
6191
6192                (
6193                    vendor_pool.vendors,
6194                    customer_pool.customers,
6195                    material_pool.materials,
6196                    asset_pool.assets,
6197                    employee_pool.employees,
6198                )
6199            })
6200            .collect();
6201
6202        // Aggregate results from all companies
6203        for (vendors, customers, materials, assets, employees) in per_company_results {
6204            self.master_data.vendors.extend(vendors);
6205            self.master_data.customers.extend(customers);
6206            self.master_data.materials.extend(materials);
6207            self.master_data.assets.extend(assets);
6208            self.master_data.employees.extend(employees);
6209        }
6210
6211        if let Some(pb) = &pb {
6212            pb.inc(total);
6213        }
6214        if let Some(pb) = pb {
6215            pb.finish_with_message("Master data generation complete");
6216        }
6217
6218        Ok(())
6219    }
6220
6221    /// Generate document flows (P2P and O2C).
6222    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
6223        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6224            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6225
6226        // Generate P2P chains
6227        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
6228        let months = (self.config.global.period_months as usize).max(1);
6229        let p2p_count = self
6230            .phase_config
6231            .p2p_chains
6232            .min(self.master_data.vendors.len() * 2 * months);
6233        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
6234
6235        // Convert P2P config from schema to generator config
6236        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
6237        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
6238        p2p_gen.set_country_pack(self.primary_pack().clone());
6239
6240        for i in 0..p2p_count {
6241            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
6242            let materials: Vec<&Material> = self
6243                .master_data
6244                .materials
6245                .iter()
6246                .skip(i % self.master_data.materials.len().max(1))
6247                .take(2.min(self.master_data.materials.len()))
6248                .collect();
6249
6250            if materials.is_empty() {
6251                continue;
6252            }
6253
6254            let company = &self.config.companies[i % self.config.companies.len()];
6255            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
6256            let fiscal_period = po_date.month() as u8;
6257            let created_by = if self.master_data.employees.is_empty() {
6258                "SYSTEM"
6259            } else {
6260                self.master_data.employees[i % self.master_data.employees.len()]
6261                    .user_id
6262                    .as_str()
6263            };
6264
6265            let chain = p2p_gen.generate_chain(
6266                &company.code,
6267                vendor,
6268                &materials,
6269                po_date,
6270                start_date.year() as u16,
6271                fiscal_period,
6272                created_by,
6273            );
6274
6275            // Flatten documents
6276            flows.purchase_orders.push(chain.purchase_order.clone());
6277            flows.goods_receipts.extend(chain.goods_receipts.clone());
6278            if let Some(vi) = &chain.vendor_invoice {
6279                flows.vendor_invoices.push(vi.clone());
6280            }
6281            if let Some(payment) = &chain.payment {
6282                flows.payments.push(payment.clone());
6283            }
6284            for remainder in &chain.remainder_payments {
6285                flows.payments.push(remainder.clone());
6286            }
6287            flows.p2p_chains.push(chain);
6288
6289            if let Some(pb) = &pb {
6290                pb.inc(1);
6291            }
6292        }
6293
6294        if let Some(pb) = pb {
6295            pb.finish_with_message("P2P document flows complete");
6296        }
6297
6298        // Generate O2C chains
6299        // Cap at ~2 SOs per customer per month to keep order volume realistic
6300        let o2c_count = self
6301            .phase_config
6302            .o2c_chains
6303            .min(self.master_data.customers.len() * 2 * months);
6304        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
6305
6306        // Convert O2C config from schema to generator config
6307        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
6308        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
6309        o2c_gen.set_country_pack(self.primary_pack().clone());
6310
6311        for i in 0..o2c_count {
6312            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
6313            let materials: Vec<&Material> = self
6314                .master_data
6315                .materials
6316                .iter()
6317                .skip(i % self.master_data.materials.len().max(1))
6318                .take(2.min(self.master_data.materials.len()))
6319                .collect();
6320
6321            if materials.is_empty() {
6322                continue;
6323            }
6324
6325            let company = &self.config.companies[i % self.config.companies.len()];
6326            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
6327            let fiscal_period = so_date.month() as u8;
6328            let created_by = if self.master_data.employees.is_empty() {
6329                "SYSTEM"
6330            } else {
6331                self.master_data.employees[i % self.master_data.employees.len()]
6332                    .user_id
6333                    .as_str()
6334            };
6335
6336            let chain = o2c_gen.generate_chain(
6337                &company.code,
6338                customer,
6339                &materials,
6340                so_date,
6341                start_date.year() as u16,
6342                fiscal_period,
6343                created_by,
6344            );
6345
6346            // Flatten documents
6347            flows.sales_orders.push(chain.sales_order.clone());
6348            flows.deliveries.extend(chain.deliveries.clone());
6349            if let Some(ci) = &chain.customer_invoice {
6350                flows.customer_invoices.push(ci.clone());
6351            }
6352            if let Some(receipt) = &chain.customer_receipt {
6353                flows.payments.push(receipt.clone());
6354            }
6355            // Extract remainder receipts (follow-up to partial payments)
6356            for receipt in &chain.remainder_receipts {
6357                flows.payments.push(receipt.clone());
6358            }
6359            flows.o2c_chains.push(chain);
6360
6361            if let Some(pb) = &pb {
6362                pb.inc(1);
6363            }
6364        }
6365
6366        if let Some(pb) = pb {
6367            pb.finish_with_message("O2C document flows complete");
6368        }
6369
6370        Ok(())
6371    }
6372
6373    /// Generate journal entries using parallel generation across multiple cores.
6374    fn generate_journal_entries(
6375        &mut self,
6376        coa: &Arc<ChartOfAccounts>,
6377    ) -> SynthResult<Vec<JournalEntry>> {
6378        use datasynth_core::traits::ParallelGenerator;
6379
6380        let total = self.calculate_total_transactions();
6381        let pb = self.create_progress_bar(total, "Generating Journal Entries");
6382
6383        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6384            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6385        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6386
6387        let company_codes: Vec<String> = self
6388            .config
6389            .companies
6390            .iter()
6391            .map(|c| c.code.clone())
6392            .collect();
6393
6394        let generator = JournalEntryGenerator::new_with_params(
6395            self.config.transactions.clone(),
6396            Arc::clone(coa),
6397            company_codes,
6398            start_date,
6399            end_date,
6400            self.seed,
6401        );
6402
6403        // Connect generated master data to ensure JEs reference real entities
6404        // Enable persona-based error injection for realistic human behavior
6405        // Pass fraud configuration for fraud injection
6406        let je_pack = self.primary_pack();
6407
6408        let mut generator = generator
6409            .with_master_data(
6410                &self.master_data.vendors,
6411                &self.master_data.customers,
6412                &self.master_data.materials,
6413            )
6414            .with_country_pack_names(je_pack)
6415            .with_country_pack_temporal(
6416                self.config.temporal_patterns.clone(),
6417                self.seed + 200,
6418                je_pack,
6419            )
6420            .with_persona_errors(true)
6421            .with_fraud_config(self.config.fraud.clone());
6422
6423        // Apply temporal drift if configured
6424        if self.config.temporal.enabled {
6425            let drift_config = self.config.temporal.to_core_config();
6426            generator = generator.with_drift_config(drift_config, self.seed + 100);
6427        }
6428
6429        // Check memory limit at start
6430        self.check_memory_limit()?;
6431
6432        // Determine parallelism: use available cores, but cap at total entries
6433        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
6434
6435        // Use parallel generation for datasets with 10K+ entries.
6436        // Below this threshold, the statistical properties of a single-seeded
6437        // generator (e.g. Benford compliance) are better preserved.
6438        let entries = if total >= 10_000 && num_threads > 1 {
6439            // Parallel path: split the generator across cores and generate in parallel.
6440            // Each sub-generator gets a unique seed for deterministic, independent generation.
6441            let sub_generators = generator.split(num_threads);
6442            let entries_per_thread = total as usize / num_threads;
6443            let remainder = total as usize % num_threads;
6444
6445            let batches: Vec<Vec<JournalEntry>> = sub_generators
6446                .into_par_iter()
6447                .enumerate()
6448                .map(|(i, mut gen)| {
6449                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
6450                    gen.generate_batch(count)
6451                })
6452                .collect();
6453
6454            // Merge all batches into a single Vec
6455            let entries = JournalEntryGenerator::merge_results(batches);
6456
6457            if let Some(pb) = &pb {
6458                pb.inc(total);
6459            }
6460            entries
6461        } else {
6462            // Sequential path for small datasets (< 10,000 entries) or when only one thread is available
6463            let mut entries = Vec::with_capacity(total as usize);
6464            for _ in 0..total {
6465                let entry = generator.generate();
6466                entries.push(entry);
6467                if let Some(pb) = &pb {
6468                    pb.inc(1);
6469                }
6470            }
6471            entries
6472        };
6473
6474        if let Some(pb) = pb {
6475            pb.finish_with_message("Journal entries complete");
6476        }
6477
6478        Ok(entries)
6479    }
6480
6481    /// Generate journal entries from document flows.
6482    ///
6483    /// This creates proper GL entries for each document in the P2P and O2C flows,
6484    /// ensuring that document activity is reflected in the general ledger.
6485    fn generate_jes_from_document_flows(
6486        &mut self,
6487        flows: &DocumentFlowSnapshot,
6488    ) -> SynthResult<Vec<JournalEntry>> {
6489        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
6490        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
6491
6492        let je_config = match self.resolve_coa_framework() {
6493            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
6494            CoAFramework::GermanSkr04 => {
6495                let fa = datasynth_core::FrameworkAccounts::german_gaap();
6496                DocumentFlowJeConfig::from(&fa)
6497            }
6498            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
6499        };
6500
6501        let populate_fec = je_config.populate_fec_fields;
6502        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
6503
6504        // Build auxiliary account lookup from vendor/customer master data so that
6505        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
6506        // PCG "4010001") instead of raw partner IDs.
6507        if populate_fec {
6508            let mut aux_lookup = std::collections::HashMap::new();
6509            for vendor in &self.master_data.vendors {
6510                if let Some(ref aux) = vendor.auxiliary_gl_account {
6511                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
6512                }
6513            }
6514            for customer in &self.master_data.customers {
6515                if let Some(ref aux) = customer.auxiliary_gl_account {
6516                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
6517                }
6518            }
6519            if !aux_lookup.is_empty() {
6520                generator.set_auxiliary_account_lookup(aux_lookup);
6521            }
6522        }
6523
6524        let mut entries = Vec::new();
6525
6526        // Generate JEs from P2P chains
6527        for chain in &flows.p2p_chains {
6528            let chain_entries = generator.generate_from_p2p_chain(chain);
6529            entries.extend(chain_entries);
6530            if let Some(pb) = &pb {
6531                pb.inc(1);
6532            }
6533        }
6534
6535        // Generate JEs from O2C chains
6536        for chain in &flows.o2c_chains {
6537            let chain_entries = generator.generate_from_o2c_chain(chain);
6538            entries.extend(chain_entries);
6539            if let Some(pb) = &pb {
6540                pb.inc(1);
6541            }
6542        }
6543
6544        if let Some(pb) = pb {
6545            pb.finish_with_message(format!(
6546                "Generated {} JEs from document flows",
6547                entries.len()
6548            ));
6549        }
6550
6551        Ok(entries)
6552    }
6553
6554    /// Generate journal entries from payroll runs.
6555    ///
6556    /// Creates one JE per payroll run:
6557    /// - DR Salaries & Wages (6100) for gross pay
6558    /// - CR Payroll Clearing (9100) for gross pay
6559    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
6560        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
6561
6562        let mut jes = Vec::with_capacity(payroll_runs.len());
6563
6564        for run in payroll_runs {
6565            let mut je = JournalEntry::new_simple(
6566                format!("JE-PAYROLL-{}", run.payroll_id),
6567                run.company_code.clone(),
6568                run.run_date,
6569                format!("Payroll {}", run.payroll_id),
6570            );
6571
6572            // Debit Salaries & Wages for gross pay
6573            je.add_line(JournalEntryLine {
6574                line_number: 1,
6575                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
6576                debit_amount: run.total_gross,
6577                reference: Some(run.payroll_id.clone()),
6578                text: Some(format!(
6579                    "Payroll {} ({} employees)",
6580                    run.payroll_id, run.employee_count
6581                )),
6582                ..Default::default()
6583            });
6584
6585            // Credit Payroll Clearing for gross pay
6586            je.add_line(JournalEntryLine {
6587                line_number: 2,
6588                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
6589                credit_amount: run.total_gross,
6590                reference: Some(run.payroll_id.clone()),
6591                ..Default::default()
6592            });
6593
6594            jes.push(je);
6595        }
6596
6597        jes
6598    }
6599
6600    /// Generate journal entries from production orders.
6601    ///
6602    /// Creates one JE per completed production order:
6603    /// - DR Raw Materials (5100) for material consumption (actual_cost)
6604    /// - CR Inventory (1200) for material consumption
6605    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
6606        use datasynth_core::accounts::{control_accounts, expense_accounts};
6607        use datasynth_core::models::ProductionOrderStatus;
6608
6609        let mut jes = Vec::new();
6610
6611        for order in production_orders {
6612            // Only generate JEs for completed or closed orders
6613            if !matches!(
6614                order.status,
6615                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
6616            ) {
6617                continue;
6618            }
6619
6620            let mut je = JournalEntry::new_simple(
6621                format!("JE-MFG-{}", order.order_id),
6622                order.company_code.clone(),
6623                order.actual_end.unwrap_or(order.planned_end),
6624                format!(
6625                    "Production Order {} - {}",
6626                    order.order_id, order.material_description
6627                ),
6628            );
6629
6630            // Debit Raw Materials / Manufacturing expense for actual cost
6631            je.add_line(JournalEntryLine {
6632                line_number: 1,
6633                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
6634                debit_amount: order.actual_cost,
6635                reference: Some(order.order_id.clone()),
6636                text: Some(format!(
6637                    "Material consumption for {}",
6638                    order.material_description
6639                )),
6640                quantity: Some(order.actual_quantity),
6641                unit: Some("EA".to_string()),
6642                ..Default::default()
6643            });
6644
6645            // Credit Inventory for material consumption
6646            je.add_line(JournalEntryLine {
6647                line_number: 2,
6648                gl_account: control_accounts::INVENTORY.to_string(),
6649                credit_amount: order.actual_cost,
6650                reference: Some(order.order_id.clone()),
6651                ..Default::default()
6652            });
6653
6654            jes.push(je);
6655        }
6656
6657        jes
6658    }
6659
6660    /// Link document flows to subledger records.
6661    ///
6662    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
6663    /// ensuring subledger data is coherent with document flow data.
6664    fn link_document_flows_to_subledgers(
6665        &mut self,
6666        flows: &DocumentFlowSnapshot,
6667    ) -> SynthResult<SubledgerSnapshot> {
6668        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
6669        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
6670
6671        // Build vendor/customer name maps from master data for realistic subledger names
6672        let vendor_names: std::collections::HashMap<String, String> = self
6673            .master_data
6674            .vendors
6675            .iter()
6676            .map(|v| (v.vendor_id.clone(), v.name.clone()))
6677            .collect();
6678        let customer_names: std::collections::HashMap<String, String> = self
6679            .master_data
6680            .customers
6681            .iter()
6682            .map(|c| (c.customer_id.clone(), c.name.clone()))
6683            .collect();
6684
6685        let mut linker = DocumentFlowLinker::new()
6686            .with_vendor_names(vendor_names)
6687            .with_customer_names(customer_names);
6688
6689        // Convert vendor invoices to AP invoices
6690        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
6691        if let Some(pb) = &pb {
6692            pb.inc(flows.vendor_invoices.len() as u64);
6693        }
6694
6695        // Convert customer invoices to AR invoices
6696        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
6697        if let Some(pb) = &pb {
6698            pb.inc(flows.customer_invoices.len() as u64);
6699        }
6700
6701        if let Some(pb) = pb {
6702            pb.finish_with_message(format!(
6703                "Linked {} AP and {} AR invoices",
6704                ap_invoices.len(),
6705                ar_invoices.len()
6706            ));
6707        }
6708
6709        Ok(SubledgerSnapshot {
6710            ap_invoices,
6711            ar_invoices,
6712            fa_records: Vec::new(),
6713            inventory_positions: Vec::new(),
6714            inventory_movements: Vec::new(),
6715        })
6716    }
6717
6718    /// Generate OCPM events from document flows.
6719    ///
6720    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
6721    /// capturing the object-centric process perspective.
6722    #[allow(clippy::too_many_arguments)]
6723    fn generate_ocpm_events(
6724        &mut self,
6725        flows: &DocumentFlowSnapshot,
6726        sourcing: &SourcingSnapshot,
6727        hr: &HrSnapshot,
6728        manufacturing: &ManufacturingSnapshot,
6729        banking: &BankingSnapshot,
6730        audit: &AuditSnapshot,
6731        financial_reporting: &FinancialReportingSnapshot,
6732    ) -> SynthResult<OcpmSnapshot> {
6733        let total_chains = flows.p2p_chains.len()
6734            + flows.o2c_chains.len()
6735            + sourcing.sourcing_projects.len()
6736            + hr.payroll_runs.len()
6737            + manufacturing.production_orders.len()
6738            + banking.customers.len()
6739            + audit.engagements.len()
6740            + financial_reporting.bank_reconciliations.len();
6741        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
6742
6743        // Create OCPM event log with standard types
6744        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
6745        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
6746
6747        // Configure the OCPM generator
6748        let ocpm_config = OcpmGeneratorConfig {
6749            generate_p2p: true,
6750            generate_o2c: true,
6751            generate_s2c: !sourcing.sourcing_projects.is_empty(),
6752            generate_h2r: !hr.payroll_runs.is_empty(),
6753            generate_mfg: !manufacturing.production_orders.is_empty(),
6754            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
6755            generate_bank: !banking.customers.is_empty(),
6756            generate_audit: !audit.engagements.is_empty(),
6757            happy_path_rate: 0.75,
6758            exception_path_rate: 0.20,
6759            error_path_rate: 0.05,
6760            add_duration_variability: true,
6761            duration_std_dev_factor: 0.3,
6762        };
6763        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
6764        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
6765
6766        // Get available users for resource assignment
6767        let available_users: Vec<String> = self
6768            .master_data
6769            .employees
6770            .iter()
6771            .take(20)
6772            .map(|e| e.user_id.clone())
6773            .collect();
6774
6775        // Deterministic base date from config (avoids Utc::now() non-determinism)
6776        let fallback_date =
6777            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
6778        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6779            .unwrap_or(fallback_date);
6780        let base_midnight = base_date
6781            .and_hms_opt(0, 0, 0)
6782            .expect("midnight is always valid");
6783        let base_datetime =
6784            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
6785
6786        // Helper closure to add case results to event log
6787        let add_result = |event_log: &mut OcpmEventLog,
6788                          result: datasynth_ocpm::CaseGenerationResult| {
6789            for event in result.events {
6790                event_log.add_event(event);
6791            }
6792            for object in result.objects {
6793                event_log.add_object(object);
6794            }
6795            for relationship in result.relationships {
6796                event_log.add_relationship(relationship);
6797            }
6798            for corr in result.correlation_events {
6799                event_log.add_correlation_event(corr);
6800            }
6801            event_log.add_case(result.case_trace);
6802        };
6803
6804        // Generate events from P2P chains
6805        for chain in &flows.p2p_chains {
6806            let po = &chain.purchase_order;
6807            let documents = P2pDocuments::new(
6808                &po.header.document_id,
6809                &po.vendor_id,
6810                &po.header.company_code,
6811                po.total_net_amount,
6812                &po.header.currency,
6813                &ocpm_uuid_factory,
6814            )
6815            .with_goods_receipt(
6816                chain
6817                    .goods_receipts
6818                    .first()
6819                    .map(|gr| gr.header.document_id.as_str())
6820                    .unwrap_or(""),
6821                &ocpm_uuid_factory,
6822            )
6823            .with_invoice(
6824                chain
6825                    .vendor_invoice
6826                    .as_ref()
6827                    .map(|vi| vi.header.document_id.as_str())
6828                    .unwrap_or(""),
6829                &ocpm_uuid_factory,
6830            )
6831            .with_payment(
6832                chain
6833                    .payment
6834                    .as_ref()
6835                    .map(|p| p.header.document_id.as_str())
6836                    .unwrap_or(""),
6837                &ocpm_uuid_factory,
6838            );
6839
6840            let start_time =
6841                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
6842            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
6843            add_result(&mut event_log, result);
6844
6845            if let Some(pb) = &pb {
6846                pb.inc(1);
6847            }
6848        }
6849
6850        // Generate events from O2C chains
6851        for chain in &flows.o2c_chains {
6852            let so = &chain.sales_order;
6853            let documents = O2cDocuments::new(
6854                &so.header.document_id,
6855                &so.customer_id,
6856                &so.header.company_code,
6857                so.total_net_amount,
6858                &so.header.currency,
6859                &ocpm_uuid_factory,
6860            )
6861            .with_delivery(
6862                chain
6863                    .deliveries
6864                    .first()
6865                    .map(|d| d.header.document_id.as_str())
6866                    .unwrap_or(""),
6867                &ocpm_uuid_factory,
6868            )
6869            .with_invoice(
6870                chain
6871                    .customer_invoice
6872                    .as_ref()
6873                    .map(|ci| ci.header.document_id.as_str())
6874                    .unwrap_or(""),
6875                &ocpm_uuid_factory,
6876            )
6877            .with_receipt(
6878                chain
6879                    .customer_receipt
6880                    .as_ref()
6881                    .map(|r| r.header.document_id.as_str())
6882                    .unwrap_or(""),
6883                &ocpm_uuid_factory,
6884            );
6885
6886            let start_time =
6887                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
6888            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
6889            add_result(&mut event_log, result);
6890
6891            if let Some(pb) = &pb {
6892                pb.inc(1);
6893            }
6894        }
6895
6896        // Generate events from S2C sourcing projects
6897        for project in &sourcing.sourcing_projects {
6898            // Find vendor from contracts or qualifications
6899            let vendor_id = sourcing
6900                .contracts
6901                .iter()
6902                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6903                .map(|c| c.vendor_id.clone())
6904                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
6905                .or_else(|| {
6906                    self.master_data
6907                        .vendors
6908                        .first()
6909                        .map(|v| v.vendor_id.clone())
6910                })
6911                .unwrap_or_else(|| "V000".to_string());
6912            let mut docs = S2cDocuments::new(
6913                &project.project_id,
6914                &vendor_id,
6915                &project.company_code,
6916                project.estimated_annual_spend,
6917                &ocpm_uuid_factory,
6918            );
6919            // Link RFx if available
6920            if let Some(rfx) = sourcing
6921                .rfx_events
6922                .iter()
6923                .find(|r| r.sourcing_project_id == project.project_id)
6924            {
6925                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
6926                // Link winning bid (status == Accepted)
6927                if let Some(bid) = sourcing.bids.iter().find(|b| {
6928                    b.rfx_id == rfx.rfx_id
6929                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
6930                }) {
6931                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
6932                }
6933            }
6934            // Link contract
6935            if let Some(contract) = sourcing
6936                .contracts
6937                .iter()
6938                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6939            {
6940                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
6941            }
6942            let start_time = base_datetime - chrono::Duration::days(90);
6943            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
6944            add_result(&mut event_log, result);
6945
6946            if let Some(pb) = &pb {
6947                pb.inc(1);
6948            }
6949        }
6950
6951        // Generate events from H2R payroll runs
6952        for run in &hr.payroll_runs {
6953            // Use first matching payroll line item's employee, or fallback
6954            let employee_id = hr
6955                .payroll_line_items
6956                .iter()
6957                .find(|li| li.payroll_id == run.payroll_id)
6958                .map(|li| li.employee_id.as_str())
6959                .unwrap_or("EMP000");
6960            let docs = H2rDocuments::new(
6961                &run.payroll_id,
6962                employee_id,
6963                &run.company_code,
6964                run.total_gross,
6965                &ocpm_uuid_factory,
6966            )
6967            .with_time_entries(
6968                hr.time_entries
6969                    .iter()
6970                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
6971                    .take(5)
6972                    .map(|t| t.entry_id.as_str())
6973                    .collect(),
6974            );
6975            let start_time = base_datetime - chrono::Duration::days(30);
6976            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
6977            add_result(&mut event_log, result);
6978
6979            if let Some(pb) = &pb {
6980                pb.inc(1);
6981            }
6982        }
6983
6984        // Generate events from MFG production orders
6985        for order in &manufacturing.production_orders {
6986            let mut docs = MfgDocuments::new(
6987                &order.order_id,
6988                &order.material_id,
6989                &order.company_code,
6990                order.planned_quantity,
6991                &ocpm_uuid_factory,
6992            )
6993            .with_operations(
6994                order
6995                    .operations
6996                    .iter()
6997                    .map(|o| format!("OP-{:04}", o.operation_number))
6998                    .collect::<Vec<_>>()
6999                    .iter()
7000                    .map(std::string::String::as_str)
7001                    .collect(),
7002            );
7003            // Link quality inspection if available (via reference_id matching order_id)
7004            if let Some(insp) = manufacturing
7005                .quality_inspections
7006                .iter()
7007                .find(|i| i.reference_id == order.order_id)
7008            {
7009                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
7010            }
7011            // Link cycle count if available (match by material_id in items)
7012            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
7013                cc.items
7014                    .iter()
7015                    .any(|item| item.material_id == order.material_id)
7016            }) {
7017                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
7018            }
7019            let start_time = base_datetime - chrono::Duration::days(60);
7020            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
7021            add_result(&mut event_log, result);
7022
7023            if let Some(pb) = &pb {
7024                pb.inc(1);
7025            }
7026        }
7027
7028        // Generate events from Banking customers
7029        for customer in &banking.customers {
7030            let customer_id_str = customer.customer_id.to_string();
7031            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
7032            // Link accounts (primary_owner_id matches customer_id)
7033            if let Some(account) = banking
7034                .accounts
7035                .iter()
7036                .find(|a| a.primary_owner_id == customer.customer_id)
7037            {
7038                let account_id_str = account.account_id.to_string();
7039                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
7040                // Link transactions for this account
7041                let txn_strs: Vec<String> = banking
7042                    .transactions
7043                    .iter()
7044                    .filter(|t| t.account_id == account.account_id)
7045                    .take(10)
7046                    .map(|t| t.transaction_id.to_string())
7047                    .collect();
7048                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
7049                let txn_amounts: Vec<rust_decimal::Decimal> = banking
7050                    .transactions
7051                    .iter()
7052                    .filter(|t| t.account_id == account.account_id)
7053                    .take(10)
7054                    .map(|t| t.amount)
7055                    .collect();
7056                if !txn_ids.is_empty() {
7057                    docs = docs.with_transactions(txn_ids, txn_amounts);
7058                }
7059            }
7060            let start_time = base_datetime - chrono::Duration::days(180);
7061            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
7062            add_result(&mut event_log, result);
7063
7064            if let Some(pb) = &pb {
7065                pb.inc(1);
7066            }
7067        }
7068
7069        // Generate events from Audit engagements
7070        for engagement in &audit.engagements {
7071            let engagement_id_str = engagement.engagement_id.to_string();
7072            let docs = AuditDocuments::new(
7073                &engagement_id_str,
7074                &engagement.client_entity_id,
7075                &ocpm_uuid_factory,
7076            )
7077            .with_workpapers(
7078                audit
7079                    .workpapers
7080                    .iter()
7081                    .filter(|w| w.engagement_id == engagement.engagement_id)
7082                    .take(10)
7083                    .map(|w| w.workpaper_id.to_string())
7084                    .collect::<Vec<_>>()
7085                    .iter()
7086                    .map(std::string::String::as_str)
7087                    .collect(),
7088            )
7089            .with_evidence(
7090                audit
7091                    .evidence
7092                    .iter()
7093                    .filter(|e| e.engagement_id == engagement.engagement_id)
7094                    .take(10)
7095                    .map(|e| e.evidence_id.to_string())
7096                    .collect::<Vec<_>>()
7097                    .iter()
7098                    .map(std::string::String::as_str)
7099                    .collect(),
7100            )
7101            .with_risks(
7102                audit
7103                    .risk_assessments
7104                    .iter()
7105                    .filter(|r| r.engagement_id == engagement.engagement_id)
7106                    .take(5)
7107                    .map(|r| r.risk_id.to_string())
7108                    .collect::<Vec<_>>()
7109                    .iter()
7110                    .map(std::string::String::as_str)
7111                    .collect(),
7112            )
7113            .with_findings(
7114                audit
7115                    .findings
7116                    .iter()
7117                    .filter(|f| f.engagement_id == engagement.engagement_id)
7118                    .take(5)
7119                    .map(|f| f.finding_id.to_string())
7120                    .collect::<Vec<_>>()
7121                    .iter()
7122                    .map(std::string::String::as_str)
7123                    .collect(),
7124            )
7125            .with_judgments(
7126                audit
7127                    .judgments
7128                    .iter()
7129                    .filter(|j| j.engagement_id == engagement.engagement_id)
7130                    .take(5)
7131                    .map(|j| j.judgment_id.to_string())
7132                    .collect::<Vec<_>>()
7133                    .iter()
7134                    .map(std::string::String::as_str)
7135                    .collect(),
7136            );
7137            let start_time = base_datetime - chrono::Duration::days(120);
7138            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
7139            add_result(&mut event_log, result);
7140
7141            if let Some(pb) = &pb {
7142                pb.inc(1);
7143            }
7144        }
7145
7146        // Generate events from Bank Reconciliations
7147        for recon in &financial_reporting.bank_reconciliations {
7148            let docs = BankReconDocuments::new(
7149                &recon.reconciliation_id,
7150                &recon.bank_account_id,
7151                &recon.company_code,
7152                recon.bank_ending_balance,
7153                &ocpm_uuid_factory,
7154            )
7155            .with_statement_lines(
7156                recon
7157                    .statement_lines
7158                    .iter()
7159                    .take(20)
7160                    .map(|l| l.line_id.as_str())
7161                    .collect(),
7162            )
7163            .with_reconciling_items(
7164                recon
7165                    .reconciling_items
7166                    .iter()
7167                    .take(10)
7168                    .map(|i| i.item_id.as_str())
7169                    .collect(),
7170            );
7171            let start_time = base_datetime - chrono::Duration::days(30);
7172            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
7173            add_result(&mut event_log, result);
7174
7175            if let Some(pb) = &pb {
7176                pb.inc(1);
7177            }
7178        }
7179
7180        // Compute process variants
7181        event_log.compute_variants();
7182
7183        let summary = event_log.summary();
7184
7185        if let Some(pb) = pb {
7186            pb.finish_with_message(format!(
7187                "Generated {} OCPM events, {} objects",
7188                summary.event_count, summary.object_count
7189            ));
7190        }
7191
7192        Ok(OcpmSnapshot {
7193            event_count: summary.event_count,
7194            object_count: summary.object_count,
7195            case_count: summary.case_count,
7196            event_log: Some(event_log),
7197        })
7198    }
7199
7200    /// Inject anomalies into journal entries.
7201    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
7202        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
7203
7204        // Read anomaly rates from config instead of using hardcoded values.
7205        // Priority: anomaly_injection config > fraud config > default 0.02
7206        let total_rate = if self.config.anomaly_injection.enabled {
7207            self.config.anomaly_injection.rates.total_rate
7208        } else if self.config.fraud.enabled {
7209            self.config.fraud.fraud_rate
7210        } else {
7211            0.02
7212        };
7213
7214        let fraud_rate = if self.config.anomaly_injection.enabled {
7215            self.config.anomaly_injection.rates.fraud_rate
7216        } else {
7217            AnomalyRateConfig::default().fraud_rate
7218        };
7219
7220        let error_rate = if self.config.anomaly_injection.enabled {
7221            self.config.anomaly_injection.rates.error_rate
7222        } else {
7223            AnomalyRateConfig::default().error_rate
7224        };
7225
7226        let process_issue_rate = if self.config.anomaly_injection.enabled {
7227            self.config.anomaly_injection.rates.process_rate
7228        } else {
7229            AnomalyRateConfig::default().process_issue_rate
7230        };
7231
7232        let anomaly_config = AnomalyInjectorConfig {
7233            rates: AnomalyRateConfig {
7234                total_rate,
7235                fraud_rate,
7236                error_rate,
7237                process_issue_rate,
7238                ..Default::default()
7239            },
7240            seed: self.seed + 5000,
7241            ..Default::default()
7242        };
7243
7244        let mut injector = AnomalyInjector::new(anomaly_config);
7245        let result = injector.process_entries(entries);
7246
7247        if let Some(pb) = &pb {
7248            pb.inc(entries.len() as u64);
7249            pb.finish_with_message("Anomaly injection complete");
7250        }
7251
7252        let mut by_type = HashMap::new();
7253        for label in &result.labels {
7254            *by_type
7255                .entry(format!("{:?}", label.anomaly_type))
7256                .or_insert(0) += 1;
7257        }
7258
7259        Ok(AnomalyLabels {
7260            labels: result.labels,
7261            summary: Some(result.summary),
7262            by_type,
7263        })
7264    }
7265
7266    /// Validate journal entries using running balance tracker.
7267    ///
7268    /// Applies all entries to the balance tracker and validates:
7269    /// - Each entry is internally balanced (debits = credits)
7270    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
7271    ///
7272    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
7273    /// excluded from balance validation as they may be intentionally unbalanced.
7274    fn validate_journal_entries(
7275        &mut self,
7276        entries: &[JournalEntry],
7277    ) -> SynthResult<BalanceValidationResult> {
7278        // Filter out entries with human errors as they may be intentionally unbalanced
7279        let clean_entries: Vec<&JournalEntry> = entries
7280            .iter()
7281            .filter(|e| {
7282                e.header
7283                    .header_text
7284                    .as_ref()
7285                    .map(|t| !t.contains("[HUMAN_ERROR:"))
7286                    .unwrap_or(true)
7287            })
7288            .collect();
7289
7290        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
7291
7292        // Configure tracker to not fail on errors (collect them instead)
7293        let config = BalanceTrackerConfig {
7294            validate_on_each_entry: false,   // We'll validate at the end
7295            track_history: false,            // Skip history for performance
7296            fail_on_validation_error: false, // Collect errors, don't fail
7297            ..Default::default()
7298        };
7299        let validation_currency = self
7300            .config
7301            .companies
7302            .first()
7303            .map(|c| c.currency.clone())
7304            .unwrap_or_else(|| "USD".to_string());
7305
7306        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
7307
7308        // Apply clean entries (without human errors)
7309        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
7310        let errors = tracker.apply_entries(&clean_refs);
7311
7312        if let Some(pb) = &pb {
7313            pb.inc(entries.len() as u64);
7314        }
7315
7316        // Check if any entries were unbalanced
7317        // Note: When fail_on_validation_error is false, errors are stored in tracker
7318        let has_unbalanced = tracker
7319            .get_validation_errors()
7320            .iter()
7321            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
7322
7323        // Validate balance sheet for each company
7324        // Include both returned errors and collected validation errors
7325        let mut all_errors = errors;
7326        all_errors.extend(tracker.get_validation_errors().iter().cloned());
7327        let company_codes: Vec<String> = self
7328            .config
7329            .companies
7330            .iter()
7331            .map(|c| c.code.clone())
7332            .collect();
7333
7334        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7335            .map(|d| d + chrono::Months::new(self.config.global.period_months))
7336            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7337
7338        for company_code in &company_codes {
7339            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
7340                all_errors.push(e);
7341            }
7342        }
7343
7344        // Get statistics after all mutable operations are done
7345        let stats = tracker.get_statistics();
7346
7347        // Determine if balanced overall
7348        let is_balanced = all_errors.is_empty();
7349
7350        if let Some(pb) = pb {
7351            let msg = if is_balanced {
7352                "Balance validation passed"
7353            } else {
7354                "Balance validation completed with errors"
7355            };
7356            pb.finish_with_message(msg);
7357        }
7358
7359        Ok(BalanceValidationResult {
7360            validated: true,
7361            is_balanced,
7362            entries_processed: stats.entries_processed,
7363            total_debits: stats.total_debits,
7364            total_credits: stats.total_credits,
7365            accounts_tracked: stats.accounts_tracked,
7366            companies_tracked: stats.companies_tracked,
7367            validation_errors: all_errors,
7368            has_unbalanced_entries: has_unbalanced,
7369        })
7370    }
7371
7372    /// Inject data quality variations into journal entries.
7373    ///
7374    /// Applies typos, missing values, and format variations to make
7375    /// the synthetic data more realistic for testing data cleaning pipelines.
7376    fn inject_data_quality(
7377        &mut self,
7378        entries: &mut [JournalEntry],
7379    ) -> SynthResult<DataQualityStats> {
7380        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
7381
7382        // Build config from user-specified schema settings when data_quality is enabled;
7383        // otherwise fall back to the low-rate minimal() preset.
7384        let config = if self.config.data_quality.enabled {
7385            let dq = &self.config.data_quality;
7386            DataQualityConfig {
7387                enable_missing_values: dq.missing_values.enabled,
7388                missing_values: datasynth_generators::MissingValueConfig {
7389                    global_rate: dq.effective_missing_rate(),
7390                    ..Default::default()
7391                },
7392                enable_format_variations: dq.format_variations.enabled,
7393                format_variations: datasynth_generators::FormatVariationConfig {
7394                    date_variation_rate: dq.format_variations.dates.rate,
7395                    amount_variation_rate: dq.format_variations.amounts.rate,
7396                    identifier_variation_rate: dq.format_variations.identifiers.rate,
7397                    ..Default::default()
7398                },
7399                enable_duplicates: dq.duplicates.enabled,
7400                duplicates: datasynth_generators::DuplicateConfig {
7401                    duplicate_rate: dq.effective_duplicate_rate(),
7402                    ..Default::default()
7403                },
7404                enable_typos: dq.typos.enabled,
7405                typos: datasynth_generators::TypoConfig {
7406                    char_error_rate: dq.effective_typo_rate(),
7407                    ..Default::default()
7408                },
7409                enable_encoding_issues: dq.encoding_issues.enabled,
7410                encoding_issue_rate: dq.encoding_issues.rate,
7411                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
7412                track_statistics: true,
7413            }
7414        } else {
7415            DataQualityConfig::minimal()
7416        };
7417        let mut injector = DataQualityInjector::new(config);
7418
7419        // Wire country pack for locale-aware format baselines
7420        injector.set_country_pack(self.primary_pack().clone());
7421
7422        // Build context for missing value decisions
7423        let context = HashMap::new();
7424
7425        for entry in entries.iter_mut() {
7426            // Process header_text field (common target for typos)
7427            if let Some(text) = &entry.header.header_text {
7428                let processed = injector.process_text_field(
7429                    "header_text",
7430                    text,
7431                    &entry.header.document_id.to_string(),
7432                    &context,
7433                );
7434                match processed {
7435                    Some(new_text) if new_text != *text => {
7436                        entry.header.header_text = Some(new_text);
7437                    }
7438                    None => {
7439                        entry.header.header_text = None; // Missing value
7440                    }
7441                    _ => {}
7442                }
7443            }
7444
7445            // Process reference field
7446            if let Some(ref_text) = &entry.header.reference {
7447                let processed = injector.process_text_field(
7448                    "reference",
7449                    ref_text,
7450                    &entry.header.document_id.to_string(),
7451                    &context,
7452                );
7453                match processed {
7454                    Some(new_text) if new_text != *ref_text => {
7455                        entry.header.reference = Some(new_text);
7456                    }
7457                    None => {
7458                        entry.header.reference = None;
7459                    }
7460                    _ => {}
7461                }
7462            }
7463
7464            // Process user_persona field (potential for typos in user IDs)
7465            let user_persona = entry.header.user_persona.clone();
7466            if let Some(processed) = injector.process_text_field(
7467                "user_persona",
7468                &user_persona,
7469                &entry.header.document_id.to_string(),
7470                &context,
7471            ) {
7472                if processed != user_persona {
7473                    entry.header.user_persona = processed;
7474                }
7475            }
7476
7477            // Process line items
7478            for line in &mut entry.lines {
7479                // Process line description if present
7480                if let Some(ref text) = line.line_text {
7481                    let processed = injector.process_text_field(
7482                        "line_text",
7483                        text,
7484                        &entry.header.document_id.to_string(),
7485                        &context,
7486                    );
7487                    match processed {
7488                        Some(new_text) if new_text != *text => {
7489                            line.line_text = Some(new_text);
7490                        }
7491                        None => {
7492                            line.line_text = None;
7493                        }
7494                        _ => {}
7495                    }
7496                }
7497
7498                // Process cost_center if present
7499                if let Some(cc) = &line.cost_center {
7500                    let processed = injector.process_text_field(
7501                        "cost_center",
7502                        cc,
7503                        &entry.header.document_id.to_string(),
7504                        &context,
7505                    );
7506                    match processed {
7507                        Some(new_cc) if new_cc != *cc => {
7508                            line.cost_center = Some(new_cc);
7509                        }
7510                        None => {
7511                            line.cost_center = None;
7512                        }
7513                        _ => {}
7514                    }
7515                }
7516            }
7517
7518            if let Some(pb) = &pb {
7519                pb.inc(1);
7520            }
7521        }
7522
7523        if let Some(pb) = pb {
7524            pb.finish_with_message("Data quality injection complete");
7525        }
7526
7527        Ok(injector.stats().clone())
7528    }
7529
7530    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
7531    ///
7532    /// Creates complete audit documentation for each company in the configuration,
7533    /// following ISA standards:
7534    /// - ISA 210/220: Engagement acceptance and terms
7535    /// - ISA 230: Audit documentation (workpapers)
7536    /// - ISA 265: Control deficiencies (findings)
7537    /// - ISA 315/330: Risk assessment and response
7538    /// - ISA 500: Audit evidence
7539    /// - ISA 200: Professional judgment
7540    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
7541        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7542            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7543        let fiscal_year = start_date.year() as u16;
7544        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
7545
7546        // Calculate rough total revenue from entries for materiality
7547        let total_revenue: rust_decimal::Decimal = entries
7548            .iter()
7549            .flat_map(|e| e.lines.iter())
7550            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
7551            .map(|l| l.credit_amount)
7552            .sum();
7553
7554        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
7555        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
7556
7557        let mut snapshot = AuditSnapshot::default();
7558
7559        // Initialize generators
7560        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
7561        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
7562        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
7563        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
7564        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
7565        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
7566
7567        // Get list of accounts from CoA for risk assessment
7568        let accounts: Vec<String> = self
7569            .coa
7570            .as_ref()
7571            .map(|coa| {
7572                coa.get_postable_accounts()
7573                    .iter()
7574                    .map(|acc| acc.account_code().to_string())
7575                    .collect()
7576            })
7577            .unwrap_or_default();
7578
7579        // Generate engagements for each company
7580        for (i, company) in self.config.companies.iter().enumerate() {
7581            // Calculate company-specific revenue (proportional to volume weight)
7582            let company_revenue = total_revenue
7583                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
7584
7585            // Generate engagements for this company
7586            let engagements_for_company =
7587                self.phase_config.audit_engagements / self.config.companies.len().max(1);
7588            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
7589                1
7590            } else {
7591                0
7592            };
7593
7594            for _eng_idx in 0..(engagements_for_company + extra) {
7595                // Generate the engagement
7596                let mut engagement = engagement_gen.generate_engagement(
7597                    &company.code,
7598                    &company.name,
7599                    fiscal_year,
7600                    period_end,
7601                    company_revenue,
7602                    None, // Use default engagement type
7603                );
7604
7605                // Replace synthetic team IDs with real employee IDs from master data
7606                if !self.master_data.employees.is_empty() {
7607                    let emp_count = self.master_data.employees.len();
7608                    // Use employee IDs deterministically based on engagement index
7609                    let base = (i * 10 + _eng_idx) % emp_count;
7610                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
7611                        .employee_id
7612                        .clone();
7613                    engagement.engagement_manager_id = self.master_data.employees
7614                        [(base + 1) % emp_count]
7615                        .employee_id
7616                        .clone();
7617                    let real_team: Vec<String> = engagement
7618                        .team_member_ids
7619                        .iter()
7620                        .enumerate()
7621                        .map(|(j, _)| {
7622                            self.master_data.employees[(base + 2 + j) % emp_count]
7623                                .employee_id
7624                                .clone()
7625                        })
7626                        .collect();
7627                    engagement.team_member_ids = real_team;
7628                }
7629
7630                if let Some(pb) = &pb {
7631                    pb.inc(1);
7632                }
7633
7634                // Get team members from the engagement
7635                let team_members: Vec<String> = engagement.team_member_ids.clone();
7636
7637                // Generate workpapers for the engagement
7638                let workpapers =
7639                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
7640
7641                for wp in &workpapers {
7642                    if let Some(pb) = &pb {
7643                        pb.inc(1);
7644                    }
7645
7646                    // Generate evidence for each workpaper
7647                    let evidence = evidence_gen.generate_evidence_for_workpaper(
7648                        wp,
7649                        &team_members,
7650                        wp.preparer_date,
7651                    );
7652
7653                    for _ in &evidence {
7654                        if let Some(pb) = &pb {
7655                            pb.inc(1);
7656                        }
7657                    }
7658
7659                    snapshot.evidence.extend(evidence);
7660                }
7661
7662                // Generate risk assessments for the engagement
7663                let risks =
7664                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
7665
7666                for _ in &risks {
7667                    if let Some(pb) = &pb {
7668                        pb.inc(1);
7669                    }
7670                }
7671                snapshot.risk_assessments.extend(risks);
7672
7673                // Generate findings for the engagement
7674                let findings = finding_gen.generate_findings_for_engagement(
7675                    &engagement,
7676                    &workpapers,
7677                    &team_members,
7678                );
7679
7680                for _ in &findings {
7681                    if let Some(pb) = &pb {
7682                        pb.inc(1);
7683                    }
7684                }
7685                snapshot.findings.extend(findings);
7686
7687                // Generate professional judgments for the engagement
7688                let judgments =
7689                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
7690
7691                for _ in &judgments {
7692                    if let Some(pb) = &pb {
7693                        pb.inc(1);
7694                    }
7695                }
7696                snapshot.judgments.extend(judgments);
7697
7698                // Add workpapers after findings since findings need them
7699                snapshot.workpapers.extend(workpapers);
7700                snapshot.engagements.push(engagement);
7701            }
7702        }
7703
7704        if let Some(pb) = pb {
7705            pb.finish_with_message(format!(
7706                "Audit data: {} engagements, {} workpapers, {} evidence",
7707                snapshot.engagements.len(),
7708                snapshot.workpapers.len(),
7709                snapshot.evidence.len()
7710            ));
7711        }
7712
7713        Ok(snapshot)
7714    }
7715
7716    /// Export journal entries as graph data for ML training and network reconstruction.
7717    ///
7718    /// Builds a transaction graph where:
7719    /// - Nodes are GL accounts
7720    /// - Edges are money flows from credit to debit accounts
7721    /// - Edge attributes include amount, date, business process, anomaly flags
7722    fn export_graphs(
7723        &mut self,
7724        entries: &[JournalEntry],
7725        _coa: &Arc<ChartOfAccounts>,
7726        stats: &mut EnhancedGenerationStatistics,
7727    ) -> SynthResult<GraphExportSnapshot> {
7728        let pb = self.create_progress_bar(100, "Exporting Graphs");
7729
7730        let mut snapshot = GraphExportSnapshot::default();
7731
7732        // Get output directory
7733        let output_dir = self
7734            .output_path
7735            .clone()
7736            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7737        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7738
7739        // Process each graph type configuration
7740        for graph_type in &self.config.graph_export.graph_types {
7741            if let Some(pb) = &pb {
7742                pb.inc(10);
7743            }
7744
7745            // Build transaction graph
7746            let graph_config = TransactionGraphConfig {
7747                include_vendors: false,
7748                include_customers: false,
7749                create_debit_credit_edges: true,
7750                include_document_nodes: graph_type.include_document_nodes,
7751                min_edge_weight: graph_type.min_edge_weight,
7752                aggregate_parallel_edges: graph_type.aggregate_edges,
7753                framework: None,
7754            };
7755
7756            let mut builder = TransactionGraphBuilder::new(graph_config);
7757            builder.add_journal_entries(entries);
7758            let graph = builder.build();
7759
7760            // Update stats
7761            stats.graph_node_count += graph.node_count();
7762            stats.graph_edge_count += graph.edge_count();
7763
7764            if let Some(pb) = &pb {
7765                pb.inc(40);
7766            }
7767
7768            // Export to each configured format
7769            for format in &self.config.graph_export.formats {
7770                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
7771
7772                // Create output directory
7773                if let Err(e) = std::fs::create_dir_all(&format_dir) {
7774                    warn!("Failed to create graph output directory: {}", e);
7775                    continue;
7776                }
7777
7778                match format {
7779                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
7780                        let pyg_config = PyGExportConfig {
7781                            common: datasynth_graph::CommonExportConfig {
7782                                export_node_features: true,
7783                                export_edge_features: true,
7784                                export_node_labels: true,
7785                                export_edge_labels: true,
7786                                export_masks: true,
7787                                train_ratio: self.config.graph_export.train_ratio,
7788                                val_ratio: self.config.graph_export.validation_ratio,
7789                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7790                            },
7791                            one_hot_categoricals: false,
7792                        };
7793
7794                        let exporter = PyGExporter::new(pyg_config);
7795                        match exporter.export(&graph, &format_dir) {
7796                            Ok(metadata) => {
7797                                snapshot.exports.insert(
7798                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
7799                                    GraphExportInfo {
7800                                        name: graph_type.name.clone(),
7801                                        format: "pytorch_geometric".to_string(),
7802                                        output_path: format_dir.clone(),
7803                                        node_count: metadata.num_nodes,
7804                                        edge_count: metadata.num_edges,
7805                                    },
7806                                );
7807                                snapshot.graph_count += 1;
7808                            }
7809                            Err(e) => {
7810                                warn!("Failed to export PyTorch Geometric graph: {}", e);
7811                            }
7812                        }
7813                    }
7814                    datasynth_config::schema::GraphExportFormat::Neo4j => {
7815                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
7816
7817                        let neo4j_config = Neo4jExportConfig {
7818                            export_node_properties: true,
7819                            export_edge_properties: true,
7820                            export_features: true,
7821                            generate_cypher: true,
7822                            generate_admin_import: true,
7823                            database_name: "synth".to_string(),
7824                            cypher_batch_size: 1000,
7825                        };
7826
7827                        let exporter = Neo4jExporter::new(neo4j_config);
7828                        match exporter.export(&graph, &format_dir) {
7829                            Ok(metadata) => {
7830                                snapshot.exports.insert(
7831                                    format!("{}_{}", graph_type.name, "neo4j"),
7832                                    GraphExportInfo {
7833                                        name: graph_type.name.clone(),
7834                                        format: "neo4j".to_string(),
7835                                        output_path: format_dir.clone(),
7836                                        node_count: metadata.num_nodes,
7837                                        edge_count: metadata.num_edges,
7838                                    },
7839                                );
7840                                snapshot.graph_count += 1;
7841                            }
7842                            Err(e) => {
7843                                warn!("Failed to export Neo4j graph: {}", e);
7844                            }
7845                        }
7846                    }
7847                    datasynth_config::schema::GraphExportFormat::Dgl => {
7848                        use datasynth_graph::{DGLExportConfig, DGLExporter};
7849
7850                        let dgl_config = DGLExportConfig {
7851                            common: datasynth_graph::CommonExportConfig {
7852                                export_node_features: true,
7853                                export_edge_features: true,
7854                                export_node_labels: true,
7855                                export_edge_labels: true,
7856                                export_masks: true,
7857                                train_ratio: self.config.graph_export.train_ratio,
7858                                val_ratio: self.config.graph_export.validation_ratio,
7859                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7860                            },
7861                            heterogeneous: false,
7862                            include_pickle_script: true, // DGL ecosystem standard helper
7863                        };
7864
7865                        let exporter = DGLExporter::new(dgl_config);
7866                        match exporter.export(&graph, &format_dir) {
7867                            Ok(metadata) => {
7868                                snapshot.exports.insert(
7869                                    format!("{}_{}", graph_type.name, "dgl"),
7870                                    GraphExportInfo {
7871                                        name: graph_type.name.clone(),
7872                                        format: "dgl".to_string(),
7873                                        output_path: format_dir.clone(),
7874                                        node_count: metadata.common.num_nodes,
7875                                        edge_count: metadata.common.num_edges,
7876                                    },
7877                                );
7878                                snapshot.graph_count += 1;
7879                            }
7880                            Err(e) => {
7881                                warn!("Failed to export DGL graph: {}", e);
7882                            }
7883                        }
7884                    }
7885                    datasynth_config::schema::GraphExportFormat::RustGraph => {
7886                        use datasynth_graph::{
7887                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
7888                        };
7889
7890                        let rustgraph_config = RustGraphExportConfig {
7891                            include_features: true,
7892                            include_temporal: true,
7893                            include_labels: true,
7894                            source_name: "datasynth".to_string(),
7895                            batch_id: None,
7896                            output_format: RustGraphOutputFormat::JsonLines,
7897                            export_node_properties: true,
7898                            export_edge_properties: true,
7899                            pretty_print: false,
7900                        };
7901
7902                        let exporter = RustGraphExporter::new(rustgraph_config);
7903                        match exporter.export(&graph, &format_dir) {
7904                            Ok(metadata) => {
7905                                snapshot.exports.insert(
7906                                    format!("{}_{}", graph_type.name, "rustgraph"),
7907                                    GraphExportInfo {
7908                                        name: graph_type.name.clone(),
7909                                        format: "rustgraph".to_string(),
7910                                        output_path: format_dir.clone(),
7911                                        node_count: metadata.num_nodes,
7912                                        edge_count: metadata.num_edges,
7913                                    },
7914                                );
7915                                snapshot.graph_count += 1;
7916                            }
7917                            Err(e) => {
7918                                warn!("Failed to export RustGraph: {}", e);
7919                            }
7920                        }
7921                    }
7922                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
7923                        // Hypergraph export is handled separately in Phase 10b
7924                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
7925                    }
7926                }
7927            }
7928
7929            if let Some(pb) = &pb {
7930                pb.inc(40);
7931            }
7932        }
7933
7934        stats.graph_export_count = snapshot.graph_count;
7935        snapshot.exported = snapshot.graph_count > 0;
7936
7937        if let Some(pb) = pb {
7938            pb.finish_with_message(format!(
7939                "Graphs exported: {} graphs ({} nodes, {} edges)",
7940                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
7941            ));
7942        }
7943
7944        Ok(snapshot)
7945    }
7946
7947    /// Build additional graph types (banking, approval, entity) when relevant data
7948    /// is available. These run as a late phase because the data they need (banking
7949    /// snapshot, intercompany snapshot) is only generated after the main graph
7950    /// export phase.
7951    fn build_additional_graphs(
7952        &self,
7953        banking: &BankingSnapshot,
7954        intercompany: &IntercompanySnapshot,
7955        entries: &[JournalEntry],
7956        stats: &mut EnhancedGenerationStatistics,
7957    ) {
7958        let output_dir = self
7959            .output_path
7960            .clone()
7961            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7962        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7963
7964        // Banking graph: build when banking customers and transactions exist
7965        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
7966            info!("Phase 10c: Building banking network graph");
7967            let config = BankingGraphConfig::default();
7968            let mut builder = BankingGraphBuilder::new(config);
7969            builder.add_customers(&banking.customers);
7970            builder.add_accounts(&banking.accounts, &banking.customers);
7971            builder.add_transactions(&banking.transactions);
7972            let graph = builder.build();
7973
7974            let node_count = graph.node_count();
7975            let edge_count = graph.edge_count();
7976            stats.graph_node_count += node_count;
7977            stats.graph_edge_count += edge_count;
7978
7979            // Export as PyG if configured
7980            for format in &self.config.graph_export.formats {
7981                if matches!(
7982                    format,
7983                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
7984                ) {
7985                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
7986                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
7987                        warn!("Failed to create banking graph output dir: {}", e);
7988                        continue;
7989                    }
7990                    let pyg_config = PyGExportConfig::default();
7991                    let exporter = PyGExporter::new(pyg_config);
7992                    if let Err(e) = exporter.export(&graph, &format_dir) {
7993                        warn!("Failed to export banking graph as PyG: {}", e);
7994                    } else {
7995                        info!(
7996                            "Banking network graph exported: {} nodes, {} edges",
7997                            node_count, edge_count
7998                        );
7999                    }
8000                }
8001            }
8002        }
8003
8004        // Approval graph: build from journal entry approval workflows
8005        let approval_entries: Vec<_> = entries
8006            .iter()
8007            .filter(|je| je.header.approval_workflow.is_some())
8008            .collect();
8009
8010        if !approval_entries.is_empty() {
8011            info!(
8012                "Phase 10c: Building approval network graph ({} entries with approvals)",
8013                approval_entries.len()
8014            );
8015            let config = ApprovalGraphConfig::default();
8016            let mut builder = ApprovalGraphBuilder::new(config);
8017
8018            for je in &approval_entries {
8019                if let Some(ref wf) = je.header.approval_workflow {
8020                    for action in &wf.actions {
8021                        let record = datasynth_core::models::ApprovalRecord {
8022                            approval_id: format!(
8023                                "APR-{}-{}",
8024                                je.header.document_id, action.approval_level
8025                            ),
8026                            document_number: je.header.document_id.to_string(),
8027                            document_type: "JE".to_string(),
8028                            company_code: je.company_code().to_string(),
8029                            requester_id: wf.preparer_id.clone(),
8030                            requester_name: Some(wf.preparer_name.clone()),
8031                            approver_id: action.actor_id.clone(),
8032                            approver_name: action.actor_name.clone(),
8033                            approval_date: je.posting_date(),
8034                            action: format!("{:?}", action.action),
8035                            amount: wf.amount,
8036                            approval_limit: None,
8037                            comments: action.comments.clone(),
8038                            delegation_from: None,
8039                            is_auto_approved: false,
8040                        };
8041                        builder.add_approval(&record);
8042                    }
8043                }
8044            }
8045
8046            let graph = builder.build();
8047            let node_count = graph.node_count();
8048            let edge_count = graph.edge_count();
8049            stats.graph_node_count += node_count;
8050            stats.graph_edge_count += edge_count;
8051
8052            // Export as PyG if configured
8053            for format in &self.config.graph_export.formats {
8054                if matches!(
8055                    format,
8056                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
8057                ) {
8058                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
8059                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
8060                        warn!("Failed to create approval graph output dir: {}", e);
8061                        continue;
8062                    }
8063                    let pyg_config = PyGExportConfig::default();
8064                    let exporter = PyGExporter::new(pyg_config);
8065                    if let Err(e) = exporter.export(&graph, &format_dir) {
8066                        warn!("Failed to export approval graph as PyG: {}", e);
8067                    } else {
8068                        info!(
8069                            "Approval network graph exported: {} nodes, {} edges",
8070                            node_count, edge_count
8071                        );
8072                    }
8073                }
8074            }
8075        }
8076
8077        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
8078        if self.config.companies.len() >= 2 {
8079            info!(
8080                "Phase 10c: Building entity relationship graph ({} companies)",
8081                self.config.companies.len()
8082            );
8083
8084            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8085                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
8086
8087            // Map CompanyConfig → Company objects
8088            let parent_code = &self.config.companies[0].code;
8089            let mut companies: Vec<datasynth_core::models::Company> =
8090                Vec::with_capacity(self.config.companies.len());
8091
8092            // First company is the parent
8093            let first = &self.config.companies[0];
8094            companies.push(datasynth_core::models::Company::parent(
8095                &first.code,
8096                &first.name,
8097                &first.country,
8098                &first.currency,
8099            ));
8100
8101            // Remaining companies are subsidiaries (100% owned by parent)
8102            for cc in self.config.companies.iter().skip(1) {
8103                companies.push(datasynth_core::models::Company::subsidiary(
8104                    &cc.code,
8105                    &cc.name,
8106                    &cc.country,
8107                    &cc.currency,
8108                    parent_code,
8109                    rust_decimal::Decimal::from(100),
8110                ));
8111            }
8112
8113            // Build IntercompanyRelationship records (same logic as phase_intercompany)
8114            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
8115                self.config
8116                    .companies
8117                    .iter()
8118                    .skip(1)
8119                    .enumerate()
8120                    .map(|(i, cc)| {
8121                        let mut rel =
8122                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
8123                                format!("REL{:03}", i + 1),
8124                                parent_code.clone(),
8125                                cc.code.clone(),
8126                                rust_decimal::Decimal::from(100),
8127                                start_date,
8128                            );
8129                        rel.functional_currency = cc.currency.clone();
8130                        rel
8131                    })
8132                    .collect();
8133
8134            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
8135            builder.add_companies(&companies);
8136            builder.add_ownership_relationships(&relationships);
8137
8138            // Thread IC matched-pair transaction edges into the entity graph
8139            for pair in &intercompany.matched_pairs {
8140                builder.add_intercompany_edge(
8141                    &pair.seller_company,
8142                    &pair.buyer_company,
8143                    pair.amount,
8144                    &format!("{:?}", pair.transaction_type),
8145                );
8146            }
8147
8148            let graph = builder.build();
8149            let node_count = graph.node_count();
8150            let edge_count = graph.edge_count();
8151            stats.graph_node_count += node_count;
8152            stats.graph_edge_count += edge_count;
8153
8154            // Export as PyG if configured
8155            for format in &self.config.graph_export.formats {
8156                if matches!(
8157                    format,
8158                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
8159                ) {
8160                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
8161                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
8162                        warn!("Failed to create entity graph output dir: {}", e);
8163                        continue;
8164                    }
8165                    let pyg_config = PyGExportConfig::default();
8166                    let exporter = PyGExporter::new(pyg_config);
8167                    if let Err(e) = exporter.export(&graph, &format_dir) {
8168                        warn!("Failed to export entity graph as PyG: {}", e);
8169                    } else {
8170                        info!(
8171                            "Entity relationship graph exported: {} nodes, {} edges",
8172                            node_count, edge_count
8173                        );
8174                    }
8175                }
8176            }
8177        } else {
8178            debug!(
8179                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
8180                self.config.companies.len()
8181            );
8182        }
8183    }
8184
8185    /// Export a multi-layer hypergraph for RustGraph integration.
8186    ///
8187    /// Builds a 3-layer hypergraph:
8188    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
8189    /// - Layer 2: Process Events (all process family document flows + OCPM events)
8190    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
8191    #[allow(clippy::too_many_arguments)]
8192    fn export_hypergraph(
8193        &self,
8194        coa: &Arc<ChartOfAccounts>,
8195        entries: &[JournalEntry],
8196        document_flows: &DocumentFlowSnapshot,
8197        sourcing: &SourcingSnapshot,
8198        hr: &HrSnapshot,
8199        manufacturing: &ManufacturingSnapshot,
8200        banking: &BankingSnapshot,
8201        audit: &AuditSnapshot,
8202        financial_reporting: &FinancialReportingSnapshot,
8203        ocpm: &OcpmSnapshot,
8204        compliance: &ComplianceRegulationsSnapshot,
8205        stats: &mut EnhancedGenerationStatistics,
8206    ) -> SynthResult<HypergraphExportInfo> {
8207        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
8208        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
8209        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
8210        use datasynth_graph::models::hypergraph::AggregationStrategy;
8211
8212        let hg_settings = &self.config.graph_export.hypergraph;
8213
8214        // Parse aggregation strategy from config string
8215        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
8216            "truncate" => AggregationStrategy::Truncate,
8217            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
8218            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
8219            "importance_sample" => AggregationStrategy::ImportanceSample,
8220            _ => AggregationStrategy::PoolByCounterparty,
8221        };
8222
8223        let builder_config = HypergraphConfig {
8224            max_nodes: hg_settings.max_nodes,
8225            aggregation_strategy,
8226            include_coso: hg_settings.governance_layer.include_coso,
8227            include_controls: hg_settings.governance_layer.include_controls,
8228            include_sox: hg_settings.governance_layer.include_sox,
8229            include_vendors: hg_settings.governance_layer.include_vendors,
8230            include_customers: hg_settings.governance_layer.include_customers,
8231            include_employees: hg_settings.governance_layer.include_employees,
8232            include_p2p: hg_settings.process_layer.include_p2p,
8233            include_o2c: hg_settings.process_layer.include_o2c,
8234            include_s2c: hg_settings.process_layer.include_s2c,
8235            include_h2r: hg_settings.process_layer.include_h2r,
8236            include_mfg: hg_settings.process_layer.include_mfg,
8237            include_bank: hg_settings.process_layer.include_bank,
8238            include_audit: hg_settings.process_layer.include_audit,
8239            include_r2r: hg_settings.process_layer.include_r2r,
8240            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
8241            docs_per_counterparty_threshold: hg_settings
8242                .process_layer
8243                .docs_per_counterparty_threshold,
8244            include_accounts: hg_settings.accounting_layer.include_accounts,
8245            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
8246            include_cross_layer_edges: hg_settings.cross_layer.enabled,
8247            include_compliance: self.config.compliance_regulations.enabled,
8248        };
8249
8250        let mut builder = HypergraphBuilder::new(builder_config);
8251
8252        // Layer 1: Governance & Controls
8253        builder.add_coso_framework();
8254
8255        // Add controls if available (generated during JE generation)
8256        // Controls are generated per-company; we use the standard set
8257        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
8258            let controls = InternalControl::standard_controls();
8259            builder.add_controls(&controls);
8260        }
8261
8262        // Add master data
8263        builder.add_vendors(&self.master_data.vendors);
8264        builder.add_customers(&self.master_data.customers);
8265        builder.add_employees(&self.master_data.employees);
8266
8267        // Layer 2: Process Events (all process families)
8268        builder.add_p2p_documents(
8269            &document_flows.purchase_orders,
8270            &document_flows.goods_receipts,
8271            &document_flows.vendor_invoices,
8272            &document_flows.payments,
8273        );
8274        builder.add_o2c_documents(
8275            &document_flows.sales_orders,
8276            &document_flows.deliveries,
8277            &document_flows.customer_invoices,
8278        );
8279        builder.add_s2c_documents(
8280            &sourcing.sourcing_projects,
8281            &sourcing.qualifications,
8282            &sourcing.rfx_events,
8283            &sourcing.bids,
8284            &sourcing.bid_evaluations,
8285            &sourcing.contracts,
8286        );
8287        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
8288        builder.add_mfg_documents(
8289            &manufacturing.production_orders,
8290            &manufacturing.quality_inspections,
8291            &manufacturing.cycle_counts,
8292        );
8293        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
8294        builder.add_audit_documents(
8295            &audit.engagements,
8296            &audit.workpapers,
8297            &audit.findings,
8298            &audit.evidence,
8299            &audit.risk_assessments,
8300            &audit.judgments,
8301        );
8302        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
8303
8304        // OCPM events as hyperedges
8305        if let Some(ref event_log) = ocpm.event_log {
8306            builder.add_ocpm_events(event_log);
8307        }
8308
8309        // Compliance regulations as cross-layer nodes
8310        if self.config.compliance_regulations.enabled
8311            && hg_settings.governance_layer.include_controls
8312        {
8313            // Reconstruct ComplianceStandard objects from the registry
8314            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
8315            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
8316                .standard_records
8317                .iter()
8318                .filter_map(|r| {
8319                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
8320                    registry.get(&sid).cloned()
8321                })
8322                .collect();
8323
8324            builder.add_compliance_regulations(
8325                &standards,
8326                &compliance.findings,
8327                &compliance.filings,
8328            );
8329        }
8330
8331        // Layer 3: Accounting Network
8332        builder.add_accounts(coa);
8333        builder.add_journal_entries_as_hyperedges(entries);
8334
8335        // Build the hypergraph
8336        let hypergraph = builder.build();
8337
8338        // Export
8339        let output_dir = self
8340            .output_path
8341            .clone()
8342            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
8343        let hg_dir = output_dir
8344            .join(&self.config.graph_export.output_subdirectory)
8345            .join(&hg_settings.output_subdirectory);
8346
8347        // Branch on output format
8348        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
8349            "unified" => {
8350                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8351                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8352                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
8353                })?;
8354                (
8355                    metadata.num_nodes,
8356                    metadata.num_edges,
8357                    metadata.num_hyperedges,
8358                )
8359            }
8360            _ => {
8361                // "native" or any unrecognized format → use existing exporter
8362                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
8363                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8364                    SynthError::generation(format!("Hypergraph export failed: {e}"))
8365                })?;
8366                (
8367                    metadata.num_nodes,
8368                    metadata.num_edges,
8369                    metadata.num_hyperedges,
8370                )
8371            }
8372        };
8373
8374        // Stream to RustGraph ingest endpoint if configured
8375        #[cfg(feature = "streaming")]
8376        if let Some(ref target_url) = hg_settings.stream_target {
8377            use crate::stream_client::{StreamClient, StreamConfig};
8378            use std::io::Write as _;
8379
8380            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
8381            let stream_config = StreamConfig {
8382                target_url: target_url.clone(),
8383                batch_size: hg_settings.stream_batch_size,
8384                api_key,
8385                ..StreamConfig::default()
8386            };
8387
8388            match StreamClient::new(stream_config) {
8389                Ok(mut client) => {
8390                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8391                    match exporter.export_to_writer(&hypergraph, &mut client) {
8392                        Ok(_) => {
8393                            if let Err(e) = client.flush() {
8394                                warn!("Failed to flush stream client: {}", e);
8395                            } else {
8396                                info!("Streamed {} records to {}", client.total_sent(), target_url);
8397                            }
8398                        }
8399                        Err(e) => {
8400                            warn!("Streaming export failed: {}", e);
8401                        }
8402                    }
8403                }
8404                Err(e) => {
8405                    warn!("Failed to create stream client: {}", e);
8406                }
8407            }
8408        }
8409
8410        // Update stats
8411        stats.graph_node_count += num_nodes;
8412        stats.graph_edge_count += num_edges;
8413        stats.graph_export_count += 1;
8414
8415        Ok(HypergraphExportInfo {
8416            node_count: num_nodes,
8417            edge_count: num_edges,
8418            hyperedge_count: num_hyperedges,
8419            output_path: hg_dir,
8420        })
8421    }
8422
8423    /// Generate banking KYC/AML data.
8424    ///
8425    /// Creates banking customers, accounts, and transactions with AML typology injection.
8426    /// Uses the BankingOrchestrator from synth-banking crate.
8427    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
8428        let pb = self.create_progress_bar(100, "Generating Banking Data");
8429
8430        // Build the banking orchestrator from config
8431        let orchestrator = BankingOrchestratorBuilder::new()
8432            .config(self.config.banking.clone())
8433            .seed(self.seed + 9000)
8434            .country_pack(self.primary_pack().clone())
8435            .build();
8436
8437        if let Some(pb) = &pb {
8438            pb.inc(10);
8439        }
8440
8441        // Generate the banking data
8442        let result = orchestrator.generate();
8443
8444        if let Some(pb) = &pb {
8445            pb.inc(90);
8446            pb.finish_with_message(format!(
8447                "Banking: {} customers, {} transactions",
8448                result.customers.len(),
8449                result.transactions.len()
8450            ));
8451        }
8452
8453        // Cross-reference banking customers with core master data so that
8454        // banking customer names align with the enterprise customer list.
8455        // We rotate through core customers, overlaying their name and country
8456        // onto the generated banking customers where possible.
8457        let mut banking_customers = result.customers;
8458        let core_customers = &self.master_data.customers;
8459        if !core_customers.is_empty() {
8460            for (i, bc) in banking_customers.iter_mut().enumerate() {
8461                let core = &core_customers[i % core_customers.len()];
8462                bc.name = CustomerName::business(&core.name);
8463                bc.residence_country = core.country.clone();
8464                bc.enterprise_customer_id = Some(core.customer_id.clone());
8465            }
8466            debug!(
8467                "Cross-referenced {} banking customers with {} core customers",
8468                banking_customers.len(),
8469                core_customers.len()
8470            );
8471        }
8472
8473        Ok(BankingSnapshot {
8474            customers: banking_customers,
8475            accounts: result.accounts,
8476            transactions: result.transactions,
8477            transaction_labels: result.transaction_labels,
8478            customer_labels: result.customer_labels,
8479            account_labels: result.account_labels,
8480            relationship_labels: result.relationship_labels,
8481            narratives: result.narratives,
8482            suspicious_count: result.stats.suspicious_count,
8483            scenario_count: result.scenarios.len(),
8484        })
8485    }
8486
8487    /// Calculate total transactions to generate.
8488    fn calculate_total_transactions(&self) -> u64 {
8489        let months = self.config.global.period_months as f64;
8490        self.config
8491            .companies
8492            .iter()
8493            .map(|c| {
8494                let annual = c.annual_transaction_volume.count() as f64;
8495                let weighted = annual * c.volume_weight;
8496                (weighted * months / 12.0) as u64
8497            })
8498            .sum()
8499    }
8500
8501    /// Create a progress bar if progress display is enabled.
8502    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
8503        if !self.phase_config.show_progress {
8504            return None;
8505        }
8506
8507        let pb = if let Some(mp) = &self.multi_progress {
8508            mp.add(ProgressBar::new(total))
8509        } else {
8510            ProgressBar::new(total)
8511        };
8512
8513        pb.set_style(
8514            ProgressStyle::default_bar()
8515                .template(&format!(
8516                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
8517                ))
8518                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
8519                .progress_chars("#>-"),
8520        );
8521
8522        Some(pb)
8523    }
8524
8525    /// Get the generated chart of accounts.
8526    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
8527        self.coa.clone()
8528    }
8529
8530    /// Get the generated master data.
8531    pub fn get_master_data(&self) -> &MasterDataSnapshot {
8532        &self.master_data
8533    }
8534
8535    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
8536    fn phase_compliance_regulations(
8537        &mut self,
8538        _stats: &mut EnhancedGenerationStatistics,
8539    ) -> SynthResult<ComplianceRegulationsSnapshot> {
8540        if !self.phase_config.generate_compliance_regulations {
8541            return Ok(ComplianceRegulationsSnapshot::default());
8542        }
8543
8544        info!("Phase: Generating Compliance Regulations Data");
8545
8546        let cr_config = &self.config.compliance_regulations;
8547
8548        // Determine jurisdictions: from config or inferred from companies
8549        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
8550            self.config
8551                .companies
8552                .iter()
8553                .map(|c| c.country.clone())
8554                .collect::<std::collections::HashSet<_>>()
8555                .into_iter()
8556                .collect()
8557        } else {
8558            cr_config.jurisdictions.clone()
8559        };
8560
8561        // Determine reference date
8562        let fallback_date =
8563            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
8564        let reference_date = cr_config
8565            .reference_date
8566            .as_ref()
8567            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
8568            .unwrap_or_else(|| {
8569                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8570                    .unwrap_or(fallback_date)
8571            });
8572
8573        // Generate standards registry data
8574        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
8575        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
8576        let cross_reference_records = reg_gen.generate_cross_reference_records();
8577        let jurisdiction_records =
8578            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
8579
8580        info!(
8581            "  Standards: {} records, {} cross-references, {} jurisdictions",
8582            standard_records.len(),
8583            cross_reference_records.len(),
8584            jurisdiction_records.len()
8585        );
8586
8587        // Generate audit procedures (if enabled)
8588        let audit_procedures = if cr_config.audit_procedures.enabled {
8589            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
8590                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
8591                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
8592                confidence_level: cr_config.audit_procedures.confidence_level,
8593                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
8594            };
8595            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
8596                self.seed + 9000,
8597                proc_config,
8598            );
8599            let registry = reg_gen.registry();
8600            let mut all_procs = Vec::new();
8601            for jurisdiction in &jurisdictions {
8602                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
8603                all_procs.extend(procs);
8604            }
8605            info!("  Audit procedures: {}", all_procs.len());
8606            all_procs
8607        } else {
8608            Vec::new()
8609        };
8610
8611        // Generate compliance findings (if enabled)
8612        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
8613            let finding_config =
8614                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
8615                    finding_rate: cr_config.findings.finding_rate,
8616                    material_weakness_rate: cr_config.findings.material_weakness_rate,
8617                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
8618                    generate_remediation: cr_config.findings.generate_remediation,
8619                };
8620            let mut finding_gen =
8621                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
8622                    self.seed + 9100,
8623                    finding_config,
8624                );
8625            let mut all_findings = Vec::new();
8626            for company in &self.config.companies {
8627                let company_findings =
8628                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
8629                all_findings.extend(company_findings);
8630            }
8631            info!("  Compliance findings: {}", all_findings.len());
8632            all_findings
8633        } else {
8634            Vec::new()
8635        };
8636
8637        // Generate regulatory filings (if enabled)
8638        let filings = if cr_config.filings.enabled {
8639            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
8640                filing_types: cr_config.filings.filing_types.clone(),
8641                generate_status_progression: cr_config.filings.generate_status_progression,
8642            };
8643            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
8644                self.seed + 9200,
8645                filing_config,
8646            );
8647            let company_codes: Vec<String> = self
8648                .config
8649                .companies
8650                .iter()
8651                .map(|c| c.code.clone())
8652                .collect();
8653            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8654                .unwrap_or(fallback_date);
8655            let filings = filing_gen.generate_filings(
8656                &company_codes,
8657                &jurisdictions,
8658                start_date,
8659                self.config.global.period_months,
8660            );
8661            info!("  Regulatory filings: {}", filings.len());
8662            filings
8663        } else {
8664            Vec::new()
8665        };
8666
8667        // Build compliance graph (if enabled)
8668        let compliance_graph = if cr_config.graph.enabled {
8669            let graph_config = datasynth_graph::ComplianceGraphConfig {
8670                include_standard_nodes: cr_config.graph.include_compliance_nodes,
8671                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
8672                include_cross_references: cr_config.graph.include_cross_references,
8673                include_supersession_edges: cr_config.graph.include_supersession_edges,
8674                include_account_links: cr_config.graph.include_account_links,
8675                include_control_links: cr_config.graph.include_control_links,
8676                include_company_links: cr_config.graph.include_company_links,
8677            };
8678            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
8679
8680            // Add standard nodes
8681            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
8682                .iter()
8683                .map(|r| datasynth_graph::StandardNodeInput {
8684                    standard_id: r.standard_id.clone(),
8685                    title: r.title.clone(),
8686                    category: r.category.clone(),
8687                    domain: r.domain.clone(),
8688                    is_active: r.is_active,
8689                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
8690                    applicable_account_types: r.applicable_account_types.clone(),
8691                    applicable_processes: r.applicable_processes.clone(),
8692                })
8693                .collect();
8694            builder.add_standards(&standard_inputs);
8695
8696            // Add jurisdiction nodes
8697            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
8698                jurisdiction_records
8699                    .iter()
8700                    .map(|r| datasynth_graph::JurisdictionNodeInput {
8701                        country_code: r.country_code.clone(),
8702                        country_name: r.country_name.clone(),
8703                        framework: r.accounting_framework.clone(),
8704                        standard_count: r.standard_count,
8705                        tax_rate: r.statutory_tax_rate,
8706                    })
8707                    .collect();
8708            builder.add_jurisdictions(&jurisdiction_inputs);
8709
8710            // Add cross-reference edges
8711            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
8712                cross_reference_records
8713                    .iter()
8714                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
8715                        from_standard: r.from_standard.clone(),
8716                        to_standard: r.to_standard.clone(),
8717                        relationship: r.relationship.clone(),
8718                        convergence_level: r.convergence_level,
8719                    })
8720                    .collect();
8721            builder.add_cross_references(&xref_inputs);
8722
8723            // Add jurisdiction→standard mappings
8724            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
8725                .iter()
8726                .map(|r| datasynth_graph::JurisdictionMappingInput {
8727                    country_code: r.jurisdiction.clone(),
8728                    standard_id: r.standard_id.clone(),
8729                })
8730                .collect();
8731            builder.add_jurisdiction_mappings(&mapping_inputs);
8732
8733            // Add procedure nodes
8734            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
8735                .iter()
8736                .map(|p| datasynth_graph::ProcedureNodeInput {
8737                    procedure_id: p.procedure_id.clone(),
8738                    standard_id: p.standard_id.clone(),
8739                    procedure_type: p.procedure_type.clone(),
8740                    sample_size: p.sample_size,
8741                    confidence_level: p.confidence_level,
8742                })
8743                .collect();
8744            builder.add_procedures(&proc_inputs);
8745
8746            // Add finding nodes
8747            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
8748                .iter()
8749                .map(|f| datasynth_graph::FindingNodeInput {
8750                    finding_id: f.finding_id.to_string(),
8751                    standard_id: f
8752                        .related_standards
8753                        .first()
8754                        .map(|s| s.as_str().to_string())
8755                        .unwrap_or_default(),
8756                    severity: f.severity.to_string(),
8757                    deficiency_level: f.deficiency_level.to_string(),
8758                    severity_score: f.deficiency_level.severity_score(),
8759                    control_id: f.control_id.clone(),
8760                    affected_accounts: f.affected_accounts.clone(),
8761                })
8762                .collect();
8763            builder.add_findings(&finding_inputs);
8764
8765            // Cross-domain: link standards to accounts from chart of accounts
8766            if cr_config.graph.include_account_links {
8767                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
8768                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
8769                for std_record in &standard_records {
8770                    if let Some(std_obj) =
8771                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
8772                            &std_record.standard_id,
8773                        ))
8774                    {
8775                        for acct_type in &std_obj.applicable_account_types {
8776                            account_links.push(datasynth_graph::AccountLinkInput {
8777                                standard_id: std_record.standard_id.clone(),
8778                                account_code: acct_type.clone(),
8779                                account_name: acct_type.clone(),
8780                            });
8781                        }
8782                    }
8783                }
8784                builder.add_account_links(&account_links);
8785            }
8786
8787            // Cross-domain: link standards to internal controls
8788            if cr_config.graph.include_control_links {
8789                let mut control_links = Vec::new();
8790                // SOX/PCAOB standards link to all controls
8791                let sox_like_ids: Vec<String> = standard_records
8792                    .iter()
8793                    .filter(|r| {
8794                        r.standard_id.starts_with("SOX")
8795                            || r.standard_id.starts_with("PCAOB-AS-2201")
8796                    })
8797                    .map(|r| r.standard_id.clone())
8798                    .collect();
8799                // Get control IDs from config (C001-C060 standard controls)
8800                let control_ids = [
8801                    ("C001", "Cash Controls"),
8802                    ("C002", "Large Transaction Approval"),
8803                    ("C010", "PO Approval"),
8804                    ("C011", "Three-Way Match"),
8805                    ("C020", "Revenue Recognition"),
8806                    ("C021", "Credit Check"),
8807                    ("C030", "Manual JE Approval"),
8808                    ("C031", "Period Close Review"),
8809                    ("C032", "Account Reconciliation"),
8810                    ("C040", "Payroll Processing"),
8811                    ("C050", "Fixed Asset Capitalization"),
8812                    ("C060", "Intercompany Elimination"),
8813                ];
8814                for sox_id in &sox_like_ids {
8815                    for (ctrl_id, ctrl_name) in &control_ids {
8816                        control_links.push(datasynth_graph::ControlLinkInput {
8817                            standard_id: sox_id.clone(),
8818                            control_id: ctrl_id.to_string(),
8819                            control_name: ctrl_name.to_string(),
8820                        });
8821                    }
8822                }
8823                builder.add_control_links(&control_links);
8824            }
8825
8826            // Cross-domain: filing nodes with company links
8827            if cr_config.graph.include_company_links {
8828                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
8829                    .iter()
8830                    .enumerate()
8831                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
8832                        filing_id: format!("F{:04}", i + 1),
8833                        filing_type: f.filing_type.to_string(),
8834                        company_code: f.company_code.clone(),
8835                        jurisdiction: f.jurisdiction.clone(),
8836                        status: format!("{:?}", f.status),
8837                    })
8838                    .collect();
8839                builder.add_filings(&filing_inputs);
8840            }
8841
8842            let graph = builder.build();
8843            info!(
8844                "  Compliance graph: {} nodes, {} edges",
8845                graph.nodes.len(),
8846                graph.edges.len()
8847            );
8848            Some(graph)
8849        } else {
8850            None
8851        };
8852
8853        self.check_resources_with_log("post-compliance-regulations")?;
8854
8855        Ok(ComplianceRegulationsSnapshot {
8856            standard_records,
8857            cross_reference_records,
8858            jurisdiction_records,
8859            audit_procedures,
8860            findings,
8861            filings,
8862            compliance_graph,
8863        })
8864    }
8865
8866    /// Build a lineage graph describing config → phase → output relationships.
8867    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
8868        use super::lineage::LineageGraphBuilder;
8869
8870        let mut builder = LineageGraphBuilder::new();
8871
8872        // Config sections
8873        builder.add_config_section("config:global", "Global Config");
8874        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
8875        builder.add_config_section("config:transactions", "Transaction Config");
8876
8877        // Generator phases
8878        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
8879        builder.add_generator_phase("phase:je", "Journal Entry Generation");
8880
8881        // Config → phase edges
8882        builder.configured_by("phase:coa", "config:chart_of_accounts");
8883        builder.configured_by("phase:je", "config:transactions");
8884
8885        // Output files
8886        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
8887        builder.produced_by("output:je", "phase:je");
8888
8889        // Optional phases based on config
8890        if self.phase_config.generate_master_data {
8891            builder.add_config_section("config:master_data", "Master Data Config");
8892            builder.add_generator_phase("phase:master_data", "Master Data Generation");
8893            builder.configured_by("phase:master_data", "config:master_data");
8894            builder.input_to("phase:master_data", "phase:je");
8895        }
8896
8897        if self.phase_config.generate_document_flows {
8898            builder.add_config_section("config:document_flows", "Document Flow Config");
8899            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
8900            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
8901            builder.configured_by("phase:p2p", "config:document_flows");
8902            builder.configured_by("phase:o2c", "config:document_flows");
8903
8904            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
8905            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
8906            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
8907            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
8908            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
8909
8910            builder.produced_by("output:po", "phase:p2p");
8911            builder.produced_by("output:gr", "phase:p2p");
8912            builder.produced_by("output:vi", "phase:p2p");
8913            builder.produced_by("output:so", "phase:o2c");
8914            builder.produced_by("output:ci", "phase:o2c");
8915        }
8916
8917        if self.phase_config.inject_anomalies {
8918            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
8919            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
8920            builder.configured_by("phase:anomaly", "config:fraud");
8921            builder.add_output_file(
8922                "output:labels",
8923                "Anomaly Labels",
8924                "labels/anomaly_labels.csv",
8925            );
8926            builder.produced_by("output:labels", "phase:anomaly");
8927        }
8928
8929        if self.phase_config.generate_audit {
8930            builder.add_config_section("config:audit", "Audit Config");
8931            builder.add_generator_phase("phase:audit", "Audit Data Generation");
8932            builder.configured_by("phase:audit", "config:audit");
8933        }
8934
8935        if self.phase_config.generate_banking {
8936            builder.add_config_section("config:banking", "Banking Config");
8937            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
8938            builder.configured_by("phase:banking", "config:banking");
8939        }
8940
8941        if self.config.llm.enabled {
8942            builder.add_config_section("config:llm", "LLM Enrichment Config");
8943            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
8944            builder.configured_by("phase:llm_enrichment", "config:llm");
8945        }
8946
8947        if self.config.diffusion.enabled {
8948            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
8949            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
8950            builder.configured_by("phase:diffusion", "config:diffusion");
8951        }
8952
8953        if self.config.causal.enabled {
8954            builder.add_config_section("config:causal", "Causal Generation Config");
8955            builder.add_generator_phase("phase:causal", "Causal Overlay");
8956            builder.configured_by("phase:causal", "config:causal");
8957        }
8958
8959        builder.build()
8960    }
8961}
8962
8963/// Get the directory name for a graph export format.
8964fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
8965    match format {
8966        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
8967        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
8968        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
8969        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
8970        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
8971    }
8972}
8973
8974#[cfg(test)]
8975#[allow(clippy::unwrap_used)]
8976mod tests {
8977    use super::*;
8978    use datasynth_config::schema::*;
8979
8980    fn create_test_config() -> GeneratorConfig {
8981        GeneratorConfig {
8982            global: GlobalConfig {
8983                industry: IndustrySector::Manufacturing,
8984                start_date: "2024-01-01".to_string(),
8985                period_months: 1,
8986                seed: Some(42),
8987                parallel: false,
8988                group_currency: "USD".to_string(),
8989                worker_threads: 0,
8990                memory_limit_mb: 0,
8991                fiscal_year_months: None,
8992            },
8993            companies: vec![CompanyConfig {
8994                code: "1000".to_string(),
8995                name: "Test Company".to_string(),
8996                currency: "USD".to_string(),
8997                country: "US".to_string(),
8998                annual_transaction_volume: TransactionVolume::TenK,
8999                volume_weight: 1.0,
9000                fiscal_year_variant: "K4".to_string(),
9001            }],
9002            chart_of_accounts: ChartOfAccountsConfig {
9003                complexity: CoAComplexity::Small,
9004                industry_specific: true,
9005                custom_accounts: None,
9006                min_hierarchy_depth: 2,
9007                max_hierarchy_depth: 4,
9008            },
9009            transactions: TransactionConfig::default(),
9010            output: OutputConfig::default(),
9011            fraud: FraudConfig::default(),
9012            internal_controls: InternalControlsConfig::default(),
9013            business_processes: BusinessProcessConfig::default(),
9014            user_personas: UserPersonaConfig::default(),
9015            templates: TemplateConfig::default(),
9016            approval: ApprovalConfig::default(),
9017            departments: DepartmentConfig::default(),
9018            master_data: MasterDataConfig::default(),
9019            document_flows: DocumentFlowConfig::default(),
9020            intercompany: IntercompanyConfig::default(),
9021            balance: BalanceConfig::default(),
9022            ocpm: OcpmConfig::default(),
9023            audit: AuditGenerationConfig::default(),
9024            banking: datasynth_banking::BankingConfig::default(),
9025            data_quality: DataQualitySchemaConfig::default(),
9026            scenario: ScenarioConfig::default(),
9027            temporal: TemporalDriftConfig::default(),
9028            graph_export: GraphExportConfig::default(),
9029            streaming: StreamingSchemaConfig::default(),
9030            rate_limit: RateLimitSchemaConfig::default(),
9031            temporal_attributes: TemporalAttributeSchemaConfig::default(),
9032            relationships: RelationshipSchemaConfig::default(),
9033            accounting_standards: AccountingStandardsConfig::default(),
9034            audit_standards: AuditStandardsConfig::default(),
9035            distributions: Default::default(),
9036            temporal_patterns: Default::default(),
9037            vendor_network: VendorNetworkSchemaConfig::default(),
9038            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
9039            relationship_strength: RelationshipStrengthSchemaConfig::default(),
9040            cross_process_links: CrossProcessLinksSchemaConfig::default(),
9041            organizational_events: OrganizationalEventsSchemaConfig::default(),
9042            behavioral_drift: BehavioralDriftSchemaConfig::default(),
9043            market_drift: MarketDriftSchemaConfig::default(),
9044            drift_labeling: DriftLabelingSchemaConfig::default(),
9045            anomaly_injection: Default::default(),
9046            industry_specific: Default::default(),
9047            fingerprint_privacy: Default::default(),
9048            quality_gates: Default::default(),
9049            compliance: Default::default(),
9050            webhooks: Default::default(),
9051            llm: Default::default(),
9052            diffusion: Default::default(),
9053            causal: Default::default(),
9054            source_to_pay: Default::default(),
9055            financial_reporting: Default::default(),
9056            hr: Default::default(),
9057            manufacturing: Default::default(),
9058            sales_quotes: Default::default(),
9059            tax: Default::default(),
9060            treasury: Default::default(),
9061            project_accounting: Default::default(),
9062            esg: Default::default(),
9063            country_packs: None,
9064            scenarios: Default::default(),
9065            session: Default::default(),
9066            compliance_regulations: Default::default(),
9067        }
9068    }
9069
9070    #[test]
9071    fn test_enhanced_orchestrator_creation() {
9072        let config = create_test_config();
9073        let orchestrator = EnhancedOrchestrator::with_defaults(config);
9074        assert!(orchestrator.is_ok());
9075    }
9076
9077    #[test]
9078    fn test_minimal_generation() {
9079        let config = create_test_config();
9080        let phase_config = PhaseConfig {
9081            generate_master_data: false,
9082            generate_document_flows: false,
9083            generate_journal_entries: true,
9084            inject_anomalies: false,
9085            show_progress: false,
9086            ..Default::default()
9087        };
9088
9089        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9090        let result = orchestrator.generate();
9091
9092        assert!(result.is_ok());
9093        let result = result.unwrap();
9094        assert!(!result.journal_entries.is_empty());
9095    }
9096
9097    #[test]
9098    fn test_master_data_generation() {
9099        let config = create_test_config();
9100        let phase_config = PhaseConfig {
9101            generate_master_data: true,
9102            generate_document_flows: false,
9103            generate_journal_entries: false,
9104            inject_anomalies: false,
9105            show_progress: false,
9106            vendors_per_company: 5,
9107            customers_per_company: 5,
9108            materials_per_company: 10,
9109            assets_per_company: 5,
9110            employees_per_company: 10,
9111            ..Default::default()
9112        };
9113
9114        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9115        let result = orchestrator.generate().unwrap();
9116
9117        assert!(!result.master_data.vendors.is_empty());
9118        assert!(!result.master_data.customers.is_empty());
9119        assert!(!result.master_data.materials.is_empty());
9120    }
9121
9122    #[test]
9123    fn test_document_flow_generation() {
9124        let config = create_test_config();
9125        let phase_config = PhaseConfig {
9126            generate_master_data: true,
9127            generate_document_flows: true,
9128            generate_journal_entries: false,
9129            inject_anomalies: false,
9130            inject_data_quality: false,
9131            validate_balances: false,
9132            generate_ocpm_events: false,
9133            show_progress: false,
9134            vendors_per_company: 5,
9135            customers_per_company: 5,
9136            materials_per_company: 10,
9137            assets_per_company: 5,
9138            employees_per_company: 10,
9139            p2p_chains: 5,
9140            o2c_chains: 5,
9141            ..Default::default()
9142        };
9143
9144        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9145        let result = orchestrator.generate().unwrap();
9146
9147        // Should have generated P2P and O2C chains
9148        assert!(!result.document_flows.p2p_chains.is_empty());
9149        assert!(!result.document_flows.o2c_chains.is_empty());
9150
9151        // Flattened documents should be populated
9152        assert!(!result.document_flows.purchase_orders.is_empty());
9153        assert!(!result.document_flows.sales_orders.is_empty());
9154    }
9155
9156    #[test]
9157    fn test_anomaly_injection() {
9158        let config = create_test_config();
9159        let phase_config = PhaseConfig {
9160            generate_master_data: false,
9161            generate_document_flows: false,
9162            generate_journal_entries: true,
9163            inject_anomalies: true,
9164            show_progress: false,
9165            ..Default::default()
9166        };
9167
9168        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9169        let result = orchestrator.generate().unwrap();
9170
9171        // Should have journal entries
9172        assert!(!result.journal_entries.is_empty());
9173
9174        // With ~833 entries and 2% rate, expect some anomalies
9175        // Note: This is probabilistic, so we just verify the structure exists
9176        assert!(result.anomaly_labels.summary.is_some());
9177    }
9178
9179    #[test]
9180    fn test_full_generation_pipeline() {
9181        let config = create_test_config();
9182        let phase_config = PhaseConfig {
9183            generate_master_data: true,
9184            generate_document_flows: true,
9185            generate_journal_entries: true,
9186            inject_anomalies: false,
9187            inject_data_quality: false,
9188            validate_balances: true,
9189            generate_ocpm_events: false,
9190            show_progress: false,
9191            vendors_per_company: 3,
9192            customers_per_company: 3,
9193            materials_per_company: 5,
9194            assets_per_company: 3,
9195            employees_per_company: 5,
9196            p2p_chains: 3,
9197            o2c_chains: 3,
9198            ..Default::default()
9199        };
9200
9201        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9202        let result = orchestrator.generate().unwrap();
9203
9204        // All phases should have results
9205        assert!(!result.master_data.vendors.is_empty());
9206        assert!(!result.master_data.customers.is_empty());
9207        assert!(!result.document_flows.p2p_chains.is_empty());
9208        assert!(!result.document_flows.o2c_chains.is_empty());
9209        assert!(!result.journal_entries.is_empty());
9210        assert!(result.statistics.accounts_count > 0);
9211
9212        // Subledger linking should have run
9213        assert!(!result.subledger.ap_invoices.is_empty());
9214        assert!(!result.subledger.ar_invoices.is_empty());
9215
9216        // Balance validation should have run
9217        assert!(result.balance_validation.validated);
9218        assert!(result.balance_validation.entries_processed > 0);
9219    }
9220
9221    #[test]
9222    fn test_subledger_linking() {
9223        let config = create_test_config();
9224        let phase_config = PhaseConfig {
9225            generate_master_data: true,
9226            generate_document_flows: true,
9227            generate_journal_entries: false,
9228            inject_anomalies: false,
9229            inject_data_quality: false,
9230            validate_balances: false,
9231            generate_ocpm_events: false,
9232            show_progress: false,
9233            vendors_per_company: 5,
9234            customers_per_company: 5,
9235            materials_per_company: 10,
9236            assets_per_company: 3,
9237            employees_per_company: 5,
9238            p2p_chains: 5,
9239            o2c_chains: 5,
9240            ..Default::default()
9241        };
9242
9243        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9244        let result = orchestrator.generate().unwrap();
9245
9246        // Should have document flows
9247        assert!(!result.document_flows.vendor_invoices.is_empty());
9248        assert!(!result.document_flows.customer_invoices.is_empty());
9249
9250        // Subledger should be linked from document flows
9251        assert!(!result.subledger.ap_invoices.is_empty());
9252        assert!(!result.subledger.ar_invoices.is_empty());
9253
9254        // AP invoices count should match vendor invoices count
9255        assert_eq!(
9256            result.subledger.ap_invoices.len(),
9257            result.document_flows.vendor_invoices.len()
9258        );
9259
9260        // AR invoices count should match customer invoices count
9261        assert_eq!(
9262            result.subledger.ar_invoices.len(),
9263            result.document_flows.customer_invoices.len()
9264        );
9265
9266        // Statistics should reflect subledger counts
9267        assert_eq!(
9268            result.statistics.ap_invoice_count,
9269            result.subledger.ap_invoices.len()
9270        );
9271        assert_eq!(
9272            result.statistics.ar_invoice_count,
9273            result.subledger.ar_invoices.len()
9274        );
9275    }
9276
9277    #[test]
9278    fn test_balance_validation() {
9279        let config = create_test_config();
9280        let phase_config = PhaseConfig {
9281            generate_master_data: false,
9282            generate_document_flows: false,
9283            generate_journal_entries: true,
9284            inject_anomalies: false,
9285            validate_balances: true,
9286            show_progress: false,
9287            ..Default::default()
9288        };
9289
9290        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9291        let result = orchestrator.generate().unwrap();
9292
9293        // Balance validation should run
9294        assert!(result.balance_validation.validated);
9295        assert!(result.balance_validation.entries_processed > 0);
9296
9297        // Generated JEs should be balanced (no unbalanced entries)
9298        assert!(!result.balance_validation.has_unbalanced_entries);
9299
9300        // Total debits should equal total credits
9301        assert_eq!(
9302            result.balance_validation.total_debits,
9303            result.balance_validation.total_credits
9304        );
9305    }
9306
9307    #[test]
9308    fn test_statistics_accuracy() {
9309        let config = create_test_config();
9310        let phase_config = PhaseConfig {
9311            generate_master_data: true,
9312            generate_document_flows: false,
9313            generate_journal_entries: true,
9314            inject_anomalies: false,
9315            show_progress: false,
9316            vendors_per_company: 10,
9317            customers_per_company: 20,
9318            materials_per_company: 15,
9319            assets_per_company: 5,
9320            employees_per_company: 8,
9321            ..Default::default()
9322        };
9323
9324        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9325        let result = orchestrator.generate().unwrap();
9326
9327        // Statistics should match actual data
9328        assert_eq!(
9329            result.statistics.vendor_count,
9330            result.master_data.vendors.len()
9331        );
9332        assert_eq!(
9333            result.statistics.customer_count,
9334            result.master_data.customers.len()
9335        );
9336        assert_eq!(
9337            result.statistics.material_count,
9338            result.master_data.materials.len()
9339        );
9340        assert_eq!(
9341            result.statistics.total_entries as usize,
9342            result.journal_entries.len()
9343        );
9344    }
9345
9346    #[test]
9347    fn test_phase_config_defaults() {
9348        let config = PhaseConfig::default();
9349        assert!(config.generate_master_data);
9350        assert!(config.generate_document_flows);
9351        assert!(config.generate_journal_entries);
9352        assert!(!config.inject_anomalies);
9353        assert!(config.validate_balances);
9354        assert!(config.show_progress);
9355        assert!(config.vendors_per_company > 0);
9356        assert!(config.customers_per_company > 0);
9357    }
9358
9359    #[test]
9360    fn test_get_coa_before_generation() {
9361        let config = create_test_config();
9362        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
9363
9364        // Before generation, CoA should be None
9365        assert!(orchestrator.get_coa().is_none());
9366    }
9367
9368    #[test]
9369    fn test_get_coa_after_generation() {
9370        let config = create_test_config();
9371        let phase_config = PhaseConfig {
9372            generate_master_data: false,
9373            generate_document_flows: false,
9374            generate_journal_entries: true,
9375            inject_anomalies: false,
9376            show_progress: false,
9377            ..Default::default()
9378        };
9379
9380        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9381        let _ = orchestrator.generate().unwrap();
9382
9383        // After generation, CoA should be available
9384        assert!(orchestrator.get_coa().is_some());
9385    }
9386
9387    #[test]
9388    fn test_get_master_data() {
9389        let config = create_test_config();
9390        let phase_config = PhaseConfig {
9391            generate_master_data: true,
9392            generate_document_flows: false,
9393            generate_journal_entries: false,
9394            inject_anomalies: false,
9395            show_progress: false,
9396            vendors_per_company: 5,
9397            customers_per_company: 5,
9398            materials_per_company: 5,
9399            assets_per_company: 5,
9400            employees_per_company: 5,
9401            ..Default::default()
9402        };
9403
9404        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9405        let result = orchestrator.generate().unwrap();
9406
9407        // After generate(), master_data is moved into the result
9408        assert!(!result.master_data.vendors.is_empty());
9409    }
9410
9411    #[test]
9412    fn test_with_progress_builder() {
9413        let config = create_test_config();
9414        let orchestrator = EnhancedOrchestrator::with_defaults(config)
9415            .unwrap()
9416            .with_progress(false);
9417
9418        // Should still work without progress
9419        assert!(!orchestrator.phase_config.show_progress);
9420    }
9421
9422    #[test]
9423    fn test_multi_company_generation() {
9424        let mut config = create_test_config();
9425        config.companies.push(CompanyConfig {
9426            code: "2000".to_string(),
9427            name: "Subsidiary".to_string(),
9428            currency: "EUR".to_string(),
9429            country: "DE".to_string(),
9430            annual_transaction_volume: TransactionVolume::TenK,
9431            volume_weight: 0.5,
9432            fiscal_year_variant: "K4".to_string(),
9433        });
9434
9435        let phase_config = PhaseConfig {
9436            generate_master_data: true,
9437            generate_document_flows: false,
9438            generate_journal_entries: true,
9439            inject_anomalies: false,
9440            show_progress: false,
9441            vendors_per_company: 5,
9442            customers_per_company: 5,
9443            materials_per_company: 5,
9444            assets_per_company: 5,
9445            employees_per_company: 5,
9446            ..Default::default()
9447        };
9448
9449        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9450        let result = orchestrator.generate().unwrap();
9451
9452        // Should have master data for both companies
9453        assert!(result.statistics.vendor_count >= 10); // 5 per company
9454        assert!(result.statistics.customer_count >= 10);
9455        assert!(result.statistics.companies_count == 2);
9456    }
9457
9458    #[test]
9459    fn test_empty_master_data_skips_document_flows() {
9460        let config = create_test_config();
9461        let phase_config = PhaseConfig {
9462            generate_master_data: false,   // Skip master data
9463            generate_document_flows: true, // Try to generate flows
9464            generate_journal_entries: false,
9465            inject_anomalies: false,
9466            show_progress: false,
9467            ..Default::default()
9468        };
9469
9470        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9471        let result = orchestrator.generate().unwrap();
9472
9473        // Without master data, document flows should be empty
9474        assert!(result.document_flows.p2p_chains.is_empty());
9475        assert!(result.document_flows.o2c_chains.is_empty());
9476    }
9477
9478    #[test]
9479    fn test_journal_entry_line_item_count() {
9480        let config = create_test_config();
9481        let phase_config = PhaseConfig {
9482            generate_master_data: false,
9483            generate_document_flows: false,
9484            generate_journal_entries: true,
9485            inject_anomalies: false,
9486            show_progress: false,
9487            ..Default::default()
9488        };
9489
9490        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9491        let result = orchestrator.generate().unwrap();
9492
9493        // Total line items should match sum of all entry line counts
9494        let calculated_line_items: u64 = result
9495            .journal_entries
9496            .iter()
9497            .map(|e| e.line_count() as u64)
9498            .sum();
9499        assert_eq!(result.statistics.total_line_items, calculated_line_items);
9500    }
9501
9502    #[test]
9503    fn test_audit_generation() {
9504        let config = create_test_config();
9505        let phase_config = PhaseConfig {
9506            generate_master_data: false,
9507            generate_document_flows: false,
9508            generate_journal_entries: true,
9509            inject_anomalies: false,
9510            show_progress: false,
9511            generate_audit: true,
9512            audit_engagements: 2,
9513            workpapers_per_engagement: 5,
9514            evidence_per_workpaper: 2,
9515            risks_per_engagement: 3,
9516            findings_per_engagement: 2,
9517            judgments_per_engagement: 2,
9518            ..Default::default()
9519        };
9520
9521        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9522        let result = orchestrator.generate().unwrap();
9523
9524        // Should have generated audit data
9525        assert_eq!(result.audit.engagements.len(), 2);
9526        assert!(!result.audit.workpapers.is_empty());
9527        assert!(!result.audit.evidence.is_empty());
9528        assert!(!result.audit.risk_assessments.is_empty());
9529        assert!(!result.audit.findings.is_empty());
9530        assert!(!result.audit.judgments.is_empty());
9531
9532        // Statistics should match
9533        assert_eq!(
9534            result.statistics.audit_engagement_count,
9535            result.audit.engagements.len()
9536        );
9537        assert_eq!(
9538            result.statistics.audit_workpaper_count,
9539            result.audit.workpapers.len()
9540        );
9541        assert_eq!(
9542            result.statistics.audit_evidence_count,
9543            result.audit.evidence.len()
9544        );
9545        assert_eq!(
9546            result.statistics.audit_risk_count,
9547            result.audit.risk_assessments.len()
9548        );
9549        assert_eq!(
9550            result.statistics.audit_finding_count,
9551            result.audit.findings.len()
9552        );
9553        assert_eq!(
9554            result.statistics.audit_judgment_count,
9555            result.audit.judgments.len()
9556        );
9557    }
9558
9559    #[test]
9560    fn test_new_phases_disabled_by_default() {
9561        let config = create_test_config();
9562        // Verify new config fields default to disabled
9563        assert!(!config.llm.enabled);
9564        assert!(!config.diffusion.enabled);
9565        assert!(!config.causal.enabled);
9566
9567        let phase_config = PhaseConfig {
9568            generate_master_data: false,
9569            generate_document_flows: false,
9570            generate_journal_entries: true,
9571            inject_anomalies: false,
9572            show_progress: false,
9573            ..Default::default()
9574        };
9575
9576        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9577        let result = orchestrator.generate().unwrap();
9578
9579        // All new phase statistics should be zero when disabled
9580        assert_eq!(result.statistics.llm_enrichment_ms, 0);
9581        assert_eq!(result.statistics.llm_vendors_enriched, 0);
9582        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
9583        assert_eq!(result.statistics.diffusion_samples_generated, 0);
9584        assert_eq!(result.statistics.causal_generation_ms, 0);
9585        assert_eq!(result.statistics.causal_samples_generated, 0);
9586        assert!(result.statistics.causal_validation_passed.is_none());
9587        assert_eq!(result.statistics.counterfactual_pair_count, 0);
9588        assert!(result.counterfactual_pairs.is_empty());
9589    }
9590
9591    #[test]
9592    fn test_counterfactual_generation_enabled() {
9593        let config = create_test_config();
9594        let phase_config = PhaseConfig {
9595            generate_master_data: false,
9596            generate_document_flows: false,
9597            generate_journal_entries: true,
9598            inject_anomalies: false,
9599            show_progress: false,
9600            generate_counterfactuals: true,
9601            ..Default::default()
9602        };
9603
9604        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9605        let result = orchestrator.generate().unwrap();
9606
9607        // With JE generation enabled, counterfactual pairs should be generated
9608        if !result.journal_entries.is_empty() {
9609            assert_eq!(
9610                result.counterfactual_pairs.len(),
9611                result.journal_entries.len()
9612            );
9613            assert_eq!(
9614                result.statistics.counterfactual_pair_count,
9615                result.journal_entries.len()
9616            );
9617            // Each pair should have a distinct pair_id
9618            let ids: std::collections::HashSet<_> = result
9619                .counterfactual_pairs
9620                .iter()
9621                .map(|p| p.pair_id.clone())
9622                .collect();
9623            assert_eq!(ids.len(), result.counterfactual_pairs.len());
9624        }
9625    }
9626
9627    #[test]
9628    fn test_llm_enrichment_enabled() {
9629        let mut config = create_test_config();
9630        config.llm.enabled = true;
9631        config.llm.max_vendor_enrichments = 3;
9632
9633        let phase_config = PhaseConfig {
9634            generate_master_data: true,
9635            generate_document_flows: false,
9636            generate_journal_entries: false,
9637            inject_anomalies: false,
9638            show_progress: false,
9639            vendors_per_company: 5,
9640            customers_per_company: 3,
9641            materials_per_company: 3,
9642            assets_per_company: 3,
9643            employees_per_company: 3,
9644            ..Default::default()
9645        };
9646
9647        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9648        let result = orchestrator.generate().unwrap();
9649
9650        // LLM enrichment should have run
9651        assert!(result.statistics.llm_vendors_enriched > 0);
9652        assert!(result.statistics.llm_vendors_enriched <= 3);
9653    }
9654
9655    #[test]
9656    fn test_diffusion_enhancement_enabled() {
9657        let mut config = create_test_config();
9658        config.diffusion.enabled = true;
9659        config.diffusion.n_steps = 50;
9660        config.diffusion.sample_size = 20;
9661
9662        let phase_config = PhaseConfig {
9663            generate_master_data: false,
9664            generate_document_flows: false,
9665            generate_journal_entries: true,
9666            inject_anomalies: false,
9667            show_progress: false,
9668            ..Default::default()
9669        };
9670
9671        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9672        let result = orchestrator.generate().unwrap();
9673
9674        // Diffusion phase should have generated samples
9675        assert_eq!(result.statistics.diffusion_samples_generated, 20);
9676    }
9677
9678    #[test]
9679    fn test_causal_overlay_enabled() {
9680        let mut config = create_test_config();
9681        config.causal.enabled = true;
9682        config.causal.template = "fraud_detection".to_string();
9683        config.causal.sample_size = 100;
9684        config.causal.validate = true;
9685
9686        let phase_config = PhaseConfig {
9687            generate_master_data: false,
9688            generate_document_flows: false,
9689            generate_journal_entries: true,
9690            inject_anomalies: false,
9691            show_progress: false,
9692            ..Default::default()
9693        };
9694
9695        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9696        let result = orchestrator.generate().unwrap();
9697
9698        // Causal phase should have generated samples
9699        assert_eq!(result.statistics.causal_samples_generated, 100);
9700        // Validation should have run
9701        assert!(result.statistics.causal_validation_passed.is_some());
9702    }
9703
9704    #[test]
9705    fn test_causal_overlay_revenue_cycle_template() {
9706        let mut config = create_test_config();
9707        config.causal.enabled = true;
9708        config.causal.template = "revenue_cycle".to_string();
9709        config.causal.sample_size = 50;
9710        config.causal.validate = false;
9711
9712        let phase_config = PhaseConfig {
9713            generate_master_data: false,
9714            generate_document_flows: false,
9715            generate_journal_entries: true,
9716            inject_anomalies: false,
9717            show_progress: false,
9718            ..Default::default()
9719        };
9720
9721        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9722        let result = orchestrator.generate().unwrap();
9723
9724        // Causal phase should have generated samples
9725        assert_eq!(result.statistics.causal_samples_generated, 50);
9726        // Validation was disabled
9727        assert!(result.statistics.causal_validation_passed.is_none());
9728    }
9729
9730    #[test]
9731    fn test_all_new_phases_enabled_together() {
9732        let mut config = create_test_config();
9733        config.llm.enabled = true;
9734        config.llm.max_vendor_enrichments = 2;
9735        config.diffusion.enabled = true;
9736        config.diffusion.n_steps = 20;
9737        config.diffusion.sample_size = 10;
9738        config.causal.enabled = true;
9739        config.causal.sample_size = 50;
9740        config.causal.validate = true;
9741
9742        let phase_config = PhaseConfig {
9743            generate_master_data: true,
9744            generate_document_flows: false,
9745            generate_journal_entries: true,
9746            inject_anomalies: false,
9747            show_progress: false,
9748            vendors_per_company: 5,
9749            customers_per_company: 3,
9750            materials_per_company: 3,
9751            assets_per_company: 3,
9752            employees_per_company: 3,
9753            ..Default::default()
9754        };
9755
9756        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9757        let result = orchestrator.generate().unwrap();
9758
9759        // All three phases should have run
9760        assert!(result.statistics.llm_vendors_enriched > 0);
9761        assert_eq!(result.statistics.diffusion_samples_generated, 10);
9762        assert_eq!(result.statistics.causal_samples_generated, 50);
9763        assert!(result.statistics.causal_validation_passed.is_some());
9764    }
9765
9766    #[test]
9767    fn test_statistics_serialization_with_new_fields() {
9768        let stats = EnhancedGenerationStatistics {
9769            total_entries: 100,
9770            total_line_items: 500,
9771            llm_enrichment_ms: 42,
9772            llm_vendors_enriched: 10,
9773            diffusion_enhancement_ms: 100,
9774            diffusion_samples_generated: 50,
9775            causal_generation_ms: 200,
9776            causal_samples_generated: 100,
9777            causal_validation_passed: Some(true),
9778            ..Default::default()
9779        };
9780
9781        let json = serde_json::to_string(&stats).unwrap();
9782        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
9783
9784        assert_eq!(deserialized.llm_enrichment_ms, 42);
9785        assert_eq!(deserialized.llm_vendors_enriched, 10);
9786        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
9787        assert_eq!(deserialized.diffusion_samples_generated, 50);
9788        assert_eq!(deserialized.causal_generation_ms, 200);
9789        assert_eq!(deserialized.causal_samples_generated, 100);
9790        assert_eq!(deserialized.causal_validation_passed, Some(true));
9791    }
9792
9793    #[test]
9794    fn test_statistics_backward_compat_deserialization() {
9795        // Old JSON without the new fields should still deserialize
9796        let old_json = r#"{
9797            "total_entries": 100,
9798            "total_line_items": 500,
9799            "accounts_count": 50,
9800            "companies_count": 1,
9801            "period_months": 12,
9802            "vendor_count": 10,
9803            "customer_count": 20,
9804            "material_count": 15,
9805            "asset_count": 5,
9806            "employee_count": 8,
9807            "p2p_chain_count": 5,
9808            "o2c_chain_count": 5,
9809            "ap_invoice_count": 5,
9810            "ar_invoice_count": 5,
9811            "ocpm_event_count": 0,
9812            "ocpm_object_count": 0,
9813            "ocpm_case_count": 0,
9814            "audit_engagement_count": 0,
9815            "audit_workpaper_count": 0,
9816            "audit_evidence_count": 0,
9817            "audit_risk_count": 0,
9818            "audit_finding_count": 0,
9819            "audit_judgment_count": 0,
9820            "anomalies_injected": 0,
9821            "data_quality_issues": 0,
9822            "banking_customer_count": 0,
9823            "banking_account_count": 0,
9824            "banking_transaction_count": 0,
9825            "banking_suspicious_count": 0,
9826            "graph_export_count": 0,
9827            "graph_node_count": 0,
9828            "graph_edge_count": 0
9829        }"#;
9830
9831        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
9832
9833        // New fields should default to 0 / None
9834        assert_eq!(stats.llm_enrichment_ms, 0);
9835        assert_eq!(stats.llm_vendors_enriched, 0);
9836        assert_eq!(stats.diffusion_enhancement_ms, 0);
9837        assert_eq!(stats.diffusion_samples_generated, 0);
9838        assert_eq!(stats.causal_generation_ms, 0);
9839        assert_eq!(stats.causal_samples_generated, 0);
9840        assert!(stats.causal_validation_passed.is_none());
9841    }
9842}