Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
40};
41use datasynth_core::models::sourcing::{
42    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
43    SupplierBid, SupplierQualification, SupplierScorecard,
44};
45use datasynth_core::models::subledger::ap::APInvoice;
46use datasynth_core::models::subledger::ar::ARInvoice;
47use datasynth_core::models::*;
48use datasynth_core::traits::Generator;
49use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
50use datasynth_fingerprint::{
51    io::FingerprintReader,
52    models::Fingerprint,
53    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
54};
55use datasynth_generators::{
56    // Anomaly injection
57    AnomalyInjector,
58    AnomalyInjectorConfig,
59    AssetGenerator,
60    // Audit generators
61    AuditEngagementGenerator,
62    BalanceTrackerConfig,
63    // Bank reconciliation generator
64    BankReconciliationGenerator,
65    // S2C sourcing generators
66    BidEvaluationGenerator,
67    BidGenerator,
68    CatalogGenerator,
69    // Core generators
70    ChartOfAccountsGenerator,
71    ContractGenerator,
72    // Control generator
73    ControlGenerator,
74    ControlGeneratorConfig,
75    CustomerGenerator,
76    DataQualityConfig,
77    // Data quality
78    DataQualityInjector,
79    DataQualityStats,
80    // Document flow JE generator
81    DocumentFlowJeConfig,
82    DocumentFlowJeGenerator,
83    // Subledger linker
84    DocumentFlowLinker,
85    EmployeeGenerator,
86    // ESG anomaly labels
87    EsgAnomalyLabel,
88    EvidenceGenerator,
89    // Financial statement generator
90    FinancialStatementGenerator,
91    FindingGenerator,
92    JournalEntryGenerator,
93    JudgmentGenerator,
94    LatePaymentDistribution,
95    MaterialGenerator,
96    O2CDocumentChain,
97    O2CGenerator,
98    O2CGeneratorConfig,
99    O2CPaymentBehavior,
100    P2PDocumentChain,
101    // Document flow generators
102    P2PGenerator,
103    P2PGeneratorConfig,
104    P2PPaymentBehavior,
105    PaymentReference,
106    QualificationGenerator,
107    RfxGenerator,
108    RiskAssessmentGenerator,
109    // Balance validation
110    RunningBalanceTracker,
111    ScorecardGenerator,
112    SourcingProjectGenerator,
113    SpendAnalysisGenerator,
114    ValidationError,
115    // Master data generators
116    VendorGenerator,
117    WorkpaperGenerator,
118};
119use datasynth_graph::{
120    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
121    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
122    TransactionGraphConfig,
123};
124use datasynth_ocpm::{
125    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
126    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
127    OcpmUuidFactory, P2pDocuments, S2cDocuments,
128};
129
130use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
131use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
132use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
133use datasynth_core::llm::MockLlmProvider;
134use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
135use datasynth_core::models::documents::PaymentMethod;
136use datasynth_core::models::IndustrySector;
137use datasynth_generators::coa_generator::CoAFramework;
138use datasynth_generators::llm_enrichment::VendorLlmEnricher;
139use rayon::prelude::*;
140
141// ============================================================================
142// Configuration Conversion Functions
143// ============================================================================
144
145/// Convert P2P flow config from schema to generator config.
146fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
147    let payment_behavior = &schema_config.payment_behavior;
148    let late_dist = &payment_behavior.late_payment_days_distribution;
149
150    P2PGeneratorConfig {
151        three_way_match_rate: schema_config.three_way_match_rate,
152        partial_delivery_rate: schema_config.partial_delivery_rate,
153        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
154        price_variance_rate: schema_config.price_variance_rate,
155        max_price_variance_percent: schema_config.max_price_variance_percent,
156        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
157        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
158        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
159        payment_method_distribution: vec![
160            (PaymentMethod::BankTransfer, 0.60),
161            (PaymentMethod::Check, 0.25),
162            (PaymentMethod::Wire, 0.10),
163            (PaymentMethod::CreditCard, 0.05),
164        ],
165        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
166        payment_behavior: P2PPaymentBehavior {
167            late_payment_rate: payment_behavior.late_payment_rate,
168            late_payment_distribution: LatePaymentDistribution {
169                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
170                late_8_to_14: late_dist.late_8_to_14,
171                very_late_15_to_30: late_dist.very_late_15_to_30,
172                severely_late_31_to_60: late_dist.severely_late_31_to_60,
173                extremely_late_over_60: late_dist.extremely_late_over_60,
174            },
175            partial_payment_rate: payment_behavior.partial_payment_rate,
176            payment_correction_rate: payment_behavior.payment_correction_rate,
177            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
178        },
179    }
180}
181
182/// Convert O2C flow config from schema to generator config.
183fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
184    let payment_behavior = &schema_config.payment_behavior;
185
186    O2CGeneratorConfig {
187        credit_check_failure_rate: schema_config.credit_check_failure_rate,
188        partial_shipment_rate: schema_config.partial_shipment_rate,
189        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
190        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
191        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
192        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
193        bad_debt_rate: schema_config.bad_debt_rate,
194        returns_rate: schema_config.return_rate,
195        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
196        payment_method_distribution: vec![
197            (PaymentMethod::BankTransfer, 0.50),
198            (PaymentMethod::Check, 0.30),
199            (PaymentMethod::Wire, 0.15),
200            (PaymentMethod::CreditCard, 0.05),
201        ],
202        payment_behavior: O2CPaymentBehavior {
203            partial_payment_rate: payment_behavior.partial_payments.rate,
204            short_payment_rate: payment_behavior.short_payments.rate,
205            max_short_percent: payment_behavior.short_payments.max_short_percent,
206            on_account_rate: payment_behavior.on_account_payments.rate,
207            payment_correction_rate: payment_behavior.payment_corrections.rate,
208            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
209        },
210    }
211}
212
/// Switches and volume settings that decide which generation phases run.
///
/// Boolean fields enable or disable a phase; `usize` fields set how many
/// records the corresponding phase produces.
#[derive(Clone, Debug)]
pub struct PhaseConfig {
    // --- Core phase toggles ---
    /// Whether to produce master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Whether to produce document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Whether to derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Whether to produce journal entries.
    pub generate_journal_entries: bool,
    /// Whether to inject anomalies.
    pub inject_anomalies: bool,
    /// Whether to inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Whether to validate the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Whether to display progress bars.
    pub show_progress: bool,

    // --- Master data and document flow volumes ---
    /// Vendor count generated per company.
    pub vendors_per_company: usize,
    /// Customer count generated per company.
    pub customers_per_company: usize,
    /// Material count generated per company.
    pub materials_per_company: usize,
    /// Asset count generated per company.
    pub assets_per_company: usize,
    /// Employee count generated per company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,

    // --- Audit ---
    /// Whether to produce audit data (engagements, workpapers, evidence, risks, findings,
    /// judgments).
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpaper count per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence item count per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessment count per engagement.
    pub risks_per_engagement: usize,
    /// Finding count per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgment count per engagement.
    pub judgments_per_engagement: usize,

    // --- Optional domain phases ---
    /// Whether to produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Whether to produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Whether to produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs,
    /// scorecards).
    pub generate_sourcing: bool,
    /// Whether to produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Whether to produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Whether to produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Whether to produce manufacturing data (production orders, quality inspections,
    /// cycle counts).
    pub generate_manufacturing: bool,
    /// Whether to produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Whether to produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Whether to produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Whether to produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Whether to produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Whether to produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
}
287
288impl Default for PhaseConfig {
289    fn default() -> Self {
290        Self {
291            generate_master_data: true,
292            generate_document_flows: true,
293            generate_ocpm_events: false, // Off by default
294            generate_journal_entries: true,
295            inject_anomalies: false,
296            inject_data_quality: false, // Off by default (to preserve clean test data)
297            validate_balances: true,
298            show_progress: true,
299            vendors_per_company: 50,
300            customers_per_company: 100,
301            materials_per_company: 200,
302            assets_per_company: 50,
303            employees_per_company: 100,
304            p2p_chains: 100,
305            o2c_chains: 100,
306            generate_audit: false, // Off by default
307            audit_engagements: 5,
308            workpapers_per_engagement: 20,
309            evidence_per_workpaper: 5,
310            risks_per_engagement: 15,
311            findings_per_engagement: 8,
312            judgments_per_engagement: 10,
313            generate_banking: false,              // Off by default
314            generate_graph_export: false,         // Off by default
315            generate_sourcing: false,             // Off by default
316            generate_bank_reconciliation: false,  // Off by default
317            generate_financial_statements: false, // Off by default
318            generate_accounting_standards: false, // Off by default
319            generate_manufacturing: false,        // Off by default
320            generate_sales_kpi_budgets: false,    // Off by default
321            generate_tax: false,                  // Off by default
322            generate_esg: false,                  // Off by default
323            generate_intercompany: false,         // Off by default
324            generate_evolution_events: true,      // On by default
325            generate_counterfactuals: false,      // Off by default (opt-in for ML workloads)
326        }
327    }
328}
329
330/// Master data snapshot containing all generated entities.
331#[derive(Debug, Clone, Default)]
332pub struct MasterDataSnapshot {
333    /// Generated vendors.
334    pub vendors: Vec<Vendor>,
335    /// Generated customers.
336    pub customers: Vec<Customer>,
337    /// Generated materials.
338    pub materials: Vec<Material>,
339    /// Generated fixed assets.
340    pub assets: Vec<FixedAsset>,
341    /// Generated employees.
342    pub employees: Vec<Employee>,
343}
344
/// Summary of a finished hypergraph export.
#[derive(Clone, Debug)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
357
358/// Document flow snapshot containing all generated document chains.
359#[derive(Debug, Clone, Default)]
360pub struct DocumentFlowSnapshot {
361    /// P2P document chains.
362    pub p2p_chains: Vec<P2PDocumentChain>,
363    /// O2C document chains.
364    pub o2c_chains: Vec<O2CDocumentChain>,
365    /// All purchase orders (flattened).
366    pub purchase_orders: Vec<documents::PurchaseOrder>,
367    /// All goods receipts (flattened).
368    pub goods_receipts: Vec<documents::GoodsReceipt>,
369    /// All vendor invoices (flattened).
370    pub vendor_invoices: Vec<documents::VendorInvoice>,
371    /// All sales orders (flattened).
372    pub sales_orders: Vec<documents::SalesOrder>,
373    /// All deliveries (flattened).
374    pub deliveries: Vec<documents::Delivery>,
375    /// All customer invoices (flattened).
376    pub customer_invoices: Vec<documents::CustomerInvoice>,
377    /// All payments (flattened).
378    pub payments: Vec<documents::Payment>,
379}
380
381/// Subledger snapshot containing generated subledger records.
382#[derive(Debug, Clone, Default)]
383pub struct SubledgerSnapshot {
384    /// AP invoices linked from document flow vendor invoices.
385    pub ap_invoices: Vec<APInvoice>,
386    /// AR invoices linked from document flow customer invoices.
387    pub ar_invoices: Vec<ARInvoice>,
388    /// FA subledger records (asset acquisitions from FA generator).
389    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
390    /// Inventory positions from inventory generator.
391    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
392    /// Inventory movements from inventory generator.
393    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
394}
395
396/// OCPM snapshot containing generated OCPM event log data.
397#[derive(Debug, Clone, Default)]
398pub struct OcpmSnapshot {
399    /// OCPM event log (if generated)
400    pub event_log: Option<OcpmEventLog>,
401    /// Number of events generated
402    pub event_count: usize,
403    /// Number of objects generated
404    pub object_count: usize,
405    /// Number of cases generated
406    pub case_count: usize,
407}
408
409/// Audit data snapshot containing all generated audit-related entities.
410#[derive(Debug, Clone, Default)]
411pub struct AuditSnapshot {
412    /// Audit engagements per ISA 210/220.
413    pub engagements: Vec<AuditEngagement>,
414    /// Workpapers per ISA 230.
415    pub workpapers: Vec<Workpaper>,
416    /// Audit evidence per ISA 500.
417    pub evidence: Vec<AuditEvidence>,
418    /// Risk assessments per ISA 315/330.
419    pub risk_assessments: Vec<RiskAssessment>,
420    /// Audit findings per ISA 265.
421    pub findings: Vec<AuditFinding>,
422    /// Professional judgments per ISA 200.
423    pub judgments: Vec<ProfessionalJudgment>,
424}
425
426/// Banking KYC/AML data snapshot containing all generated banking entities.
427#[derive(Debug, Clone, Default)]
428pub struct BankingSnapshot {
429    /// Banking customers (retail, business, trust).
430    pub customers: Vec<BankingCustomer>,
431    /// Bank accounts.
432    pub accounts: Vec<BankAccount>,
433    /// Bank transactions with AML labels.
434    pub transactions: Vec<BankTransaction>,
435    /// Transaction-level AML labels with features.
436    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
437    /// Customer-level AML labels.
438    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
439    /// Account-level AML labels.
440    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
441    /// Relationship-level AML labels.
442    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
443    /// Case narratives for AML scenarios.
444    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
445    /// Number of suspicious transactions.
446    pub suspicious_count: usize,
447    /// Number of AML scenarios generated.
448    pub scenario_count: usize,
449}
450
451/// Graph export snapshot containing exported graph metadata.
452#[derive(Debug, Clone, Default, Serialize)]
453pub struct GraphExportSnapshot {
454    /// Whether graph export was performed.
455    pub exported: bool,
456    /// Number of graphs exported.
457    pub graph_count: usize,
458    /// Exported graph metadata (by format name).
459    pub exports: HashMap<String, GraphExportInfo>,
460}
461
462/// Information about an exported graph.
463#[derive(Debug, Clone, Serialize)]
464pub struct GraphExportInfo {
465    /// Graph name.
466    pub name: String,
467    /// Export format (pytorch_geometric, neo4j, dgl).
468    pub format: String,
469    /// Output directory path.
470    pub output_path: PathBuf,
471    /// Number of nodes.
472    pub node_count: usize,
473    /// Number of edges.
474    pub edge_count: usize,
475}
476
477/// S2C sourcing data snapshot.
478#[derive(Debug, Clone, Default)]
479pub struct SourcingSnapshot {
480    /// Spend analyses.
481    pub spend_analyses: Vec<SpendAnalysis>,
482    /// Sourcing projects.
483    pub sourcing_projects: Vec<SourcingProject>,
484    /// Supplier qualifications.
485    pub qualifications: Vec<SupplierQualification>,
486    /// RFx events (RFI, RFP, RFQ).
487    pub rfx_events: Vec<RfxEvent>,
488    /// Supplier bids.
489    pub bids: Vec<SupplierBid>,
490    /// Bid evaluations.
491    pub bid_evaluations: Vec<BidEvaluation>,
492    /// Procurement contracts.
493    pub contracts: Vec<ProcurementContract>,
494    /// Catalog items.
495    pub catalog_items: Vec<CatalogItem>,
496    /// Supplier scorecards.
497    pub scorecards: Vec<SupplierScorecard>,
498}
499
500/// A single period's trial balance with metadata.
501#[derive(Debug, Clone, Serialize, Deserialize)]
502pub struct PeriodTrialBalance {
503    /// Fiscal year.
504    pub fiscal_year: u16,
505    /// Fiscal period (1-12).
506    pub fiscal_period: u8,
507    /// Period start date.
508    pub period_start: NaiveDate,
509    /// Period end date.
510    pub period_end: NaiveDate,
511    /// Trial balance entries for this period.
512    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
513}
514
515/// Financial reporting snapshot (financial statements + bank reconciliations).
516#[derive(Debug, Clone, Default)]
517pub struct FinancialReportingSnapshot {
518    /// Financial statements (balance sheet, income statement, cash flow).
519    pub financial_statements: Vec<FinancialStatement>,
520    /// Bank reconciliations.
521    pub bank_reconciliations: Vec<BankReconciliation>,
522    /// Period-close trial balances (one per period).
523    pub trial_balances: Vec<PeriodTrialBalance>,
524}
525
526/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments).
527#[derive(Debug, Clone, Default)]
528pub struct HrSnapshot {
529    /// Payroll runs (actual data).
530    pub payroll_runs: Vec<PayrollRun>,
531    /// Payroll line items (actual data).
532    pub payroll_line_items: Vec<PayrollLineItem>,
533    /// Time entries (actual data).
534    pub time_entries: Vec<TimeEntry>,
535    /// Expense reports (actual data).
536    pub expense_reports: Vec<ExpenseReport>,
537    /// Benefit enrollments (actual data).
538    pub benefit_enrollments: Vec<BenefitEnrollment>,
539    /// Payroll runs.
540    pub payroll_run_count: usize,
541    /// Payroll line item count.
542    pub payroll_line_item_count: usize,
543    /// Time entry count.
544    pub time_entry_count: usize,
545    /// Expense report count.
546    pub expense_report_count: usize,
547    /// Benefit enrollment count.
548    pub benefit_enrollment_count: usize,
549}
550
551/// Accounting standards data snapshot (revenue recognition, impairment).
552#[derive(Debug, Clone, Default)]
553pub struct AccountingStandardsSnapshot {
554    /// Revenue recognition contracts (actual data).
555    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
556    /// Impairment tests (actual data).
557    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
558    /// Revenue recognition contract count.
559    pub revenue_contract_count: usize,
560    /// Impairment test count.
561    pub impairment_test_count: usize,
562}
563
564/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
565#[derive(Debug, Clone, Default)]
566pub struct ManufacturingSnapshot {
567    /// Production orders (actual data).
568    pub production_orders: Vec<ProductionOrder>,
569    /// Quality inspections (actual data).
570    pub quality_inspections: Vec<QualityInspection>,
571    /// Cycle counts (actual data).
572    pub cycle_counts: Vec<CycleCount>,
573    /// BOM components (actual data).
574    pub bom_components: Vec<BomComponent>,
575    /// Inventory movements (actual data).
576    pub inventory_movements: Vec<InventoryMovement>,
577    /// Production order count.
578    pub production_order_count: usize,
579    /// Quality inspection count.
580    pub quality_inspection_count: usize,
581    /// Cycle count count.
582    pub cycle_count_count: usize,
583    /// BOM component count.
584    pub bom_component_count: usize,
585    /// Inventory movement count.
586    pub inventory_movement_count: usize,
587}
588
589/// Sales, KPI, and budget data snapshot.
590#[derive(Debug, Clone, Default)]
591pub struct SalesKpiBudgetsSnapshot {
592    /// Sales quotes (actual data).
593    pub sales_quotes: Vec<SalesQuote>,
594    /// Management KPIs (actual data).
595    pub kpis: Vec<ManagementKpi>,
596    /// Budgets (actual data).
597    pub budgets: Vec<Budget>,
598    /// Sales quote count.
599    pub sales_quote_count: usize,
600    /// Management KPI count.
601    pub kpi_count: usize,
602    /// Budget line count.
603    pub budget_line_count: usize,
604}
605
606/// Anomaly labels generated during injection.
607#[derive(Debug, Clone, Default)]
608pub struct AnomalyLabels {
609    /// All anomaly labels.
610    pub labels: Vec<LabeledAnomaly>,
611    /// Summary statistics.
612    pub summary: Option<AnomalySummary>,
613    /// Count by anomaly type.
614    pub by_type: HashMap<String, usize>,
615}
616
617/// Balance validation results from running balance tracker.
618#[derive(Debug, Clone, Default)]
619pub struct BalanceValidationResult {
620    /// Whether validation was performed.
621    pub validated: bool,
622    /// Whether balance sheet equation is satisfied.
623    pub is_balanced: bool,
624    /// Number of entries processed.
625    pub entries_processed: u64,
626    /// Total debits across all entries.
627    pub total_debits: rust_decimal::Decimal,
628    /// Total credits across all entries.
629    pub total_credits: rust_decimal::Decimal,
630    /// Number of accounts tracked.
631    pub accounts_tracked: usize,
632    /// Number of companies tracked.
633    pub companies_tracked: usize,
634    /// Validation errors encountered.
635    pub validation_errors: Vec<ValidationError>,
636    /// Whether any unbalanced entries were found.
637    pub has_unbalanced_entries: bool,
638}
639
640/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
641#[derive(Debug, Clone, Default)]
642pub struct TaxSnapshot {
643    /// Tax jurisdictions.
644    pub jurisdictions: Vec<TaxJurisdiction>,
645    /// Tax codes.
646    pub codes: Vec<TaxCode>,
647    /// Tax lines computed on documents.
648    pub tax_lines: Vec<TaxLine>,
649    /// Tax returns filed per period.
650    pub tax_returns: Vec<TaxReturn>,
651    /// Tax provisions.
652    pub tax_provisions: Vec<TaxProvision>,
653    /// Withholding tax records.
654    pub withholding_records: Vec<WithholdingTaxRecord>,
655    /// Tax anomaly labels.
656    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
657    /// Jurisdiction count.
658    pub jurisdiction_count: usize,
659    /// Code count.
660    pub code_count: usize,
661}
662
663/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
664#[derive(Debug, Clone, Default, Serialize, Deserialize)]
665pub struct IntercompanySnapshot {
666    /// IC matched pairs (transaction pairs between related entities).
667    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
668    /// IC journal entries generated from matched pairs (seller side).
669    pub seller_journal_entries: Vec<JournalEntry>,
670    /// IC journal entries generated from matched pairs (buyer side).
671    pub buyer_journal_entries: Vec<JournalEntry>,
672    /// Elimination entries for consolidation.
673    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
674    /// IC matched pair count.
675    pub matched_pair_count: usize,
676    /// IC elimination entry count.
677    pub elimination_entry_count: usize,
678    /// IC matching rate (0.0 to 1.0).
679    pub match_rate: f64,
680}
681
682/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
683#[derive(Debug, Clone, Default)]
684pub struct EsgSnapshot {
685    /// Emission records (scope 1, 2, 3).
686    pub emissions: Vec<EmissionRecord>,
687    /// Energy consumption records.
688    pub energy: Vec<EnergyConsumption>,
689    /// Water usage records.
690    pub water: Vec<WaterUsage>,
691    /// Waste records.
692    pub waste: Vec<WasteRecord>,
693    /// Workforce diversity metrics.
694    pub diversity: Vec<WorkforceDiversityMetric>,
695    /// Pay equity metrics.
696    pub pay_equity: Vec<PayEquityMetric>,
697    /// Safety incidents.
698    pub safety_incidents: Vec<SafetyIncident>,
699    /// Safety metrics.
700    pub safety_metrics: Vec<SafetyMetric>,
701    /// Governance metrics.
702    pub governance: Vec<GovernanceMetric>,
703    /// Supplier ESG assessments.
704    pub supplier_assessments: Vec<SupplierEsgAssessment>,
705    /// Materiality assessments.
706    pub materiality: Vec<MaterialityAssessment>,
707    /// ESG disclosures.
708    pub disclosures: Vec<EsgDisclosure>,
709    /// Climate scenarios.
710    pub climate_scenarios: Vec<ClimateScenario>,
711    /// ESG anomaly labels.
712    pub anomaly_labels: Vec<EsgAnomalyLabel>,
713    /// Total emission record count.
714    pub emission_count: usize,
715    /// Total disclosure count.
716    pub disclosure_count: usize,
717}
718
719/// Treasury data snapshot (cash management, hedging, debt, pooling).
720#[derive(Debug, Clone, Default)]
721pub struct TreasurySnapshot {
722    /// Cash positions (daily balances per account).
723    pub cash_positions: Vec<CashPosition>,
724    /// Cash forecasts.
725    pub cash_forecasts: Vec<CashForecast>,
726    /// Cash pools.
727    pub cash_pools: Vec<CashPool>,
728    /// Cash pool sweep transactions.
729    pub cash_pool_sweeps: Vec<CashPoolSweep>,
730    /// Hedging instruments.
731    pub hedging_instruments: Vec<HedgingInstrument>,
732    /// Hedge relationships (ASC 815/IFRS 9 designations).
733    pub hedge_relationships: Vec<HedgeRelationship>,
734    /// Debt instruments.
735    pub debt_instruments: Vec<DebtInstrument>,
736    /// Bank guarantees and letters of credit.
737    pub bank_guarantees: Vec<BankGuarantee>,
738    /// Intercompany netting runs.
739    pub netting_runs: Vec<NettingRun>,
740    /// Treasury anomaly labels.
741    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
742}
743
/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
///
/// A plain container holding one vector per project accounting record type.
/// Because it derives `Default`, a default snapshot has every collection empty.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects with WBS hierarchies.
    pub projects: Vec<Project>,
    /// Project cost lines (linked from source documents).
    pub cost_lines: Vec<ProjectCostLine>,
    /// Revenue recognition records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics (EVM).
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
760
/// Complete result of enhanced generation run.
///
/// This is the success value returned by `EnhancedOrchestrator::generate`. It
/// bundles one snapshot (or collection) per generation area together with the
/// run statistics. Note that the struct derives only `Debug`; it is neither
/// `Clone` nor `Default`.
#[derive(Debug)]
pub struct EnhancedGenerationResult {
    /// Generated chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot (linked from document flows).
    pub subledger: SubledgerSnapshot,
    /// OCPM event log snapshot (if OCPM generation enabled).
    pub ocpm: OcpmSnapshot,
    /// Audit data snapshot (if audit generation enabled).
    pub audit: AuditSnapshot,
    /// Banking KYC/AML data snapshot (if banking generation enabled).
    pub banking: BankingSnapshot,
    /// Graph export snapshot (if graph export enabled).
    pub graph_export: GraphExportSnapshot,
    /// S2C sourcing data snapshot (if sourcing generation enabled).
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot (financial statements + bank reconciliations).
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data snapshot (payroll, time entries, expenses).
    pub hr: HrSnapshot,
    /// Accounting standards snapshot (revenue recognition, impairment).
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI, and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
    pub tax: TaxSnapshot,
    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
    pub esg: EsgSnapshot,
    /// Treasury data snapshot (cash management, hedging, debt).
    pub treasury: TreasurySnapshot,
    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
    pub intercompany: IntercompanySnapshot,
    /// Generated journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels (if injection enabled).
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation results (if validation enabled).
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics (if injection enabled).
    pub data_quality_stats: DataQualityStats,
    /// Generation statistics (counts and timings for the run).
    pub statistics: EnhancedGenerationStatistics,
    /// Data lineage graph (if tracking enabled); `None` otherwise.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate evaluation result, when one was produced.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls (if controls generation enabled).
    pub internal_controls: Vec<InternalControl>,
    /// Opening balances (if opening balance generation enabled).
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// GL-to-subledger reconciliation results (if reconciliation enabled).
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual (original, mutated) JE pairs for ML training.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red-flag indicators on P2P/O2C documents.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings (coordinated fraud networks).
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Bi-temporal version chains for vendor entities.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph (nodes + edges with strength scores), when present.
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links (P2P ↔ O2C via inventory movements).
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific GL accounts and metadata, when present.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
}
844
/// Enhanced statistics about a generation run.
///
/// A flat record of counters and timings, serializable via serde. Fields
/// annotated with `#[serde(default)]` take their type's default value when
/// they are missing from deserialized input; fields without the attribute
/// must be present.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data: vendor count.
    pub vendor_count: usize,
    /// Master data: customer count.
    pub customer_count: usize,
    /// Master data: material count.
    pub material_count: usize,
    /// Master data: asset count.
    pub asset_count: usize,
    /// Master data: employee count.
    pub employee_count: usize,
    /// Document flows: P2P chain count.
    pub p2p_chain_count: usize,
    /// Document flows: O2C chain count.
    pub o2c_chain_count: usize,
    /// Subledger: AP invoice count.
    pub ap_invoice_count: usize,
    /// Subledger: AR invoice count.
    pub ar_invoice_count: usize,
    /// OCPM: event count.
    pub ocpm_event_count: usize,
    /// OCPM: object count.
    pub ocpm_object_count: usize,
    /// OCPM: case count.
    pub ocpm_case_count: usize,
    /// Audit: engagement count.
    pub audit_engagement_count: usize,
    /// Audit: workpaper count.
    pub audit_workpaper_count: usize,
    /// Audit: evidence count.
    pub audit_evidence_count: usize,
    /// Audit: risk count.
    pub audit_risk_count: usize,
    /// Audit: finding count.
    pub audit_finding_count: usize,
    /// Audit: judgment count.
    pub audit_judgment_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking: customer count.
    pub banking_customer_count: usize,
    /// Banking: account count.
    pub banking_account_count: usize,
    /// Banking: transaction count.
    pub banking_transaction_count: usize,
    /// Banking: suspicious count.
    pub banking_suspicious_count: usize,
    /// Graph export: export count.
    pub graph_export_count: usize,
    /// Graph export: node count.
    pub graph_node_count: usize,
    /// Graph export: edge count.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed (`None` when no outcome was recorded).
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing: sourcing project count.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing: RFx event count.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing: bid count.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing: contract count.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing: catalog item count.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing: scorecard count.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting: financial statement count.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting: bank reconciliation count.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR: payroll run count.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR: time entry count.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR: expense report count.
    #[serde(default)]
    pub expense_report_count: usize,
    /// HR: benefit enrollment count.
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    /// Accounting standards: revenue contract count.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards: impairment test count.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Manufacturing: production order count.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing: quality inspection count.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing: cycle count count.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Manufacturing: BOM component count.
    #[serde(default)]
    pub bom_component_count: usize,
    /// Manufacturing: inventory movement count.
    #[serde(default)]
    pub inventory_movement_count: usize,
    /// Sales & reporting: sales quote count.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting: KPI count.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting: budget line count.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax: jurisdiction count.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax: tax code count.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG: emission record count.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG: disclosure count.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany: matched pair count.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany: elimination count.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
    /// Cash forecast count.
    #[serde(default)]
    pub cash_forecast_count: usize,
    /// Cash pool count.
    #[serde(default)]
    pub cash_pool_count: usize,
    /// Process evolution event count.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    /// Organizational event count.
    #[serde(default)]
    pub organizational_event_count: usize,
    /// Counterfactual pair count.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    /// Number of fraud red-flag indicators generated.
    #[serde(default)]
    pub red_flag_count: usize,
    /// Number of collusion rings generated.
    #[serde(default)]
    pub collusion_ring_count: usize,
    /// Number of bi-temporal vendor version chains generated.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    /// Number of nodes in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    /// Number of edges in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    /// Number of cross-process links generated.
    #[serde(default)]
    pub cross_process_link_count: usize,
    /// Number of disruption events generated.
    #[serde(default)]
    pub disruption_event_count: usize,
    /// Number of industry-specific GL accounts generated.
    #[serde(default)]
    pub industry_gl_account_count: usize,
}
1059
/// Enhanced orchestrator with full feature integration.
///
/// Built via `new`, `with_defaults`, or the `from_fingerprint*` constructors
/// and then customised with the `with_*` builder methods before calling
/// `generate`.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated when the orchestrator is constructed.
    config: GeneratorConfig,
    /// Phase configuration (includes the `show_progress` flag).
    phase_config: PhaseConfig,
    /// Shared chart of accounts; `None` on a freshly constructed orchestrator.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as the default (empty) snapshot.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when that is unset.
    seed: u64,
    /// Progress bar container; populated by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
}
1079
1080impl EnhancedOrchestrator {
1081    /// Create a new enhanced orchestrator.
1082    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1083        datasynth_config::validate_config(&config)?;
1084
1085        let seed = config.global.seed.unwrap_or_else(rand::random);
1086
1087        // Build resource guard from config
1088        let resource_guard = Self::build_resource_guard(&config, None);
1089
1090        // Build country pack registry from config
1091        let country_pack_registry = match &config.country_packs {
1092            Some(cp) => {
1093                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1094                    .map_err(|e| SynthError::config(e.to_string()))?
1095            }
1096            None => datasynth_core::CountryPackRegistry::builtin_only()
1097                .map_err(|e| SynthError::config(e.to_string()))?,
1098        };
1099
1100        Ok(Self {
1101            config,
1102            phase_config,
1103            coa: None,
1104            master_data: MasterDataSnapshot::default(),
1105            seed,
1106            multi_progress: None,
1107            resource_guard,
1108            output_path: None,
1109            copula_generators: Vec::new(),
1110            country_pack_registry,
1111            phase_sink: None,
1112        })
1113    }
1114
1115    /// Create with default phase config.
1116    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1117        Self::new(config, PhaseConfig::default())
1118    }
1119
1120    /// Set a streaming phase sink for real-time output.
1121    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1122        self.phase_sink = Some(sink);
1123        self
1124    }
1125
1126    /// Emit a batch of items to the phase sink (if configured).
1127    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1128        if let Some(ref sink) = self.phase_sink {
1129            for item in items {
1130                if let Ok(value) = serde_json::to_value(item) {
1131                    if let Err(e) = sink.emit(phase, type_name, &value) {
1132                        warn!(
1133                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1134                        );
1135                    }
1136                }
1137            }
1138            if let Err(e) = sink.phase_complete(phase) {
1139                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1140            }
1141        }
1142    }
1143
1144    /// Enable/disable progress bars.
1145    pub fn with_progress(mut self, show: bool) -> Self {
1146        self.phase_config.show_progress = show;
1147        if show {
1148            self.multi_progress = Some(MultiProgress::new());
1149        }
1150        self
1151    }
1152
1153    /// Set the output path for disk space monitoring.
1154    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1155        let path = path.into();
1156        self.output_path = Some(path.clone());
1157        // Rebuild resource guard with the output path
1158        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1159        self
1160    }
1161
1162    /// Access the country pack registry.
1163    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1164        &self.country_pack_registry
1165    }
1166
1167    /// Look up a country pack by country code string.
1168    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1169        self.country_pack_registry.get_by_str(country)
1170    }
1171
1172    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1173    /// company, defaulting to `"US"` if no companies are configured.
1174    fn primary_country_code(&self) -> &str {
1175        self.config
1176            .companies
1177            .first()
1178            .map(|c| c.country.as_str())
1179            .unwrap_or("US")
1180    }
1181
1182    /// Resolve the country pack for the primary (first) company.
1183    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1184        self.country_pack_for(self.primary_country_code())
1185    }
1186
1187    /// Resolve the CoA framework from config/country-pack.
1188    fn resolve_coa_framework(&self) -> CoAFramework {
1189        if self.config.accounting_standards.enabled {
1190            match self.config.accounting_standards.framework {
1191                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1192                    return CoAFramework::FrenchPcg;
1193                }
1194                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1195                    return CoAFramework::GermanSkr04;
1196                }
1197                _ => {}
1198            }
1199        }
1200        // Fallback: derive from country pack
1201        let pack = self.primary_pack();
1202        match pack.accounting.framework.as_str() {
1203            "french_gaap" => CoAFramework::FrenchPcg,
1204            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1205            _ => CoAFramework::UsGaap,
1206        }
1207    }
1208
1209    /// Check if copula generators are available.
1210    ///
1211    /// Returns true if the orchestrator has copula generators for preserving
1212    /// correlations (typically from fingerprint-based generation).
1213    pub fn has_copulas(&self) -> bool {
1214        !self.copula_generators.is_empty()
1215    }
1216
1217    /// Get the copula generators.
1218    ///
1219    /// Returns a reference to the copula generators for use during generation.
1220    /// These can be used to generate correlated samples that preserve the
1221    /// statistical relationships from the source data.
1222    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1223        &self.copula_generators
1224    }
1225
1226    /// Get a mutable reference to the copula generators.
1227    ///
1228    /// Allows generators to sample from copulas during data generation.
1229    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1230        &mut self.copula_generators
1231    }
1232
1233    /// Sample correlated values from a named copula.
1234    ///
1235    /// Returns None if the copula doesn't exist.
1236    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1237        self.copula_generators
1238            .iter_mut()
1239            .find(|c| c.name == copula_name)
1240            .map(|c| c.generator.sample())
1241    }
1242
1243    /// Create an orchestrator from a fingerprint file.
1244    ///
1245    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1246    /// and creates an orchestrator configured to generate data matching
1247    /// the statistical properties of the original data.
1248    ///
1249    /// # Arguments
1250    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1251    /// * `phase_config` - Phase configuration for generation
1252    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1253    ///
1254    /// # Example
1255    /// ```no_run
1256    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1257    /// use std::path::Path;
1258    ///
1259    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1260    ///     Path::new("fingerprint.dsf"),
1261    ///     PhaseConfig::default(),
1262    ///     1.0,
1263    /// ).unwrap();
1264    /// ```
1265    pub fn from_fingerprint(
1266        fingerprint_path: &std::path::Path,
1267        phase_config: PhaseConfig,
1268        scale: f64,
1269    ) -> SynthResult<Self> {
1270        info!("Loading fingerprint from: {}", fingerprint_path.display());
1271
1272        // Read the fingerprint
1273        let reader = FingerprintReader::new();
1274        let fingerprint = reader
1275            .read_from_file(fingerprint_path)
1276            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1277
1278        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1279    }
1280
1281    /// Create an orchestrator from a loaded fingerprint.
1282    ///
1283    /// # Arguments
1284    /// * `fingerprint` - The loaded fingerprint
1285    /// * `phase_config` - Phase configuration for generation
1286    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1287    pub fn from_fingerprint_data(
1288        fingerprint: Fingerprint,
1289        phase_config: PhaseConfig,
1290        scale: f64,
1291    ) -> SynthResult<Self> {
1292        info!(
1293            "Synthesizing config from fingerprint (version: {}, tables: {})",
1294            fingerprint.manifest.version,
1295            fingerprint.schema.tables.len()
1296        );
1297
1298        // Generate a seed for the synthesis
1299        let seed: u64 = rand::random();
1300
1301        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1302        let options = SynthesisOptions {
1303            scale,
1304            seed: Some(seed),
1305            preserve_correlations: true,
1306            inject_anomalies: true,
1307        };
1308        let synthesizer = ConfigSynthesizer::with_options(options);
1309
1310        // Synthesize full result including copula generators
1311        let synthesis_result = synthesizer
1312            .synthesize_full(&fingerprint, seed)
1313            .map_err(|e| {
1314                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1315            })?;
1316
1317        // Start with a base config from the fingerprint's industry if available
1318        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1319            Self::base_config_for_industry(industry)
1320        } else {
1321            Self::base_config_for_industry("manufacturing")
1322        };
1323
1324        // Apply the synthesized patches
1325        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1326
1327        // Log synthesis results
1328        info!(
1329            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1330            fingerprint.schema.tables.len(),
1331            scale,
1332            synthesis_result.copula_generators.len()
1333        );
1334
1335        if !synthesis_result.copula_generators.is_empty() {
1336            for spec in &synthesis_result.copula_generators {
1337                info!(
1338                    "  Copula '{}' for table '{}': {} columns",
1339                    spec.name,
1340                    spec.table,
1341                    spec.columns.len()
1342                );
1343            }
1344        }
1345
1346        // Create the orchestrator with the synthesized config
1347        let mut orchestrator = Self::new(config, phase_config)?;
1348
1349        // Store copula generators for use during generation
1350        orchestrator.copula_generators = synthesis_result.copula_generators;
1351
1352        Ok(orchestrator)
1353    }
1354
1355    /// Create a base config for a given industry.
1356    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1357        use datasynth_config::presets::create_preset;
1358        use datasynth_config::TransactionVolume;
1359        use datasynth_core::models::{CoAComplexity, IndustrySector};
1360
1361        let sector = match industry.to_lowercase().as_str() {
1362            "manufacturing" => IndustrySector::Manufacturing,
1363            "retail" => IndustrySector::Retail,
1364            "financial" | "financial_services" => IndustrySector::FinancialServices,
1365            "healthcare" => IndustrySector::Healthcare,
1366            "technology" | "tech" => IndustrySector::Technology,
1367            _ => IndustrySector::Manufacturing,
1368        };
1369
1370        // Create a preset with reasonable defaults
1371        create_preset(
1372            sector,
1373            1,  // company count
1374            12, // period months
1375            CoAComplexity::Medium,
1376            TransactionVolume::TenK,
1377        )
1378    }
1379
1380    /// Apply a config patch to a GeneratorConfig.
1381    fn apply_config_patch(
1382        mut config: GeneratorConfig,
1383        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1384    ) -> GeneratorConfig {
1385        use datasynth_fingerprint::synthesis::ConfigValue;
1386
1387        for (key, value) in patch.values() {
1388            match (key.as_str(), value) {
1389                // Transaction count is handled via TransactionVolume enum on companies
1390                // Log it but cannot directly set it (would need to modify company volumes)
1391                ("transactions.count", ConfigValue::Integer(n)) => {
1392                    info!(
1393                        "Fingerprint suggests {} transactions (apply via company volumes)",
1394                        n
1395                    );
1396                }
1397                ("global.period_months", ConfigValue::Integer(n)) => {
1398                    config.global.period_months = (*n).clamp(1, 120) as u32;
1399                }
1400                ("global.start_date", ConfigValue::String(s)) => {
1401                    config.global.start_date = s.clone();
1402                }
1403                ("global.seed", ConfigValue::Integer(n)) => {
1404                    config.global.seed = Some(*n as u64);
1405                }
1406                ("fraud.enabled", ConfigValue::Bool(b)) => {
1407                    config.fraud.enabled = *b;
1408                }
1409                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1410                    config.fraud.fraud_rate = *f;
1411                }
1412                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1413                    config.data_quality.enabled = *b;
1414                }
1415                // Handle anomaly injection paths (mapped to fraud config)
1416                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1417                    config.fraud.enabled = *b;
1418                }
1419                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1420                    config.fraud.fraud_rate = *f;
1421                }
1422                _ => {
1423                    debug!("Ignoring unknown config patch key: {}", key);
1424                }
1425            }
1426        }
1427
1428        config
1429    }
1430
1431    /// Build a resource guard from the configuration.
1432    fn build_resource_guard(
1433        config: &GeneratorConfig,
1434        output_path: Option<PathBuf>,
1435    ) -> ResourceGuard {
1436        let mut builder = ResourceGuardBuilder::new();
1437
1438        // Configure memory limit if set
1439        if config.global.memory_limit_mb > 0 {
1440            builder = builder.memory_limit(config.global.memory_limit_mb);
1441        }
1442
1443        // Configure disk monitoring for output path
1444        if let Some(path) = output_path {
1445            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1446        }
1447
1448        // Use conservative degradation settings for production safety
1449        builder = builder.conservative();
1450
1451        builder.build()
1452    }
1453
1454    /// Check resources (memory, disk, CPU) and return degradation level.
1455    ///
1456    /// Returns an error if hard limits are exceeded.
1457    /// Returns Ok(DegradationLevel) indicating current resource state.
1458    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1459        self.resource_guard.check()
1460    }
1461
1462    /// Check resources with logging.
1463    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1464        let level = self.resource_guard.check()?;
1465
1466        if level != DegradationLevel::Normal {
1467            warn!(
1468                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1469                phase,
1470                level,
1471                self.resource_guard.current_memory_mb(),
1472                self.resource_guard.available_disk_mb()
1473            );
1474        }
1475
1476        Ok(level)
1477    }
1478
1479    /// Get current degradation actions based on resource state.
1480    fn get_degradation_actions(&self) -> DegradationActions {
1481        self.resource_guard.get_actions()
1482    }
1483
1484    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1485    fn check_memory_limit(&self) -> SynthResult<()> {
1486        self.check_resources()?;
1487        Ok(())
1488    }
1489
    /// Run the complete generation workflow.
    ///
    /// Executes every generation phase in a fixed order and assembles the
    /// outputs into a single [`EnhancedGenerationResult`]. The "Phase N" labels
    /// in the comments below are identifiers, not execution order (for example
    /// Phase 25 runs right after Phase 4b, and Phase 19b runs after Phase 29).
    ///
    /// Journal entries are accumulated in `entries` across several phases
    /// (document flows / standalone JEs, FA acquisitions, intercompany, payroll,
    /// manufacturing) before controls, anomaly injection, validation and data
    /// quality injection are applied to the combined list.
    ///
    /// # Errors
    ///
    /// * Returns `SynthError::resource` if the initial resource check reports
    ///   `DegradationLevel::Emergency`.
    /// * Propagates the error of any phase invoked with `?`. The LLM enrichment,
    ///   diffusion enhancement, causal overlay, industry data and additional
    ///   graph-builder steps are invoked without `?` and therefore cannot abort
    ///   the run from here.
    /// * A failed stream-sink flush is only logged as a warning.
    ///
    /// # Side effects
    ///
    /// `self.master_data` is moved into the result with `std::mem::take`, so the
    /// orchestrator is left holding the `Default` value of its master data after
    /// this call returns successfully.
    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
        info!("Starting enhanced generation workflow");
        info!(
            "Config: industry={:?}, period_months={}, companies={}",
            self.config.global.industry,
            self.config.global.period_months,
            self.config.companies.len()
        );

        // Initial resource check before starting
        let initial_level = self.check_resources_with_log("initial")?;
        if initial_level == DegradationLevel::Emergency {
            return Err(SynthError::resource(
                "Insufficient resources to start generation",
            ));
        }

        // Statistics are filled in incrementally by the phases below.
        let mut stats = EnhancedGenerationStatistics {
            companies_count: self.config.companies.len(),
            period_months: self.config.global.period_months,
            ..Default::default()
        };

        // Phase 1: Chart of Accounts
        let coa = self.phase_chart_of_accounts(&mut stats)?;

        // Phase 2: Master Data
        self.phase_master_data(&mut stats)?;

        // Emit master data to stream sink
        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
        self.emit_phase_items("master_data", "Material", &self.master_data.materials);

        // Phase 3: Document Flows + Subledger Linking
        // `document_flows` is mutable because Phase 5a later writes contract IDs
        // into its P2P chains.
        let (mut document_flows, subledger, fa_journal_entries) =
            self.phase_document_flows(&mut stats)?;

        // Emit document flows to stream sink
        self.emit_phase_items(
            "document_flows",
            "PurchaseOrder",
            &document_flows.purchase_orders,
        );
        self.emit_phase_items(
            "document_flows",
            "GoodsReceipt",
            &document_flows.goods_receipts,
        );
        self.emit_phase_items(
            "document_flows",
            "VendorInvoice",
            &document_flows.vendor_invoices,
        );
        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);

        // Phase 3b: Opening Balances (before JE generation)
        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;

        // Note: Opening balances are exported as balance/opening_balances.json but are not
        // converted to journal entries. Converting to JEs requires richer type information
        // (GeneratedOpeningBalance.balances loses AccountType, making contra-asset accounts
        // like Accumulated Depreciation indistinguishable from regular assets by code prefix).
        // A future enhancement could store (Decimal, AccountType) in the balances map.

        // Phase 4: Journal Entries
        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;

        // Phase 4b: Append FA acquisition journal entries to main entries
        if !fa_journal_entries.is_empty() {
            debug!(
                "Appending {} FA acquisition JEs to main entries",
                fa_journal_entries.len()
            );
            entries.extend(fa_journal_entries);
        }

        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
        // At this point `entries` holds only the Phase 4 / 4b entries; IC, payroll
        // and manufacturing JEs are appended further down.
        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;

        // Get current degradation actions for optional phases
        // (captured once here and reused for both anomaly and data quality injection).
        let actions = self.get_degradation_actions();

        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
        let sourcing = self.phase_sourcing_data(&mut stats)?;

        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs
        // Only purchase orders without a contract are touched, and each one gets the
        // first contract (in `sourcing.contracts` order) whose vendor matches.
        if !sourcing.contracts.is_empty() {
            let mut linked_count = 0usize;
            for chain in &mut document_flows.p2p_chains {
                if chain.purchase_order.contract_id.is_none() {
                    if let Some(contract) = sourcing
                        .contracts
                        .iter()
                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
                    {
                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
                        linked_count += 1;
                    }
                }
            }
            if linked_count > 0 {
                debug!(
                    "Linked {} purchase orders to S2C contracts by vendor match",
                    linked_count
                );
            }
        }

        // Phase 5b: Intercompany Transactions + Matching + Eliminations
        let intercompany = self.phase_intercompany(&mut stats)?;

        // Phase 5c: Append IC journal entries to main entries
        // The IC entries are cloned rather than moved because `intercompany` is
        // also returned in the final result.
        if !intercompany.seller_journal_entries.is_empty()
            || !intercompany.buyer_journal_entries.is_empty()
        {
            let ic_je_count = intercompany.seller_journal_entries.len()
                + intercompany.buyer_journal_entries.len();
            entries.extend(intercompany.seller_journal_entries.iter().cloned());
            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
            debug!(
                "Appended {} IC journal entries to main entries",
                ic_je_count
            );
        }

        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
        let hr = self.phase_hr_data(&mut stats)?;

        // Phase 6b: Generate JEs from payroll runs
        if !hr.payroll_runs.is_empty() {
            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
            entries.extend(payroll_jes);
        }

        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;

        // Phase 7a: Generate JEs from production orders
        if !manufacturing_snap.production_orders.is_empty() {
            let mfg_jes = Self::generate_manufacturing_jes(&manufacturing_snap.production_orders);
            debug!("Generated {} JEs from production orders", mfg_jes.len());
            entries.extend(mfg_jes);
        }

        // Update final entry/line-item stats after all JE-generating phases
        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
        // When there are no entries the two counters are left untouched.
        if !entries.is_empty() {
            stats.total_entries = entries.len() as u64;
            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
            debug!(
                "Final entry count: {}, line items: {} (after all JE-generating phases)",
                stats.total_entries, stats.total_line_items
            );
        }

        // Phase 7b: Apply internal controls to journal entries
        if self.config.internal_controls.enabled && !entries.is_empty() {
            info!("Phase 7b: Applying internal controls to journal entries");
            let control_config = ControlGeneratorConfig {
                exception_rate: self.config.internal_controls.exception_rate,
                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
                enable_sox_marking: true,
                // Falls back to 10000 when the configured f64 threshold cannot be
                // converted to a Decimal.
                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
                    self.config.internal_controls.sox_materiality_threshold,
                )
                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
            };
            // The control generator gets its own seed, offset from the base seed by 99.
            let mut control_gen = ControlGenerator::with_config(self.seed + 99, control_config);
            for entry in &mut entries {
                control_gen.apply_controls(entry, &coa);
            }
            let with_controls = entries
                .iter()
                .filter(|e| !e.header.control_ids.is_empty())
                .count();
            info!(
                "Applied controls to {} entries ({} with control IDs assigned)",
                entries.len(),
                with_controls
            );
        }

        // Emit journal entries to stream sink (after all JE-generating phases)
        // NOTE(review): this happens before Phase 8 and Phase 10 receive
        // `&mut entries`; if those phases modify entries, the sink sees the
        // pre-injection versions while the returned result holds the final ones.
        // Confirm this is intended.
        self.emit_phase_items("journal_entries", "JournalEntry", &entries);

        // Phase 8: Anomaly Injection (after all JE-generating phases)
        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;

        // Emit anomaly labels to stream sink
        self.emit_phase_items(
            "anomaly_injection",
            "LabeledAnomaly",
            &anomaly_labels.labels,
        );

        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;

        // Emit red flags to stream sink
        self.emit_phase_items("red_flags", "RedFlag", &red_flags);

        // Phase 26b: Collusion Ring Generation (after red flags)
        let collusion_rings = self.phase_collusion_rings(&mut stats)?;

        // Emit collusion rings to stream sink
        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);

        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
        let balance_validation = self.phase_balance_validation(&entries)?;

        // Phase 9b: GL-to-Subledger Reconciliation
        let subledger_reconciliation =
            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;

        // Phase 10: Data Quality Injection
        let data_quality_stats =
            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;

        // Phase 11: Audit Data
        let audit = self.phase_audit_data(&entries, &mut stats)?;

        // Phase 12: Banking KYC/AML Data
        let banking = self.phase_banking_data(&mut stats)?;

        // Phase 13: Graph Export
        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;

        // Phase 14: LLM Enrichment
        // (Phases 14-16 return nothing that is checked here, so they cannot abort the run.)
        self.phase_llm_enrichment(&mut stats);

        // Phase 15: Diffusion Enhancement
        self.phase_diffusion_enhancement(&mut stats);

        // Phase 16: Causal Overlay
        self.phase_causal_overlay(&mut stats);

        // Phase 17: Bank Reconciliation + Financial Statements
        let financial_reporting =
            self.phase_financial_reporting(&document_flows, &entries, &coa, &mut stats)?;

        // Phase 18: Accounting Standards (Revenue Recognition, Impairment)
        let accounting_standards = self.phase_accounting_standards(&mut stats)?;

        // Phase 18b: OCPM Events (after all process data is available)
        let ocpm = self.phase_ocpm_events(
            &document_flows,
            &sourcing,
            &hr,
            &manufacturing_snap,
            &banking,
            &audit,
            &financial_reporting,
            &mut stats,
        )?;

        // Emit OCPM events to stream sink
        if let Some(ref event_log) = ocpm.event_log {
            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
        }

        // Phase 19: Sales Quotes, Management KPIs, Budgets
        let sales_kpi_budgets =
            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;

        // Phase 20: Tax Generation
        let tax = self.phase_tax_generation(&document_flows, &mut stats)?;

        // Phase 21: ESG Data Generation
        let esg_snap = self.phase_esg_generation(&document_flows, &mut stats)?;

        // Phase 22: Treasury Data Generation
        let treasury =
            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;

        // Phase 23: Project Accounting Data Generation
        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;

        // Phase 24: Process Evolution + Organizational Events
        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;

        // Phase 24b: Disruption Events
        let disruption_events = self.phase_disruption_events(&mut stats)?;

        // Phase 27: Bi-Temporal Vendor Version Chains
        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;

        // Phase 28: Entity Relationship Graph + Cross-Process Links
        let (entity_relationship_graph, cross_process_links) =
            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;

        // Phase 29: Industry-specific GL accounts
        let industry_output = self.phase_industry_data(&mut stats);

        // Phase 19b: Hypergraph Export (after all data is available)
        self.phase_hypergraph_export(
            &coa,
            &entries,
            &document_flows,
            &sourcing,
            &hr,
            &manufacturing_snap,
            &banking,
            &audit,
            &financial_reporting,
            &ocpm,
            &mut stats,
        )?;

        // Phase 10c: Additional graph builders (approval, entity, banking)
        // These run after all data is available since they need banking/IC data.
        if self.phase_config.generate_graph_export || self.config.graph_export.enabled {
            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
        }

        // Log informational messages for config sections not yet fully wired
        if self.config.streaming.enabled {
            info!("Note: streaming config is enabled but batch mode does not use it");
        }
        if self.config.vendor_network.enabled {
            debug!("Vendor network config available; relationship graph generation is partial");
        }
        if self.config.customer_segmentation.enabled {
            debug!("Customer segmentation config available; segment-aware generation is partial");
        }

        // Log final resource statistics
        let resource_stats = self.resource_guard.stats();
        info!(
            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
            resource_stats.disk.estimated_bytes_written,
            resource_stats.degradation_level
        );

        // Flush any remaining stream sink data
        // A flush failure is reported but deliberately does not fail the run.
        if let Some(ref sink) = self.phase_sink {
            if let Err(e) = sink.flush() {
                warn!("Stream sink flush failed: {e}");
            }
        }

        // Build data lineage graph
        let lineage = self.build_lineage_graph();

        // Evaluate quality gates if enabled in config
        // Yields `None` when gates are disabled or the configured profile is unknown.
        let gate_result = if self.config.quality_gates.enabled {
            let profile_name = &self.config.quality_gates.profile;
            match datasynth_eval::gates::get_profile(profile_name) {
                Some(profile) => {
                    // Build an evaluation populated with actual generation metrics.
                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();

                    // Populate balance sheet evaluation from balance validation results
                    // The whole run is reported as a single evaluated period.
                    if balance_validation.validated {
                        eval.coherence.balance =
                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
                                equation_balanced: balance_validation.is_balanced,
                                max_imbalance: (balance_validation.total_debits
                                    - balance_validation.total_credits)
                                    .abs(),
                                periods_evaluated: 1,
                                periods_imbalanced: if balance_validation.is_balanced {
                                    0
                                } else {
                                    1
                                },
                                period_results: Vec::new(),
                                companies_evaluated: self.config.companies.len(),
                            });
                    }

                    // Set coherence passes based on balance validation
                    // (this uses `is_balanced` regardless of the `validated` flag above).
                    eval.coherence.passes = balance_validation.is_balanced;
                    if !balance_validation.is_balanced {
                        eval.coherence
                            .failures
                            .push("Balance sheet equation not satisfied".to_string());
                    }

                    // Set statistical score based on entry count (basic sanity)
                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
                    eval.statistical.passes = !entries.is_empty();

                    // Quality score is a fixed placeholder; `data_quality_stats` is
                    // not consulted here.
                    eval.quality.overall_score = 0.9; // Default high for generated data
                    eval.quality.passes = true;

                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
                    info!(
                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
                        profile_name, result.gates_passed, result.gates_total, result.summary
                    );
                    Some(result)
                }
                None => {
                    warn!(
                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
                        profile_name
                    );
                    None
                }
            }
        } else {
            None
        };

        // Generate internal controls if enabled
        let internal_controls = if self.config.internal_controls.enabled {
            InternalControl::standard_controls()
        } else {
            Vec::new()
        };

        Ok(EnhancedGenerationResult {
            // Unwrap the Arc without copying when this is the last reference;
            // otherwise fall back to cloning the chart.
            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
            // Moves the master data out of the orchestrator, leaving its Default value behind.
            master_data: std::mem::take(&mut self.master_data),
            document_flows,
            subledger,
            ocpm,
            audit,
            banking,
            graph_export,
            sourcing,
            financial_reporting,
            hr,
            accounting_standards,
            manufacturing: manufacturing_snap,
            sales_kpi_budgets,
            tax,
            esg: esg_snap,
            treasury,
            project_accounting,
            process_evolution,
            organizational_events,
            disruption_events,
            intercompany,
            journal_entries: entries,
            anomaly_labels,
            balance_validation,
            data_quality_stats,
            statistics: stats,
            lineage: Some(lineage),
            gate_result,
            internal_controls,
            opening_balances,
            subledger_reconciliation,
            counterfactual_pairs,
            red_flags,
            collusion_rings,
            temporal_vendor_chains,
            entity_relationship_graph,
            cross_process_links,
            industry_output,
        })
    }
1949
1950    // ========================================================================
1951    // Generation Phase Methods
1952    // ========================================================================
1953
1954    /// Phase 1: Generate Chart of Accounts and update statistics.
1955    fn phase_chart_of_accounts(
1956        &mut self,
1957        stats: &mut EnhancedGenerationStatistics,
1958    ) -> SynthResult<Arc<ChartOfAccounts>> {
1959        info!("Phase 1: Generating Chart of Accounts");
1960        let coa = self.generate_coa()?;
1961        stats.accounts_count = coa.account_count();
1962        info!(
1963            "Chart of Accounts generated: {} accounts",
1964            stats.accounts_count
1965        );
1966        self.check_resources_with_log("post-coa")?;
1967        Ok(coa)
1968    }
1969
1970    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
1971    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
1972        if self.phase_config.generate_master_data {
1973            info!("Phase 2: Generating Master Data");
1974            self.generate_master_data()?;
1975            stats.vendor_count = self.master_data.vendors.len();
1976            stats.customer_count = self.master_data.customers.len();
1977            stats.material_count = self.master_data.materials.len();
1978            stats.asset_count = self.master_data.assets.len();
1979            stats.employee_count = self.master_data.employees.len();
1980            info!(
1981                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
1982                stats.vendor_count, stats.customer_count, stats.material_count,
1983                stats.asset_count, stats.employee_count
1984            );
1985            self.check_resources_with_log("post-master-data")?;
1986        } else {
1987            debug!("Phase 2: Skipped (master data generation disabled)");
1988        }
1989        Ok(())
1990    }
1991
1992    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
1993    fn phase_document_flows(
1994        &mut self,
1995        stats: &mut EnhancedGenerationStatistics,
1996    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
1997        let mut document_flows = DocumentFlowSnapshot::default();
1998        let mut subledger = SubledgerSnapshot::default();
1999
2000        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
2001            info!("Phase 3: Generating Document Flows");
2002            self.generate_document_flows(&mut document_flows)?;
2003            stats.p2p_chain_count = document_flows.p2p_chains.len();
2004            stats.o2c_chain_count = document_flows.o2c_chains.len();
2005            info!(
2006                "Document flows generated: {} P2P chains, {} O2C chains",
2007                stats.p2p_chain_count, stats.o2c_chain_count
2008            );
2009
2010            // Phase 3b: Link document flows to subledgers (for data coherence)
2011            debug!("Phase 3b: Linking document flows to subledgers");
2012            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
2013            stats.ap_invoice_count = subledger.ap_invoices.len();
2014            stats.ar_invoice_count = subledger.ar_invoices.len();
2015            debug!(
2016                "Subledgers linked: {} AP invoices, {} AR invoices",
2017                stats.ap_invoice_count, stats.ar_invoice_count
2018            );
2019
2020            self.check_resources_with_log("post-document-flows")?;
2021        } else {
2022            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
2023        }
2024
2025        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
2026        let mut fa_journal_entries = Vec::new();
2027        if !self.master_data.assets.is_empty() {
2028            debug!("Generating FA subledger records");
2029            let company_code = self
2030                .config
2031                .companies
2032                .first()
2033                .map(|c| c.code.as_str())
2034                .unwrap_or("1000");
2035            let currency = self
2036                .config
2037                .companies
2038                .first()
2039                .map(|c| c.currency.as_str())
2040                .unwrap_or("USD");
2041
2042            let mut fa_gen = datasynth_generators::FAGenerator::new(
2043                datasynth_generators::FAGeneratorConfig::default(),
2044                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
2045            );
2046
2047            for asset in &self.master_data.assets {
2048                let (record, je) = fa_gen.generate_asset_acquisition(
2049                    company_code,
2050                    &format!("{:?}", asset.asset_class),
2051                    &asset.description,
2052                    asset.acquisition_date,
2053                    currency,
2054                    asset.cost_center.as_deref(),
2055                );
2056                subledger.fa_records.push(record);
2057                fa_journal_entries.push(je);
2058            }
2059
2060            stats.fa_subledger_count = subledger.fa_records.len();
2061            debug!(
2062                "FA subledger records generated: {} (with {} acquisition JEs)",
2063                stats.fa_subledger_count,
2064                fa_journal_entries.len()
2065            );
2066        }
2067
2068        // Generate Inventory subledger records from master data materials
2069        if !self.master_data.materials.is_empty() {
2070            debug!("Generating Inventory subledger records");
2071            let first_company = self.config.companies.first();
2072            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
2073            let inv_currency = first_company
2074                .map(|c| c.currency.clone())
2075                .unwrap_or_else(|| "USD".to_string());
2076
2077            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
2078                datasynth_generators::InventoryGeneratorConfig::default(),
2079                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
2080                inv_currency.clone(),
2081            );
2082
2083            for (i, material) in self.master_data.materials.iter().enumerate() {
2084                let plant = format!("PLANT{:02}", (i % 3) + 1);
2085                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
2086                let initial_qty = rust_decimal::Decimal::from(
2087                    material
2088                        .safety_stock
2089                        .to_string()
2090                        .parse::<i64>()
2091                        .unwrap_or(100),
2092                );
2093
2094                let position = inv_gen.generate_position(
2095                    company_code,
2096                    &plant,
2097                    &storage_loc,
2098                    &material.material_id,
2099                    &material.description,
2100                    initial_qty,
2101                    Some(material.standard_cost),
2102                    &inv_currency,
2103                );
2104                subledger.inventory_positions.push(position);
2105            }
2106
2107            stats.inventory_subledger_count = subledger.inventory_positions.len();
2108            debug!(
2109                "Inventory subledger records generated: {}",
2110                stats.inventory_subledger_count
2111            );
2112        }
2113
2114        Ok((document_flows, subledger, fa_journal_entries))
2115    }
2116
2117    /// Phase 3c: Generate OCPM events from document flows.
2118    #[allow(clippy::too_many_arguments)]
2119    fn phase_ocpm_events(
2120        &mut self,
2121        document_flows: &DocumentFlowSnapshot,
2122        sourcing: &SourcingSnapshot,
2123        hr: &HrSnapshot,
2124        manufacturing: &ManufacturingSnapshot,
2125        banking: &BankingSnapshot,
2126        audit: &AuditSnapshot,
2127        financial_reporting: &FinancialReportingSnapshot,
2128        stats: &mut EnhancedGenerationStatistics,
2129    ) -> SynthResult<OcpmSnapshot> {
2130        if self.phase_config.generate_ocpm_events {
2131            info!("Phase 3c: Generating OCPM Events");
2132            let ocpm_snapshot = self.generate_ocpm_events(
2133                document_flows,
2134                sourcing,
2135                hr,
2136                manufacturing,
2137                banking,
2138                audit,
2139                financial_reporting,
2140            )?;
2141            stats.ocpm_event_count = ocpm_snapshot.event_count;
2142            stats.ocpm_object_count = ocpm_snapshot.object_count;
2143            stats.ocpm_case_count = ocpm_snapshot.case_count;
2144            info!(
2145                "OCPM events generated: {} events, {} objects, {} cases",
2146                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
2147            );
2148            self.check_resources_with_log("post-ocpm")?;
2149            Ok(ocpm_snapshot)
2150        } else {
2151            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
2152            Ok(OcpmSnapshot::default())
2153        }
2154    }
2155
2156    /// Phase 4: Generate journal entries from document flows and standalone generation.
2157    fn phase_journal_entries(
2158        &mut self,
2159        coa: &Arc<ChartOfAccounts>,
2160        document_flows: &DocumentFlowSnapshot,
2161        _stats: &mut EnhancedGenerationStatistics,
2162    ) -> SynthResult<Vec<JournalEntry>> {
2163        let mut entries = Vec::new();
2164
2165        // Phase 4a: Generate JEs from document flows (for data coherence)
2166        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
2167            debug!("Phase 4a: Generating JEs from document flows");
2168            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
2169            debug!("Generated {} JEs from document flows", flow_entries.len());
2170            entries.extend(flow_entries);
2171        }
2172
2173        // Phase 4b: Generate standalone journal entries
2174        if self.phase_config.generate_journal_entries {
2175            info!("Phase 4: Generating Journal Entries");
2176            let je_entries = self.generate_journal_entries(coa)?;
2177            info!("Generated {} standalone journal entries", je_entries.len());
2178            entries.extend(je_entries);
2179        } else {
2180            debug!("Phase 4: Skipped (journal entry generation disabled)");
2181        }
2182
2183        if !entries.is_empty() {
2184            // Note: stats.total_entries/total_line_items are set in generate()
2185            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
2186            self.check_resources_with_log("post-journal-entries")?;
2187        }
2188
2189        Ok(entries)
2190    }
2191
2192    /// Phase 5: Inject anomalies into journal entries.
2193    fn phase_anomaly_injection(
2194        &mut self,
2195        entries: &mut [JournalEntry],
2196        actions: &DegradationActions,
2197        stats: &mut EnhancedGenerationStatistics,
2198    ) -> SynthResult<AnomalyLabels> {
2199        if self.phase_config.inject_anomalies
2200            && !entries.is_empty()
2201            && !actions.skip_anomaly_injection
2202        {
2203            info!("Phase 5: Injecting Anomalies");
2204            let result = self.inject_anomalies(entries)?;
2205            stats.anomalies_injected = result.labels.len();
2206            info!("Injected {} anomalies", stats.anomalies_injected);
2207            self.check_resources_with_log("post-anomaly-injection")?;
2208            Ok(result)
2209        } else if actions.skip_anomaly_injection {
2210            warn!("Phase 5: Skipped due to resource degradation");
2211            Ok(AnomalyLabels::default())
2212        } else {
2213            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
2214            Ok(AnomalyLabels::default())
2215        }
2216    }
2217
2218    /// Phase 6: Validate balance sheet equation on journal entries.
2219    fn phase_balance_validation(
2220        &mut self,
2221        entries: &[JournalEntry],
2222    ) -> SynthResult<BalanceValidationResult> {
2223        if self.phase_config.validate_balances && !entries.is_empty() {
2224            debug!("Phase 6: Validating Balances");
2225            let balance_validation = self.validate_journal_entries(entries)?;
2226            if balance_validation.is_balanced {
2227                debug!("Balance validation passed");
2228            } else {
2229                warn!(
2230                    "Balance validation found {} errors",
2231                    balance_validation.validation_errors.len()
2232                );
2233            }
2234            Ok(balance_validation)
2235        } else {
2236            Ok(BalanceValidationResult::default())
2237        }
2238    }
2239
2240    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
2241    fn phase_data_quality_injection(
2242        &mut self,
2243        entries: &mut [JournalEntry],
2244        actions: &DegradationActions,
2245        stats: &mut EnhancedGenerationStatistics,
2246    ) -> SynthResult<DataQualityStats> {
2247        if self.phase_config.inject_data_quality
2248            && !entries.is_empty()
2249            && !actions.skip_data_quality
2250        {
2251            info!("Phase 7: Injecting Data Quality Variations");
2252            let dq_stats = self.inject_data_quality(entries)?;
2253            stats.data_quality_issues = dq_stats.records_with_issues;
2254            info!("Injected {} data quality issues", stats.data_quality_issues);
2255            self.check_resources_with_log("post-data-quality")?;
2256            Ok(dq_stats)
2257        } else if actions.skip_data_quality {
2258            warn!("Phase 7: Skipped due to resource degradation");
2259            Ok(DataQualityStats::default())
2260        } else {
2261            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
2262            Ok(DataQualityStats::default())
2263        }
2264    }
2265
2266    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
2267    fn phase_audit_data(
2268        &mut self,
2269        entries: &[JournalEntry],
2270        stats: &mut EnhancedGenerationStatistics,
2271    ) -> SynthResult<AuditSnapshot> {
2272        if self.phase_config.generate_audit {
2273            info!("Phase 8: Generating Audit Data");
2274            let audit_snapshot = self.generate_audit_data(entries)?;
2275            stats.audit_engagement_count = audit_snapshot.engagements.len();
2276            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
2277            stats.audit_evidence_count = audit_snapshot.evidence.len();
2278            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
2279            stats.audit_finding_count = audit_snapshot.findings.len();
2280            stats.audit_judgment_count = audit_snapshot.judgments.len();
2281            info!(
2282                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, {} findings, {} judgments",
2283                stats.audit_engagement_count, stats.audit_workpaper_count,
2284                stats.audit_evidence_count, stats.audit_risk_count,
2285                stats.audit_finding_count, stats.audit_judgment_count
2286            );
2287            self.check_resources_with_log("post-audit")?;
2288            Ok(audit_snapshot)
2289        } else {
2290            debug!("Phase 8: Skipped (audit generation disabled)");
2291            Ok(AuditSnapshot::default())
2292        }
2293    }
2294
2295    /// Phase 9: Generate banking KYC/AML data.
2296    fn phase_banking_data(
2297        &mut self,
2298        stats: &mut EnhancedGenerationStatistics,
2299    ) -> SynthResult<BankingSnapshot> {
2300        if self.phase_config.generate_banking && self.config.banking.enabled {
2301            info!("Phase 9: Generating Banking KYC/AML Data");
2302            let banking_snapshot = self.generate_banking_data()?;
2303            stats.banking_customer_count = banking_snapshot.customers.len();
2304            stats.banking_account_count = banking_snapshot.accounts.len();
2305            stats.banking_transaction_count = banking_snapshot.transactions.len();
2306            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
2307            info!(
2308                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
2309                stats.banking_customer_count, stats.banking_account_count,
2310                stats.banking_transaction_count, stats.banking_suspicious_count
2311            );
2312            self.check_resources_with_log("post-banking")?;
2313            Ok(banking_snapshot)
2314        } else {
2315            debug!("Phase 9: Skipped (banking generation disabled)");
2316            Ok(BankingSnapshot::default())
2317        }
2318    }
2319
2320    /// Phase 10: Export accounting network graphs for ML training.
2321    fn phase_graph_export(
2322        &mut self,
2323        entries: &[JournalEntry],
2324        coa: &Arc<ChartOfAccounts>,
2325        stats: &mut EnhancedGenerationStatistics,
2326    ) -> SynthResult<GraphExportSnapshot> {
2327        if (self.phase_config.generate_graph_export || self.config.graph_export.enabled)
2328            && !entries.is_empty()
2329        {
2330            info!("Phase 10: Exporting Accounting Network Graphs");
2331            match self.export_graphs(entries, coa, stats) {
2332                Ok(snapshot) => {
2333                    info!(
2334                        "Graph export complete: {} graphs ({} nodes, {} edges)",
2335                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
2336                    );
2337                    Ok(snapshot)
2338                }
2339                Err(e) => {
2340                    warn!("Phase 10: Graph export failed: {}", e);
2341                    Ok(GraphExportSnapshot::default())
2342                }
2343            }
2344        } else {
2345            debug!("Phase 10: Skipped (graph export disabled or no entries)");
2346            Ok(GraphExportSnapshot::default())
2347        }
2348    }
2349
2350    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
2351    #[allow(clippy::too_many_arguments)]
2352    fn phase_hypergraph_export(
2353        &self,
2354        coa: &Arc<ChartOfAccounts>,
2355        entries: &[JournalEntry],
2356        document_flows: &DocumentFlowSnapshot,
2357        sourcing: &SourcingSnapshot,
2358        hr: &HrSnapshot,
2359        manufacturing: &ManufacturingSnapshot,
2360        banking: &BankingSnapshot,
2361        audit: &AuditSnapshot,
2362        financial_reporting: &FinancialReportingSnapshot,
2363        ocpm: &OcpmSnapshot,
2364        stats: &mut EnhancedGenerationStatistics,
2365    ) -> SynthResult<()> {
2366        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
2367            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
2368            match self.export_hypergraph(
2369                coa,
2370                entries,
2371                document_flows,
2372                sourcing,
2373                hr,
2374                manufacturing,
2375                banking,
2376                audit,
2377                financial_reporting,
2378                ocpm,
2379                stats,
2380            ) {
2381                Ok(info) => {
2382                    info!(
2383                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
2384                        info.node_count, info.edge_count, info.hyperedge_count
2385                    );
2386                }
2387                Err(e) => {
2388                    warn!("Phase 10b: Hypergraph export failed: {}", e);
2389                }
2390            }
2391        } else {
2392            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
2393        }
2394        Ok(())
2395    }
2396
2397    /// Phase 11: LLM Enrichment.
2398    ///
2399    /// Uses an LLM provider (mock by default) to enrich vendor names with
2400    /// realistic, context-aware names. This phase is non-blocking: failures
2401    /// log a warning but do not stop the generation pipeline.
2402    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
2403        if !self.config.llm.enabled {
2404            debug!("Phase 11: Skipped (LLM enrichment disabled)");
2405            return;
2406        }
2407
2408        info!("Phase 11: Starting LLM Enrichment");
2409        let start = std::time::Instant::now();
2410
2411        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2412            let provider = Arc::new(MockLlmProvider::new(self.seed));
2413            let enricher = VendorLlmEnricher::new(provider);
2414
2415            let industry = format!("{:?}", self.config.global.industry);
2416            let max_enrichments = self
2417                .config
2418                .llm
2419                .max_vendor_enrichments
2420                .min(self.master_data.vendors.len());
2421
2422            let mut enriched_count = 0usize;
2423            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
2424                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
2425                    Ok(name) => {
2426                        vendor.name = name;
2427                        enriched_count += 1;
2428                    }
2429                    Err(e) => {
2430                        warn!(
2431                            "LLM vendor enrichment failed for {}: {}",
2432                            vendor.vendor_id, e
2433                        );
2434                    }
2435                }
2436            }
2437
2438            enriched_count
2439        }));
2440
2441        match result {
2442            Ok(enriched_count) => {
2443                stats.llm_vendors_enriched = enriched_count;
2444                let elapsed = start.elapsed();
2445                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2446                info!(
2447                    "Phase 11 complete: {} vendors enriched in {}ms",
2448                    enriched_count, stats.llm_enrichment_ms
2449                );
2450            }
2451            Err(_) => {
2452                let elapsed = start.elapsed();
2453                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
2454                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
2455            }
2456        }
2457    }
2458
2459    /// Phase 12: Diffusion Enhancement.
2460    ///
2461    /// Generates a sample set using the statistical diffusion backend to
2462    /// demonstrate distribution-matching data generation. This phase is
2463    /// non-blocking: failures log a warning but do not stop the pipeline.
2464    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
2465        if !self.config.diffusion.enabled {
2466            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
2467            return;
2468        }
2469
2470        info!("Phase 12: Starting Diffusion Enhancement");
2471        let start = std::time::Instant::now();
2472
2473        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2474            // Target distribution: transaction amounts (log-normal-like)
2475            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
2476            let stds = vec![2000.0, 1.5, 1.0];
2477
2478            let diffusion_config = DiffusionConfig {
2479                n_steps: self.config.diffusion.n_steps,
2480                seed: self.seed,
2481                ..Default::default()
2482            };
2483
2484            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
2485
2486            let n_samples = self.config.diffusion.sample_size;
2487            let n_features = 3; // amount, line_items, approval_level
2488            let samples = backend.generate(n_samples, n_features, self.seed);
2489
2490            samples.len()
2491        }));
2492
2493        match result {
2494            Ok(sample_count) => {
2495                stats.diffusion_samples_generated = sample_count;
2496                let elapsed = start.elapsed();
2497                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2498                info!(
2499                    "Phase 12 complete: {} diffusion samples generated in {}ms",
2500                    sample_count, stats.diffusion_enhancement_ms
2501                );
2502            }
2503            Err(_) => {
2504                let elapsed = start.elapsed();
2505                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
2506                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
2507            }
2508        }
2509    }
2510
2511    /// Phase 13: Causal Overlay.
2512    ///
2513    /// Builds a structural causal model from a built-in template (e.g.,
2514    /// fraud_detection) and generates causal samples. Optionally validates
2515    /// that the output respects the causal structure. This phase is
2516    /// non-blocking: failures log a warning but do not stop the pipeline.
2517    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
2518        if !self.config.causal.enabled {
2519            debug!("Phase 13: Skipped (causal generation disabled)");
2520            return;
2521        }
2522
2523        info!("Phase 13: Starting Causal Overlay");
2524        let start = std::time::Instant::now();
2525
2526        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
2527            // Select template based on config
2528            let graph = match self.config.causal.template.as_str() {
2529                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
2530                _ => CausalGraph::fraud_detection_template(),
2531            };
2532
2533            let scm = StructuralCausalModel::new(graph.clone())
2534                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
2535
2536            let n_samples = self.config.causal.sample_size;
2537            let samples = scm
2538                .generate(n_samples, self.seed)
2539                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
2540
2541            // Optionally validate causal structure
2542            let validation_passed = if self.config.causal.validate {
2543                let report = CausalValidator::validate_causal_structure(&samples, &graph);
2544                if report.valid {
2545                    info!(
2546                        "Causal validation passed: all {} checks OK",
2547                        report.checks.len()
2548                    );
2549                } else {
2550                    warn!(
2551                        "Causal validation: {} violations detected: {:?}",
2552                        report.violations.len(),
2553                        report.violations
2554                    );
2555                }
2556                Some(report.valid)
2557            } else {
2558                None
2559            };
2560
2561            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
2562        }));
2563
2564        match result {
2565            Ok(Ok((sample_count, validation_passed))) => {
2566                stats.causal_samples_generated = sample_count;
2567                stats.causal_validation_passed = validation_passed;
2568                let elapsed = start.elapsed();
2569                stats.causal_generation_ms = elapsed.as_millis() as u64;
2570                info!(
2571                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
2572                    sample_count, stats.causal_generation_ms, validation_passed,
2573                );
2574            }
2575            Ok(Err(e)) => {
2576                let elapsed = start.elapsed();
2577                stats.causal_generation_ms = elapsed.as_millis() as u64;
2578                warn!("Phase 13: Causal generation failed: {}", e);
2579            }
2580            Err(_) => {
2581                let elapsed = start.elapsed();
2582                stats.causal_generation_ms = elapsed.as_millis() as u64;
2583                warn!("Phase 13: Causal generation failed (panic caught), continuing");
2584            }
2585        }
2586    }
2587
2588    /// Phase 14: Generate S2C sourcing data.
2589    fn phase_sourcing_data(
2590        &mut self,
2591        stats: &mut EnhancedGenerationStatistics,
2592    ) -> SynthResult<SourcingSnapshot> {
2593        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
2594            debug!("Phase 14: Skipped (sourcing generation disabled)");
2595            return Ok(SourcingSnapshot::default());
2596        }
2597
2598        info!("Phase 14: Generating S2C Sourcing Data");
2599        let seed = self.seed;
2600
2601        // Gather vendor data from master data
2602        let vendor_ids: Vec<String> = self
2603            .master_data
2604            .vendors
2605            .iter()
2606            .map(|v| v.vendor_id.clone())
2607            .collect();
2608        if vendor_ids.is_empty() {
2609            debug!("Phase 14: Skipped (no vendors available)");
2610            return Ok(SourcingSnapshot::default());
2611        }
2612
2613        let categories: Vec<(String, String)> = vec![
2614            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
2615            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
2616            ("CAT-IT".to_string(), "IT Equipment".to_string()),
2617            ("CAT-SVC".to_string(), "Professional Services".to_string()),
2618            ("CAT-LOG".to_string(), "Logistics".to_string()),
2619        ];
2620        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
2621            .iter()
2622            .map(|(id, name)| {
2623                (
2624                    id.clone(),
2625                    name.clone(),
2626                    rust_decimal::Decimal::from(100_000),
2627                )
2628            })
2629            .collect();
2630
2631        let company_code = self
2632            .config
2633            .companies
2634            .first()
2635            .map(|c| c.code.as_str())
2636            .unwrap_or("1000");
2637        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2638            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2639        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2640        let fiscal_year = start_date.year() as u16;
2641        let owner_ids: Vec<String> = self
2642            .master_data
2643            .employees
2644            .iter()
2645            .take(5)
2646            .map(|e| e.employee_id.clone())
2647            .collect();
2648        let owner_id = owner_ids
2649            .first()
2650            .map(std::string::String::as_str)
2651            .unwrap_or("BUYER-001");
2652
2653        // Step 1: Spend Analysis
2654        let mut spend_gen = SpendAnalysisGenerator::new(seed);
2655        let spend_analyses =
2656            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
2657
2658        // Step 2: Sourcing Projects
2659        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
2660        let sourcing_projects = if owner_ids.is_empty() {
2661            Vec::new()
2662        } else {
2663            project_gen.generate(
2664                company_code,
2665                &categories_with_spend,
2666                &owner_ids,
2667                start_date,
2668                self.config.global.period_months,
2669            )
2670        };
2671        stats.sourcing_project_count = sourcing_projects.len();
2672
2673        // Step 3: Qualifications
2674        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
2675        let mut qual_gen = QualificationGenerator::new(seed + 2);
2676        let qualifications = qual_gen.generate(
2677            company_code,
2678            &qual_vendor_ids,
2679            sourcing_projects.first().map(|p| p.project_id.as_str()),
2680            owner_id,
2681            start_date,
2682        );
2683
2684        // Step 4: RFx Events
2685        let mut rfx_gen = RfxGenerator::new(seed + 3);
2686        let rfx_events: Vec<RfxEvent> = sourcing_projects
2687            .iter()
2688            .map(|proj| {
2689                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
2690                rfx_gen.generate(
2691                    company_code,
2692                    &proj.project_id,
2693                    &proj.category_id,
2694                    &qualified_vids,
2695                    owner_id,
2696                    start_date,
2697                    50000.0,
2698                )
2699            })
2700            .collect();
2701        stats.rfx_event_count = rfx_events.len();
2702
2703        // Step 5: Bids
2704        let mut bid_gen = BidGenerator::new(seed + 4);
2705        let mut all_bids = Vec::new();
2706        for rfx in &rfx_events {
2707            let bidder_count = vendor_ids.len().clamp(2, 5);
2708            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
2709            let bids = bid_gen.generate(rfx, &responding, start_date);
2710            all_bids.extend(bids);
2711        }
2712        stats.bid_count = all_bids.len();
2713
2714        // Step 6: Bid Evaluations
2715        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
2716        let bid_evaluations: Vec<BidEvaluation> = rfx_events
2717            .iter()
2718            .map(|rfx| {
2719                let rfx_bids: Vec<SupplierBid> = all_bids
2720                    .iter()
2721                    .filter(|b| b.rfx_id == rfx.rfx_id)
2722                    .cloned()
2723                    .collect();
2724                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
2725            })
2726            .collect();
2727
2728        // Step 7: Contracts from winning bids
2729        let mut contract_gen = ContractGenerator::new(seed + 6);
2730        let contracts: Vec<ProcurementContract> = bid_evaluations
2731            .iter()
2732            .zip(rfx_events.iter())
2733            .filter_map(|(eval, rfx)| {
2734                eval.ranked_bids.first().and_then(|winner| {
2735                    all_bids
2736                        .iter()
2737                        .find(|b| b.bid_id == winner.bid_id)
2738                        .map(|winning_bid| {
2739                            contract_gen.generate_from_bid(
2740                                winning_bid,
2741                                Some(&rfx.sourcing_project_id),
2742                                &rfx.category_id,
2743                                owner_id,
2744                                start_date,
2745                            )
2746                        })
2747                })
2748            })
2749            .collect();
2750        stats.contract_count = contracts.len();
2751
2752        // Step 8: Catalog Items
2753        let mut catalog_gen = CatalogGenerator::new(seed + 7);
2754        let catalog_items = catalog_gen.generate(&contracts);
2755        stats.catalog_item_count = catalog_items.len();
2756
2757        // Step 9: Scorecards
2758        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
2759        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
2760            .iter()
2761            .fold(
2762                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
2763                |mut acc, c| {
2764                    acc.entry(c.vendor_id.clone()).or_default().push(c);
2765                    acc
2766                },
2767            )
2768            .into_iter()
2769            .collect();
2770        let scorecards = scorecard_gen.generate(
2771            company_code,
2772            &vendor_contracts,
2773            start_date,
2774            end_date,
2775            owner_id,
2776        );
2777        stats.scorecard_count = scorecards.len();
2778
2779        // Back-populate cross-references on sourcing projects (Task 35)
2780        // Link each project to its RFx events, contracts, and spend analyses
2781        let mut sourcing_projects = sourcing_projects;
2782        for project in &mut sourcing_projects {
2783            // Link RFx events generated for this project
2784            project.rfx_ids = rfx_events
2785                .iter()
2786                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
2787                .map(|rfx| rfx.rfx_id.clone())
2788                .collect();
2789
2790            // Link contract awarded from this project's RFx
2791            project.contract_id = contracts
2792                .iter()
2793                .find(|c| {
2794                    c.sourcing_project_id
2795                        .as_deref()
2796                        .is_some_and(|sp| sp == project.project_id)
2797                })
2798                .map(|c| c.contract_id.clone());
2799
2800            // Link spend analysis for matching category (use category_id as the reference)
2801            project.spend_analysis_id = spend_analyses
2802                .iter()
2803                .find(|sa| sa.category_id == project.category_id)
2804                .map(|sa| sa.category_id.clone());
2805        }
2806
2807        info!(
2808            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
2809            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
2810            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
2811        );
2812        self.check_resources_with_log("post-sourcing")?;
2813
2814        Ok(SourcingSnapshot {
2815            spend_analyses,
2816            sourcing_projects,
2817            qualifications,
2818            rfx_events,
2819            bids: all_bids,
2820            bid_evaluations,
2821            contracts,
2822            catalog_items,
2823            scorecards,
2824        })
2825    }
2826
2827    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
2828    fn phase_intercompany(
2829        &mut self,
2830        stats: &mut EnhancedGenerationStatistics,
2831    ) -> SynthResult<IntercompanySnapshot> {
2832        // Skip if intercompany is disabled in config
2833        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
2834            debug!("Phase 14b: Skipped (intercompany generation disabled)");
2835            return Ok(IntercompanySnapshot::default());
2836        }
2837
2838        // Intercompany requires at least 2 companies
2839        if self.config.companies.len() < 2 {
2840            debug!(
2841                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
2842                self.config.companies.len()
2843            );
2844            return Ok(IntercompanySnapshot::default());
2845        }
2846
2847        info!("Phase 14b: Generating Intercompany Transactions");
2848
2849        let seed = self.seed;
2850        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2851            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
2852        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2853
2854        // Build ownership structure from company configs
2855        // First company is treated as the parent, remaining are subsidiaries
2856        let parent_code = self.config.companies[0].code.clone();
2857        let mut ownership_structure =
2858            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
2859
2860        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
2861            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
2862                format!("REL{:03}", i + 1),
2863                parent_code.clone(),
2864                company.code.clone(),
2865                rust_decimal::Decimal::from(100), // Default 100% ownership
2866                start_date,
2867            );
2868            ownership_structure.add_relationship(relationship);
2869        }
2870
2871        // Convert config transfer pricing method to core model enum
2872        let tp_method = match self.config.intercompany.transfer_pricing_method {
2873            datasynth_config::schema::TransferPricingMethod::CostPlus => {
2874                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
2875            }
2876            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
2877                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
2878            }
2879            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
2880                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
2881            }
2882            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
2883                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
2884            }
2885            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
2886                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
2887            }
2888        };
2889
2890        // Build IC generator config from schema config
2891        let ic_currency = self
2892            .config
2893            .companies
2894            .first()
2895            .map(|c| c.currency.clone())
2896            .unwrap_or_else(|| "USD".to_string());
2897        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
2898            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
2899            transfer_pricing_method: tp_method,
2900            markup_percent: rust_decimal::Decimal::from_f64_retain(
2901                self.config.intercompany.markup_percent,
2902            )
2903            .unwrap_or(rust_decimal::Decimal::from(5)),
2904            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
2905            default_currency: ic_currency,
2906            ..Default::default()
2907        };
2908
2909        // Create IC generator
2910        let mut ic_generator = datasynth_generators::ICGenerator::new(
2911            ic_gen_config,
2912            ownership_structure.clone(),
2913            seed + 50,
2914        );
2915
2916        // Generate IC transactions for the period
2917        // Use ~3 transactions per day as a reasonable default
2918        let transactions_per_day = 3;
2919        let matched_pairs = ic_generator.generate_transactions_for_period(
2920            start_date,
2921            end_date,
2922            transactions_per_day,
2923        );
2924
2925        // Generate journal entries from matched pairs
2926        let mut seller_entries = Vec::new();
2927        let mut buyer_entries = Vec::new();
2928        let fiscal_year = start_date.year();
2929
2930        for pair in &matched_pairs {
2931            let fiscal_period = pair.posting_date.month();
2932            let (seller_je, buyer_je) =
2933                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
2934            seller_entries.push(seller_je);
2935            buyer_entries.push(buyer_je);
2936        }
2937
2938        // Run matching engine
2939        let matching_config = datasynth_generators::ICMatchingConfig {
2940            base_currency: self
2941                .config
2942                .companies
2943                .first()
2944                .map(|c| c.currency.clone())
2945                .unwrap_or_else(|| "USD".to_string()),
2946            ..Default::default()
2947        };
2948        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
2949        matching_engine.load_matched_pairs(&matched_pairs);
2950        let matching_result = matching_engine.run_matching(end_date);
2951
2952        // Generate elimination entries if configured
2953        let mut elimination_entries = Vec::new();
2954        if self.config.intercompany.generate_eliminations {
2955            let elim_config = datasynth_generators::EliminationConfig {
2956                consolidation_entity: "GROUP".to_string(),
2957                base_currency: self
2958                    .config
2959                    .companies
2960                    .first()
2961                    .map(|c| c.currency.clone())
2962                    .unwrap_or_else(|| "USD".to_string()),
2963                ..Default::default()
2964            };
2965
2966            let mut elim_generator =
2967                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
2968
2969            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
2970            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
2971                matching_result
2972                    .matched_balances
2973                    .iter()
2974                    .chain(matching_result.unmatched_balances.iter())
2975                    .cloned()
2976                    .collect();
2977
2978            let journal = elim_generator.generate_eliminations(
2979                &fiscal_period,
2980                end_date,
2981                &all_balances,
2982                &matched_pairs,
2983                &std::collections::HashMap::new(), // investment amounts (simplified)
2984                &std::collections::HashMap::new(), // equity amounts (simplified)
2985            );
2986
2987            elimination_entries = journal.entries.clone();
2988        }
2989
2990        let matched_pair_count = matched_pairs.len();
2991        let elimination_entry_count = elimination_entries.len();
2992        let match_rate = matching_result.match_rate;
2993
2994        stats.ic_matched_pair_count = matched_pair_count;
2995        stats.ic_elimination_count = elimination_entry_count;
2996        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
2997
2998        info!(
2999            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
3000            matched_pair_count,
3001            stats.ic_transaction_count,
3002            seller_entries.len(),
3003            buyer_entries.len(),
3004            elimination_entry_count,
3005            match_rate * 100.0
3006        );
3007        self.check_resources_with_log("post-intercompany")?;
3008
3009        Ok(IntercompanySnapshot {
3010            matched_pairs,
3011            seller_journal_entries: seller_entries,
3012            buyer_journal_entries: buyer_entries,
3013            elimination_entries,
3014            matched_pair_count,
3015            elimination_entry_count,
3016            match_rate,
3017        })
3018    }
3019
3020    /// Phase 15: Generate bank reconciliations and financial statements.
3021    fn phase_financial_reporting(
3022        &mut self,
3023        document_flows: &DocumentFlowSnapshot,
3024        journal_entries: &[JournalEntry],
3025        coa: &Arc<ChartOfAccounts>,
3026        stats: &mut EnhancedGenerationStatistics,
3027    ) -> SynthResult<FinancialReportingSnapshot> {
3028        let fs_enabled = self.phase_config.generate_financial_statements
3029            || self.config.financial_reporting.enabled;
3030        let br_enabled = self.phase_config.generate_bank_reconciliation;
3031
3032        if !fs_enabled && !br_enabled {
3033            debug!("Phase 15: Skipped (financial reporting disabled)");
3034            return Ok(FinancialReportingSnapshot::default());
3035        }
3036
3037        info!("Phase 15: Generating Financial Reporting Data");
3038
3039        let seed = self.seed;
3040        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3041            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3042
3043        let mut financial_statements = Vec::new();
3044        let mut bank_reconciliations = Vec::new();
3045        let mut trial_balances = Vec::new();
3046
3047        // Generate financial statements from JE-derived trial balances.
3048        //
3049        // When journal entries are available, we use cumulative trial balances for
3050        // balance sheet accounts and current-period trial balances for income
3051        // statement accounts. We also track prior-period trial balances so the
3052        // generator can produce comparative amounts, and we build a proper
3053        // cash flow statement from working capital changes rather than random data.
3054        if fs_enabled {
3055            let company_code = self
3056                .config
3057                .companies
3058                .first()
3059                .map(|c| c.code.as_str())
3060                .unwrap_or("1000");
3061            let currency = self
3062                .config
3063                .companies
3064                .first()
3065                .map(|c| c.currency.as_str())
3066                .unwrap_or("USD");
3067            let has_journal_entries = !journal_entries.is_empty();
3068
3069            // Use FinancialStatementGenerator for balance sheet and income statement,
3070            // but build cash flow ourselves from TB data when JEs are available.
3071            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
3072
3073            // Track prior-period cumulative TB for comparative amounts and cash flow
3074            let mut prior_cumulative_tb: Option<Vec<datasynth_generators::TrialBalanceEntry>> =
3075                None;
3076
3077            // Generate one set of statements per period
3078            for period in 0..self.config.global.period_months {
3079                let period_start = start_date + chrono::Months::new(period);
3080                let period_end =
3081                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3082                let fiscal_year = period_end.year() as u16;
3083                let fiscal_period = period_end.month() as u8;
3084
3085                if has_journal_entries {
3086                    // Build cumulative trial balance from actual JEs for coherent
3087                    // balance sheet (cumulative) and income statement (current period)
3088                    let tb_entries = Self::build_cumulative_trial_balance(
3089                        journal_entries,
3090                        coa,
3091                        company_code,
3092                        start_date,
3093                        period_end,
3094                        fiscal_year,
3095                        fiscal_period,
3096                    );
3097
3098                    // Generate balance sheet and income statement via the generator,
3099                    // passing prior-period TB for comparative amounts
3100                    let prior_ref = prior_cumulative_tb.as_deref();
3101                    let stmts = fs_gen.generate(
3102                        company_code,
3103                        currency,
3104                        &tb_entries,
3105                        period_start,
3106                        period_end,
3107                        fiscal_year,
3108                        fiscal_period,
3109                        prior_ref,
3110                        "SYS-AUTOCLOSE",
3111                    );
3112
3113                    // Replace the generator's random cash flow with our TB-derived one
3114                    for stmt in stmts {
3115                        if stmt.statement_type == StatementType::CashFlowStatement {
3116                            // Build a coherent cash flow from trial balance changes
3117                            let net_income = Self::calculate_net_income_from_tb(&tb_entries);
3118                            let cf_items = Self::build_cash_flow_from_trial_balances(
3119                                &tb_entries,
3120                                prior_ref,
3121                                net_income,
3122                            );
3123                            financial_statements.push(FinancialStatement {
3124                                cash_flow_items: cf_items,
3125                                ..stmt
3126                            });
3127                        } else {
3128                            financial_statements.push(stmt);
3129                        }
3130                    }
3131
3132                    // Store current TB in snapshot for output
3133                    trial_balances.push(PeriodTrialBalance {
3134                        fiscal_year,
3135                        fiscal_period,
3136                        period_start,
3137                        period_end,
3138                        entries: tb_entries.clone(),
3139                    });
3140
3141                    // Store current TB as prior for next period
3142                    prior_cumulative_tb = Some(tb_entries);
3143                } else {
3144                    // Fallback: no JEs available, use single-period TB from entries
3145                    // (which will be empty, producing zero-valued statements)
3146                    let tb_entries = Self::build_trial_balance_from_entries(
3147                        journal_entries,
3148                        coa,
3149                        company_code,
3150                        fiscal_year,
3151                        fiscal_period,
3152                    );
3153
3154                    let stmts = fs_gen.generate(
3155                        company_code,
3156                        currency,
3157                        &tb_entries,
3158                        period_start,
3159                        period_end,
3160                        fiscal_year,
3161                        fiscal_period,
3162                        None,
3163                        "SYS-AUTOCLOSE",
3164                    );
3165                    financial_statements.extend(stmts);
3166
3167                    // Store trial balance even in fallback path
3168                    if !tb_entries.is_empty() {
3169                        trial_balances.push(PeriodTrialBalance {
3170                            fiscal_year,
3171                            fiscal_period,
3172                            period_start,
3173                            period_end,
3174                            entries: tb_entries,
3175                        });
3176                    }
3177                }
3178            }
3179            stats.financial_statement_count = financial_statements.len();
3180            info!(
3181                "Financial statements generated: {} statements (JE-derived: {})",
3182                stats.financial_statement_count, has_journal_entries
3183            );
3184        }
3185
3186        // Generate bank reconciliations from payment data
3187        if br_enabled && !document_flows.payments.is_empty() {
3188            let employee_ids: Vec<String> = self
3189                .master_data
3190                .employees
3191                .iter()
3192                .map(|e| e.employee_id.clone())
3193                .collect();
3194            let mut br_gen =
3195                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
3196
3197            // Group payments by company code and period
3198            for company in &self.config.companies {
3199                let company_payments: Vec<PaymentReference> = document_flows
3200                    .payments
3201                    .iter()
3202                    .filter(|p| p.header.company_code == company.code)
3203                    .map(|p| PaymentReference {
3204                        id: p.header.document_id.clone(),
3205                        amount: if p.is_vendor { p.amount } else { -p.amount },
3206                        date: p.header.document_date,
3207                        reference: p
3208                            .check_number
3209                            .clone()
3210                            .or_else(|| p.wire_reference.clone())
3211                            .unwrap_or_else(|| p.header.document_id.clone()),
3212                    })
3213                    .collect();
3214
3215                if company_payments.is_empty() {
3216                    continue;
3217                }
3218
3219                let bank_account_id = format!("{}-MAIN", company.code);
3220
3221                // Generate one reconciliation per period
3222                for period in 0..self.config.global.period_months {
3223                    let period_start = start_date + chrono::Months::new(period);
3224                    let period_end =
3225                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
3226
3227                    let period_payments: Vec<PaymentReference> = company_payments
3228                        .iter()
3229                        .filter(|p| p.date >= period_start && p.date <= period_end)
3230                        .cloned()
3231                        .collect();
3232
3233                    let recon = br_gen.generate(
3234                        &company.code,
3235                        &bank_account_id,
3236                        period_start,
3237                        period_end,
3238                        &company.currency,
3239                        &period_payments,
3240                    );
3241                    bank_reconciliations.push(recon);
3242                }
3243            }
3244            info!(
3245                "Bank reconciliations generated: {} reconciliations",
3246                bank_reconciliations.len()
3247            );
3248        }
3249
3250        stats.bank_reconciliation_count = bank_reconciliations.len();
3251        self.check_resources_with_log("post-financial-reporting")?;
3252
3253        if !trial_balances.is_empty() {
3254            info!(
3255                "Period-close trial balances captured: {} periods",
3256                trial_balances.len()
3257            );
3258        }
3259
3260        Ok(FinancialReportingSnapshot {
3261            financial_statements,
3262            bank_reconciliations,
3263            trial_balances,
3264        })
3265    }
3266
3267    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
3268    ///
3269    /// This ensures the trial balance is coherent with the JEs: every debit and credit
3270    /// posted in the journal entries flows through to the trial balance, using the real
3271    /// GL account numbers from the CoA.
3272    fn build_trial_balance_from_entries(
3273        journal_entries: &[JournalEntry],
3274        coa: &ChartOfAccounts,
3275        company_code: &str,
3276        fiscal_year: u16,
3277        fiscal_period: u8,
3278    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3279        use rust_decimal::Decimal;
3280
3281        // Accumulate total debits and credits per GL account
3282        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
3283        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
3284
3285        for je in journal_entries {
3286            // Filter to matching company, fiscal year, and period
3287            if je.header.company_code != company_code
3288                || je.header.fiscal_year != fiscal_year
3289                || je.header.fiscal_period != fiscal_period
3290            {
3291                continue;
3292            }
3293
3294            for line in &je.lines {
3295                let acct = &line.gl_account;
3296                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
3297                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
3298            }
3299        }
3300
3301        // Build a TrialBalanceEntry for each account that had activity
3302        let mut all_accounts: Vec<&String> = account_debits
3303            .keys()
3304            .chain(account_credits.keys())
3305            .collect::<std::collections::HashSet<_>>()
3306            .into_iter()
3307            .collect();
3308        all_accounts.sort();
3309
3310        let mut entries = Vec::new();
3311
3312        for acct_number in all_accounts {
3313            let debit = account_debits
3314                .get(acct_number)
3315                .copied()
3316                .unwrap_or(Decimal::ZERO);
3317            let credit = account_credits
3318                .get(acct_number)
3319                .copied()
3320                .unwrap_or(Decimal::ZERO);
3321
3322            if debit.is_zero() && credit.is_zero() {
3323                continue;
3324            }
3325
3326            // Look up account name from CoA, fall back to "Account {code}"
3327            let account_name = coa
3328                .get_account(acct_number)
3329                .map(|gl| gl.short_description.clone())
3330                .unwrap_or_else(|| format!("Account {acct_number}"));
3331
3332            // Map account code prefix to the category strings expected by
3333            // FinancialStatementGenerator (Cash, Receivables, Inventory,
3334            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
3335            // OperatingExpenses).
3336            let category = Self::category_from_account_code(acct_number);
3337
3338            entries.push(datasynth_generators::TrialBalanceEntry {
3339                account_code: acct_number.clone(),
3340                account_name,
3341                category,
3342                debit_balance: debit,
3343                credit_balance: credit,
3344            });
3345        }
3346
3347        entries
3348    }
3349
3350    /// Build a cumulative trial balance by aggregating all JEs from the start up to
3351    /// (and including) the given period end date.
3352    ///
3353    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
3354    /// while income statement accounts (revenue, expenses) show only the current period.
3355    /// The two are merged into a single Vec for the FinancialStatementGenerator.
3356    fn build_cumulative_trial_balance(
3357        journal_entries: &[JournalEntry],
3358        coa: &ChartOfAccounts,
3359        company_code: &str,
3360        start_date: NaiveDate,
3361        period_end: NaiveDate,
3362        fiscal_year: u16,
3363        fiscal_period: u8,
3364    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
3365        use rust_decimal::Decimal;
3366
3367        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
3368        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
3369        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
3370
3371        // Accumulate debits/credits for income statement accounts (current period only)
3372        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
3373        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
3374
3375        for je in journal_entries {
3376            if je.header.company_code != company_code {
3377                continue;
3378            }
3379
3380            for line in &je.lines {
3381                let acct = &line.gl_account;
3382                let category = Self::category_from_account_code(acct);
3383                let is_bs_account = matches!(
3384                    category.as_str(),
3385                    "Cash"
3386                        | "Receivables"
3387                        | "Inventory"
3388                        | "FixedAssets"
3389                        | "Payables"
3390                        | "AccruedLiabilities"
3391                        | "LongTermDebt"
3392                        | "Equity"
3393                );
3394
3395                if is_bs_account {
3396                    // Balance sheet: accumulate from start through period_end
3397                    if je.header.document_date <= period_end
3398                        && je.header.document_date >= start_date
3399                    {
3400                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3401                            line.debit_amount;
3402                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3403                            line.credit_amount;
3404                    }
3405                } else {
3406                    // Income statement: current period only
3407                    if je.header.fiscal_year == fiscal_year
3408                        && je.header.fiscal_period == fiscal_period
3409                    {
3410                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3411                            line.debit_amount;
3412                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
3413                            line.credit_amount;
3414                    }
3415                }
3416            }
3417        }
3418
3419        // Merge all accounts
3420        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
3421        all_accounts.extend(bs_debits.keys().cloned());
3422        all_accounts.extend(bs_credits.keys().cloned());
3423        all_accounts.extend(is_debits.keys().cloned());
3424        all_accounts.extend(is_credits.keys().cloned());
3425
3426        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
3427        sorted_accounts.sort();
3428
3429        let mut entries = Vec::new();
3430
3431        for acct_number in &sorted_accounts {
3432            let category = Self::category_from_account_code(acct_number);
3433            let is_bs_account = matches!(
3434                category.as_str(),
3435                "Cash"
3436                    | "Receivables"
3437                    | "Inventory"
3438                    | "FixedAssets"
3439                    | "Payables"
3440                    | "AccruedLiabilities"
3441                    | "LongTermDebt"
3442                    | "Equity"
3443            );
3444
3445            let (debit, credit) = if is_bs_account {
3446                (
3447                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3448                    bs_credits
3449                        .get(acct_number)
3450                        .copied()
3451                        .unwrap_or(Decimal::ZERO),
3452                )
3453            } else {
3454                (
3455                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
3456                    is_credits
3457                        .get(acct_number)
3458                        .copied()
3459                        .unwrap_or(Decimal::ZERO),
3460                )
3461            };
3462
3463            if debit.is_zero() && credit.is_zero() {
3464                continue;
3465            }
3466
3467            let account_name = coa
3468                .get_account(acct_number)
3469                .map(|gl| gl.short_description.clone())
3470                .unwrap_or_else(|| format!("Account {acct_number}"));
3471
3472            entries.push(datasynth_generators::TrialBalanceEntry {
3473                account_code: acct_number.clone(),
3474                account_name,
3475                category,
3476                debit_balance: debit,
3477                credit_balance: credit,
3478            });
3479        }
3480
3481        entries
3482    }
3483
3484    /// Build a JE-derived cash flow statement using the indirect method.
3485    ///
3486    /// Compares current and prior cumulative trial balances to derive working capital
3487    /// changes, producing a coherent cash flow statement tied to actual journal entries.
3488    fn build_cash_flow_from_trial_balances(
3489        current_tb: &[datasynth_generators::TrialBalanceEntry],
3490        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
3491        net_income: rust_decimal::Decimal,
3492    ) -> Vec<CashFlowItem> {
3493        use rust_decimal::Decimal;
3494
3495        // Helper: aggregate a TB by category and return net (debit - credit)
3496        let aggregate =
3497            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
3498                let mut map: HashMap<String, Decimal> = HashMap::new();
3499                for entry in tb {
3500                    let net = entry.debit_balance - entry.credit_balance;
3501                    *map.entry(entry.category.clone()).or_default() += net;
3502                }
3503                map
3504            };
3505
3506        let current = aggregate(current_tb);
3507        let prior = prior_tb.map(aggregate);
3508
3509        // Get balance for a category, defaulting to zero
3510        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
3511            *map.get(key).unwrap_or(&Decimal::ZERO)
3512        };
3513
3514        // Compute change: current - prior (or current if no prior)
3515        let change = |key: &str| -> Decimal {
3516            let curr = get(&current, key);
3517            match &prior {
3518                Some(p) => curr - get(p, key),
3519                None => curr,
3520            }
3521        };
3522
3523        // Operating activities (indirect method)
3524        // Depreciation add-back: approximate from FixedAssets decrease
3525        let fixed_asset_change = change("FixedAssets");
3526        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
3527            -fixed_asset_change
3528        } else {
3529            Decimal::ZERO
3530        };
3531
3532        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
3533        let ar_change = change("Receivables");
3534        let inventory_change = change("Inventory");
3535        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
3536        let ap_change = change("Payables");
3537        let accrued_change = change("AccruedLiabilities");
3538
3539        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
3540            + (-ap_change)
3541            + (-accrued_change);
3542
3543        // Investing activities
3544        let capex = if fixed_asset_change > Decimal::ZERO {
3545            -fixed_asset_change
3546        } else {
3547            Decimal::ZERO
3548        };
3549        let investing_cf = capex;
3550
3551        // Financing activities
3552        let debt_change = -change("LongTermDebt");
3553        let equity_change = -change("Equity");
3554        let financing_cf = debt_change + equity_change;
3555
3556        let net_change = operating_cf + investing_cf + financing_cf;
3557
3558        vec![
3559            CashFlowItem {
3560                item_code: "CF-NI".to_string(),
3561                label: "Net Income".to_string(),
3562                category: CashFlowCategory::Operating,
3563                amount: net_income,
3564                amount_prior: None,
3565                sort_order: 1,
3566                is_total: false,
3567            },
3568            CashFlowItem {
3569                item_code: "CF-DEP".to_string(),
3570                label: "Depreciation & Amortization".to_string(),
3571                category: CashFlowCategory::Operating,
3572                amount: depreciation_addback,
3573                amount_prior: None,
3574                sort_order: 2,
3575                is_total: false,
3576            },
3577            CashFlowItem {
3578                item_code: "CF-AR".to_string(),
3579                label: "Change in Accounts Receivable".to_string(),
3580                category: CashFlowCategory::Operating,
3581                amount: -ar_change,
3582                amount_prior: None,
3583                sort_order: 3,
3584                is_total: false,
3585            },
3586            CashFlowItem {
3587                item_code: "CF-AP".to_string(),
3588                label: "Change in Accounts Payable".to_string(),
3589                category: CashFlowCategory::Operating,
3590                amount: -ap_change,
3591                amount_prior: None,
3592                sort_order: 4,
3593                is_total: false,
3594            },
3595            CashFlowItem {
3596                item_code: "CF-INV".to_string(),
3597                label: "Change in Inventory".to_string(),
3598                category: CashFlowCategory::Operating,
3599                amount: -inventory_change,
3600                amount_prior: None,
3601                sort_order: 5,
3602                is_total: false,
3603            },
3604            CashFlowItem {
3605                item_code: "CF-OP".to_string(),
3606                label: "Net Cash from Operating Activities".to_string(),
3607                category: CashFlowCategory::Operating,
3608                amount: operating_cf,
3609                amount_prior: None,
3610                sort_order: 6,
3611                is_total: true,
3612            },
3613            CashFlowItem {
3614                item_code: "CF-CAPEX".to_string(),
3615                label: "Capital Expenditures".to_string(),
3616                category: CashFlowCategory::Investing,
3617                amount: capex,
3618                amount_prior: None,
3619                sort_order: 7,
3620                is_total: false,
3621            },
3622            CashFlowItem {
3623                item_code: "CF-INV-T".to_string(),
3624                label: "Net Cash from Investing Activities".to_string(),
3625                category: CashFlowCategory::Investing,
3626                amount: investing_cf,
3627                amount_prior: None,
3628                sort_order: 8,
3629                is_total: true,
3630            },
3631            CashFlowItem {
3632                item_code: "CF-DEBT".to_string(),
3633                label: "Net Borrowings / (Repayments)".to_string(),
3634                category: CashFlowCategory::Financing,
3635                amount: debt_change,
3636                amount_prior: None,
3637                sort_order: 9,
3638                is_total: false,
3639            },
3640            CashFlowItem {
3641                item_code: "CF-EQ".to_string(),
3642                label: "Equity Changes".to_string(),
3643                category: CashFlowCategory::Financing,
3644                amount: equity_change,
3645                amount_prior: None,
3646                sort_order: 10,
3647                is_total: false,
3648            },
3649            CashFlowItem {
3650                item_code: "CF-FIN-T".to_string(),
3651                label: "Net Cash from Financing Activities".to_string(),
3652                category: CashFlowCategory::Financing,
3653                amount: financing_cf,
3654                amount_prior: None,
3655                sort_order: 11,
3656                is_total: true,
3657            },
3658            CashFlowItem {
3659                item_code: "CF-NET".to_string(),
3660                label: "Net Change in Cash".to_string(),
3661                category: CashFlowCategory::Operating,
3662                amount: net_change,
3663                amount_prior: None,
3664                sort_order: 12,
3665                is_total: true,
3666            },
3667        ]
3668    }
3669
3670    /// Calculate net income from a set of trial balance entries.
3671    ///
3672    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
3673    fn calculate_net_income_from_tb(
3674        tb: &[datasynth_generators::TrialBalanceEntry],
3675    ) -> rust_decimal::Decimal {
3676        use rust_decimal::Decimal;
3677
3678        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
3679        for entry in tb {
3680            let net = entry.debit_balance - entry.credit_balance;
3681            *aggregated.entry(entry.category.clone()).or_default() += net;
3682        }
3683
3684        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
3685        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
3686        let opex = *aggregated
3687            .get("OperatingExpenses")
3688            .unwrap_or(&Decimal::ZERO);
3689        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
3690        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
3691
3692        // revenue is negative (credit-normal), expenses are positive (debit-normal)
3693        // other_income is typically negative (credit), other_expenses is typically positive
3694        let operating_income = revenue - cogs - opex - other_expenses - other_income;
3695        let tax_rate = Decimal::new(25, 2); // 0.25
3696        let tax = operating_income * tax_rate;
3697        operating_income - tax
3698    }
3699
3700    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
3701    ///
3702    /// Uses the first two digits of the account code to classify into the categories
3703    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
3704    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
3705    /// OperatingExpenses, OtherIncome, OtherExpenses.
3706    fn category_from_account_code(code: &str) -> String {
3707        let prefix: String = code.chars().take(2).collect();
3708        match prefix.as_str() {
3709            "10" => "Cash",
3710            "11" => "Receivables",
3711            "12" | "13" | "14" => "Inventory",
3712            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
3713            "20" => "Payables",
3714            "21" | "22" | "23" | "24" => "AccruedLiabilities",
3715            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
3716            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
3717            "40" | "41" | "42" | "43" | "44" => "Revenue",
3718            "50" | "51" | "52" => "CostOfSales",
3719            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
3720                "OperatingExpenses"
3721            }
3722            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
3723            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
3724            _ => "OperatingExpenses",
3725        }
3726        .to_string()
3727    }
3728
3729    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
3730    fn phase_hr_data(
3731        &mut self,
3732        stats: &mut EnhancedGenerationStatistics,
3733    ) -> SynthResult<HrSnapshot> {
3734        if !self.config.hr.enabled {
3735            debug!("Phase 16: Skipped (HR generation disabled)");
3736            return Ok(HrSnapshot::default());
3737        }
3738
3739        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
3740
3741        let seed = self.seed;
3742        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3743            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3744        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3745        let company_code = self
3746            .config
3747            .companies
3748            .first()
3749            .map(|c| c.code.as_str())
3750            .unwrap_or("1000");
3751        let currency = self
3752            .config
3753            .companies
3754            .first()
3755            .map(|c| c.currency.as_str())
3756            .unwrap_or("USD");
3757
3758        let employee_ids: Vec<String> = self
3759            .master_data
3760            .employees
3761            .iter()
3762            .map(|e| e.employee_id.clone())
3763            .collect();
3764
3765        if employee_ids.is_empty() {
3766            debug!("Phase 16: Skipped (no employees available)");
3767            return Ok(HrSnapshot::default());
3768        }
3769
3770        // Extract cost-center pool from master data employees for cross-reference
3771        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
3772        let cost_center_ids: Vec<String> = self
3773            .master_data
3774            .employees
3775            .iter()
3776            .filter_map(|e| e.cost_center.clone())
3777            .collect::<std::collections::HashSet<_>>()
3778            .into_iter()
3779            .collect();
3780
3781        let mut snapshot = HrSnapshot::default();
3782
3783        // Generate payroll runs (one per month)
3784        if self.config.hr.payroll.enabled {
3785            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 30)
3786                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3787
3788            // Look up country pack for payroll deductions and labels
3789            let payroll_pack = self.primary_pack();
3790
3791            // Store the pack on the generator so generate() resolves
3792            // localized deduction rates and labels from it.
3793            payroll_gen.set_country_pack(payroll_pack.clone());
3794
3795            let employees_with_salary: Vec<(
3796                String,
3797                rust_decimal::Decimal,
3798                Option<String>,
3799                Option<String>,
3800            )> = self
3801                .master_data
3802                .employees
3803                .iter()
3804                .map(|e| {
3805                    (
3806                        e.employee_id.clone(),
3807                        rust_decimal::Decimal::from(5000), // Default monthly salary
3808                        e.cost_center.clone(),
3809                        e.department_id.clone(),
3810                    )
3811                })
3812                .collect();
3813
3814            for month in 0..self.config.global.period_months {
3815                let period_start = start_date + chrono::Months::new(month);
3816                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
3817                let (run, items) = payroll_gen.generate(
3818                    company_code,
3819                    &employees_with_salary,
3820                    period_start,
3821                    period_end,
3822                    currency,
3823                );
3824                snapshot.payroll_runs.push(run);
3825                snapshot.payroll_run_count += 1;
3826                snapshot.payroll_line_item_count += items.len();
3827                snapshot.payroll_line_items.extend(items);
3828            }
3829        }
3830
3831        // Generate time entries
3832        if self.config.hr.time_attendance.enabled {
3833            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
3834                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3835            let entries = time_gen.generate(
3836                &employee_ids,
3837                start_date,
3838                end_date,
3839                &self.config.hr.time_attendance,
3840            );
3841            snapshot.time_entry_count = entries.len();
3842            snapshot.time_entries = entries;
3843        }
3844
3845        // Generate expense reports
3846        if self.config.hr.expenses.enabled {
3847            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
3848                .with_pools(employee_ids.clone(), cost_center_ids.clone());
3849            expense_gen.set_country_pack(self.primary_pack().clone());
3850            let company_currency = self
3851                .config
3852                .companies
3853                .first()
3854                .map(|c| c.currency.as_str())
3855                .unwrap_or("USD");
3856            let reports = expense_gen.generate_with_currency(
3857                &employee_ids,
3858                start_date,
3859                end_date,
3860                &self.config.hr.expenses,
3861                company_currency,
3862            );
3863            snapshot.expense_report_count = reports.len();
3864            snapshot.expense_reports = reports;
3865        }
3866
3867        // Generate benefit enrollments (gated on payroll, since benefits require employees)
3868        if self.config.hr.payroll.enabled {
3869            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
3870            let employee_pairs: Vec<(String, String)> = self
3871                .master_data
3872                .employees
3873                .iter()
3874                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
3875                .collect();
3876            let enrollments =
3877                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
3878            snapshot.benefit_enrollment_count = enrollments.len();
3879            snapshot.benefit_enrollments = enrollments;
3880        }
3881
3882        stats.payroll_run_count = snapshot.payroll_run_count;
3883        stats.time_entry_count = snapshot.time_entry_count;
3884        stats.expense_report_count = snapshot.expense_report_count;
3885        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
3886
3887        info!(
3888            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments",
3889            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
3890            snapshot.time_entry_count, snapshot.expense_report_count,
3891            snapshot.benefit_enrollment_count
3892        );
3893        self.check_resources_with_log("post-hr")?;
3894
3895        Ok(snapshot)
3896    }
3897
3898    /// Phase 17: Generate accounting standards data (revenue recognition, impairment).
3899    fn phase_accounting_standards(
3900        &mut self,
3901        stats: &mut EnhancedGenerationStatistics,
3902    ) -> SynthResult<AccountingStandardsSnapshot> {
3903        if !self.phase_config.generate_accounting_standards
3904            || !self.config.accounting_standards.enabled
3905        {
3906            debug!("Phase 17: Skipped (accounting standards generation disabled)");
3907            return Ok(AccountingStandardsSnapshot::default());
3908        }
3909        info!("Phase 17: Generating Accounting Standards Data");
3910
3911        let seed = self.seed;
3912        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3913            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3914        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3915        let company_code = self
3916            .config
3917            .companies
3918            .first()
3919            .map(|c| c.code.as_str())
3920            .unwrap_or("1000");
3921        let currency = self
3922            .config
3923            .companies
3924            .first()
3925            .map(|c| c.currency.as_str())
3926            .unwrap_or("USD");
3927
3928        // Convert config framework to standards framework.
3929        // If the user explicitly set a framework in the YAML config, use that.
3930        // Otherwise, fall back to the country pack's accounting.framework field,
3931        // and if that is also absent or unrecognised, default to US GAAP.
3932        let framework = match self.config.accounting_standards.framework {
3933            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
3934                datasynth_standards::framework::AccountingFramework::UsGaap
3935            }
3936            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
3937                datasynth_standards::framework::AccountingFramework::Ifrs
3938            }
3939            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
3940                datasynth_standards::framework::AccountingFramework::DualReporting
3941            }
3942            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
3943                datasynth_standards::framework::AccountingFramework::FrenchGaap
3944            }
3945            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
3946                datasynth_standards::framework::AccountingFramework::GermanGaap
3947            }
3948            None => {
3949                // Derive framework from the primary company's country pack
3950                let pack = self.primary_pack();
3951                let pack_fw = pack.accounting.framework.as_str();
3952                match pack_fw {
3953                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
3954                    "dual_reporting" => {
3955                        datasynth_standards::framework::AccountingFramework::DualReporting
3956                    }
3957                    "french_gaap" => {
3958                        datasynth_standards::framework::AccountingFramework::FrenchGaap
3959                    }
3960                    "german_gaap" | "hgb" => {
3961                        datasynth_standards::framework::AccountingFramework::GermanGaap
3962                    }
3963                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
3964                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
3965                }
3966            }
3967        };
3968
3969        let mut snapshot = AccountingStandardsSnapshot::default();
3970
3971        // Revenue recognition
3972        if self.config.accounting_standards.revenue_recognition.enabled {
3973            let customer_ids: Vec<String> = self
3974                .master_data
3975                .customers
3976                .iter()
3977                .map(|c| c.customer_id.clone())
3978                .collect();
3979
3980            if !customer_ids.is_empty() {
3981                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
3982                let contracts = rev_gen.generate(
3983                    company_code,
3984                    &customer_ids,
3985                    start_date,
3986                    end_date,
3987                    currency,
3988                    &self.config.accounting_standards.revenue_recognition,
3989                    framework,
3990                );
3991                snapshot.revenue_contract_count = contracts.len();
3992                snapshot.contracts = contracts;
3993            }
3994        }
3995
3996        // Impairment testing
3997        if self.config.accounting_standards.impairment.enabled {
3998            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
3999                .master_data
4000                .assets
4001                .iter()
4002                .map(|a| {
4003                    (
4004                        a.asset_id.clone(),
4005                        a.description.clone(),
4006                        a.acquisition_cost,
4007                    )
4008                })
4009                .collect();
4010
4011            if !asset_data.is_empty() {
4012                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
4013                let tests = imp_gen.generate(
4014                    company_code,
4015                    &asset_data,
4016                    end_date,
4017                    &self.config.accounting_standards.impairment,
4018                    framework,
4019                );
4020                snapshot.impairment_test_count = tests.len();
4021                snapshot.impairment_tests = tests;
4022            }
4023        }
4024
4025        stats.revenue_contract_count = snapshot.revenue_contract_count;
4026        stats.impairment_test_count = snapshot.impairment_test_count;
4027
4028        info!(
4029            "Accounting standards data generated: {} revenue contracts, {} impairment tests",
4030            snapshot.revenue_contract_count, snapshot.impairment_test_count
4031        );
4032        self.check_resources_with_log("post-accounting-standards")?;
4033
4034        Ok(snapshot)
4035    }
4036
4037    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
4038    fn phase_manufacturing(
4039        &mut self,
4040        stats: &mut EnhancedGenerationStatistics,
4041    ) -> SynthResult<ManufacturingSnapshot> {
4042        if !self.phase_config.generate_manufacturing || !self.config.manufacturing.enabled {
4043            debug!("Phase 18: Skipped (manufacturing generation disabled)");
4044            return Ok(ManufacturingSnapshot::default());
4045        }
4046        info!("Phase 18: Generating Manufacturing Data");
4047
4048        let seed = self.seed;
4049        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4050            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4051        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4052        let company_code = self
4053            .config
4054            .companies
4055            .first()
4056            .map(|c| c.code.as_str())
4057            .unwrap_or("1000");
4058
4059        let material_data: Vec<(String, String)> = self
4060            .master_data
4061            .materials
4062            .iter()
4063            .map(|m| (m.material_id.clone(), m.description.clone()))
4064            .collect();
4065
4066        if material_data.is_empty() {
4067            debug!("Phase 18: Skipped (no materials available)");
4068            return Ok(ManufacturingSnapshot::default());
4069        }
4070
4071        let mut snapshot = ManufacturingSnapshot::default();
4072
4073        // Generate production orders
4074        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 50);
4075        let production_orders = prod_gen.generate(
4076            company_code,
4077            &material_data,
4078            start_date,
4079            end_date,
4080            &self.config.manufacturing.production_orders,
4081            &self.config.manufacturing.costing,
4082            &self.config.manufacturing.routing,
4083        );
4084        snapshot.production_order_count = production_orders.len();
4085
4086        // Generate quality inspections from production orders
4087        let inspection_data: Vec<(String, String, String)> = production_orders
4088            .iter()
4089            .map(|po| {
4090                (
4091                    po.order_id.clone(),
4092                    po.material_id.clone(),
4093                    po.material_description.clone(),
4094                )
4095            })
4096            .collect();
4097
4098        snapshot.production_orders = production_orders;
4099
4100        if !inspection_data.is_empty() {
4101            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 51);
4102            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
4103            snapshot.quality_inspection_count = inspections.len();
4104            snapshot.quality_inspections = inspections;
4105        }
4106
4107        // Generate cycle counts (one per month)
4108        let storage_locations: Vec<(String, String)> = material_data
4109            .iter()
4110            .enumerate()
4111            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
4112            .collect();
4113
4114        let employee_ids: Vec<String> = self
4115            .master_data
4116            .employees
4117            .iter()
4118            .map(|e| e.employee_id.clone())
4119            .collect();
4120        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 52)
4121            .with_employee_pool(employee_ids);
4122        let mut cycle_count_total = 0usize;
4123        for month in 0..self.config.global.period_months {
4124            let count_date = start_date + chrono::Months::new(month);
4125            let items_per_count = storage_locations.len().clamp(10, 50);
4126            let cc = cc_gen.generate(
4127                company_code,
4128                &storage_locations,
4129                count_date,
4130                items_per_count,
4131            );
4132            snapshot.cycle_counts.push(cc);
4133            cycle_count_total += 1;
4134        }
4135        snapshot.cycle_count_count = cycle_count_total;
4136
4137        // Generate BOM components
4138        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 53);
4139        let bom_components = bom_gen.generate(company_code, &material_data);
4140        snapshot.bom_component_count = bom_components.len();
4141        snapshot.bom_components = bom_components;
4142
4143        // Generate inventory movements
4144        let currency = self
4145            .config
4146            .companies
4147            .first()
4148            .map(|c| c.currency.as_str())
4149            .unwrap_or("USD");
4150        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 54);
4151        let inventory_movements = inv_mov_gen.generate(
4152            company_code,
4153            &material_data,
4154            start_date,
4155            end_date,
4156            2,
4157            currency,
4158        );
4159        snapshot.inventory_movement_count = inventory_movements.len();
4160        snapshot.inventory_movements = inventory_movements;
4161
4162        stats.production_order_count = snapshot.production_order_count;
4163        stats.quality_inspection_count = snapshot.quality_inspection_count;
4164        stats.cycle_count_count = snapshot.cycle_count_count;
4165        stats.bom_component_count = snapshot.bom_component_count;
4166        stats.inventory_movement_count = snapshot.inventory_movement_count;
4167
4168        info!(
4169            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
4170            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
4171            snapshot.bom_component_count, snapshot.inventory_movement_count
4172        );
4173        self.check_resources_with_log("post-manufacturing")?;
4174
4175        Ok(snapshot)
4176    }
4177
4178    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
4179    fn phase_sales_kpi_budgets(
4180        &mut self,
4181        coa: &Arc<ChartOfAccounts>,
4182        financial_reporting: &FinancialReportingSnapshot,
4183        stats: &mut EnhancedGenerationStatistics,
4184    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
4185        if !self.phase_config.generate_sales_kpi_budgets {
4186            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
4187            return Ok(SalesKpiBudgetsSnapshot::default());
4188        }
4189        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
4190
4191        let seed = self.seed;
4192        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4193            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4194        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4195        let company_code = self
4196            .config
4197            .companies
4198            .first()
4199            .map(|c| c.code.as_str())
4200            .unwrap_or("1000");
4201
4202        let mut snapshot = SalesKpiBudgetsSnapshot::default();
4203
4204        // Sales Quotes
4205        if self.config.sales_quotes.enabled {
4206            let customer_data: Vec<(String, String)> = self
4207                .master_data
4208                .customers
4209                .iter()
4210                .map(|c| (c.customer_id.clone(), c.name.clone()))
4211                .collect();
4212            let material_data: Vec<(String, String)> = self
4213                .master_data
4214                .materials
4215                .iter()
4216                .map(|m| (m.material_id.clone(), m.description.clone()))
4217                .collect();
4218
4219            if !customer_data.is_empty() && !material_data.is_empty() {
4220                let employee_ids: Vec<String> = self
4221                    .master_data
4222                    .employees
4223                    .iter()
4224                    .map(|e| e.employee_id.clone())
4225                    .collect();
4226                let customer_ids: Vec<String> = self
4227                    .master_data
4228                    .customers
4229                    .iter()
4230                    .map(|c| c.customer_id.clone())
4231                    .collect();
4232                let company_currency = self
4233                    .config
4234                    .companies
4235                    .first()
4236                    .map(|c| c.currency.as_str())
4237                    .unwrap_or("USD");
4238
4239                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
4240                    .with_pools(employee_ids, customer_ids);
4241                let quotes = quote_gen.generate_with_currency(
4242                    company_code,
4243                    &customer_data,
4244                    &material_data,
4245                    start_date,
4246                    end_date,
4247                    &self.config.sales_quotes,
4248                    company_currency,
4249                );
4250                snapshot.sales_quote_count = quotes.len();
4251                snapshot.sales_quotes = quotes;
4252            }
4253        }
4254
4255        // Management KPIs
4256        if self.config.financial_reporting.management_kpis.enabled {
4257            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
4258            let mut kpis = kpi_gen.generate(
4259                company_code,
4260                start_date,
4261                end_date,
4262                &self.config.financial_reporting.management_kpis,
4263            );
4264
4265            // Override financial KPIs with actual data from financial statements
4266            {
4267                use rust_decimal::Decimal;
4268
4269                if let Some(income_stmt) =
4270                    financial_reporting.financial_statements.iter().find(|fs| {
4271                        fs.statement_type == StatementType::IncomeStatement
4272                            && fs.company_code == company_code
4273                    })
4274                {
4275                    // Extract revenue and COGS from income statement line items
4276                    let total_revenue: Decimal = income_stmt
4277                        .line_items
4278                        .iter()
4279                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
4280                        .map(|li| li.amount)
4281                        .sum();
4282                    let total_cogs: Decimal = income_stmt
4283                        .line_items
4284                        .iter()
4285                        .filter(|li| {
4286                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
4287                                && !li.is_total
4288                        })
4289                        .map(|li| li.amount.abs())
4290                        .sum();
4291                    let total_opex: Decimal = income_stmt
4292                        .line_items
4293                        .iter()
4294                        .filter(|li| {
4295                            li.section.contains("Expense")
4296                                && !li.is_total
4297                                && !li.section.contains("Cost")
4298                        })
4299                        .map(|li| li.amount.abs())
4300                        .sum();
4301
4302                    if total_revenue > Decimal::ZERO {
4303                        let hundred = Decimal::from(100);
4304                        let gross_margin_pct =
4305                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
4306                        let operating_income = total_revenue - total_cogs - total_opex;
4307                        let op_margin_pct =
4308                            (operating_income * hundred / total_revenue).round_dp(2);
4309
4310                        // Override gross margin and operating margin KPIs
4311                        for kpi in &mut kpis {
4312                            if kpi.name == "Gross Margin" {
4313                                kpi.value = gross_margin_pct;
4314                            } else if kpi.name == "Operating Margin" {
4315                                kpi.value = op_margin_pct;
4316                            }
4317                        }
4318                    }
4319                }
4320
4321                // Override Current Ratio from balance sheet
4322                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
4323                    fs.statement_type == StatementType::BalanceSheet
4324                        && fs.company_code == company_code
4325                }) {
4326                    let current_assets: Decimal = bs
4327                        .line_items
4328                        .iter()
4329                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
4330                        .map(|li| li.amount)
4331                        .sum();
4332                    let current_liabilities: Decimal = bs
4333                        .line_items
4334                        .iter()
4335                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
4336                        .map(|li| li.amount.abs())
4337                        .sum();
4338
4339                    if current_liabilities > Decimal::ZERO {
4340                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
4341                        for kpi in &mut kpis {
4342                            if kpi.name == "Current Ratio" {
4343                                kpi.value = current_ratio;
4344                            }
4345                        }
4346                    }
4347                }
4348            }
4349
4350            snapshot.kpi_count = kpis.len();
4351            snapshot.kpis = kpis;
4352        }
4353
4354        // Budgets
4355        if self.config.financial_reporting.budgets.enabled {
4356            let account_data: Vec<(String, String)> = coa
4357                .accounts
4358                .iter()
4359                .map(|a| (a.account_number.clone(), a.short_description.clone()))
4360                .collect();
4361
4362            if !account_data.is_empty() {
4363                let fiscal_year = start_date.year() as u32;
4364                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
4365                let budget = budget_gen.generate(
4366                    company_code,
4367                    fiscal_year,
4368                    &account_data,
4369                    &self.config.financial_reporting.budgets,
4370                );
4371                snapshot.budget_line_count = budget.line_items.len();
4372                snapshot.budgets.push(budget);
4373            }
4374        }
4375
4376        stats.sales_quote_count = snapshot.sales_quote_count;
4377        stats.kpi_count = snapshot.kpi_count;
4378        stats.budget_line_count = snapshot.budget_line_count;
4379
4380        info!(
4381            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
4382            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
4383        );
4384        self.check_resources_with_log("post-sales-kpi-budgets")?;
4385
4386        Ok(snapshot)
4387    }
4388
4389    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
4390    fn phase_tax_generation(
4391        &mut self,
4392        document_flows: &DocumentFlowSnapshot,
4393        stats: &mut EnhancedGenerationStatistics,
4394    ) -> SynthResult<TaxSnapshot> {
4395        if !self.phase_config.generate_tax || !self.config.tax.enabled {
4396            debug!("Phase 20: Skipped (tax generation disabled)");
4397            return Ok(TaxSnapshot::default());
4398        }
4399        info!("Phase 20: Generating Tax Data");
4400
4401        let seed = self.seed;
4402        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4403            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4404        let fiscal_year = start_date.year();
4405        let company_code = self
4406            .config
4407            .companies
4408            .first()
4409            .map(|c| c.code.as_str())
4410            .unwrap_or("1000");
4411
4412        let mut gen =
4413            datasynth_generators::TaxCodeGenerator::with_config(seed + 70, self.config.tax.clone());
4414
4415        let pack = self.primary_pack().clone();
4416        let (jurisdictions, codes) =
4417            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
4418
4419        // Generate tax provisions for each company
4420        let mut provisions = Vec::new();
4421        if self.config.tax.provisions.enabled {
4422            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 71);
4423            for company in &self.config.companies {
4424                let pre_tax_income = rust_decimal::Decimal::from(1_000_000);
4425                let statutory_rate = rust_decimal::Decimal::new(
4426                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
4427                    2,
4428                );
4429                let provision = provision_gen.generate(
4430                    &company.code,
4431                    start_date,
4432                    pre_tax_income,
4433                    statutory_rate,
4434                );
4435                provisions.push(provision);
4436            }
4437        }
4438
4439        // Generate tax lines from document invoices
4440        let mut tax_lines = Vec::new();
4441        if !codes.is_empty() {
4442            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
4443                datasynth_generators::TaxLineGeneratorConfig::default(),
4444                codes.clone(),
4445                seed + 72,
4446            );
4447
4448            // Tax lines from vendor invoices (input tax)
4449            // Use the first company's country as buyer country
4450            let buyer_country = self
4451                .config
4452                .companies
4453                .first()
4454                .map(|c| c.country.as_str())
4455                .unwrap_or("US");
4456            for vi in &document_flows.vendor_invoices {
4457                let lines = tax_line_gen.generate_for_document(
4458                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
4459                    &vi.header.document_id,
4460                    buyer_country, // seller approx same country
4461                    buyer_country,
4462                    vi.payable_amount,
4463                    vi.header.document_date,
4464                    None,
4465                );
4466                tax_lines.extend(lines);
4467            }
4468
4469            // Tax lines from customer invoices (output tax)
4470            for ci in &document_flows.customer_invoices {
4471                let lines = tax_line_gen.generate_for_document(
4472                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
4473                    &ci.header.document_id,
4474                    buyer_country, // seller is the company
4475                    buyer_country,
4476                    ci.total_gross_amount,
4477                    ci.header.document_date,
4478                    None,
4479                );
4480                tax_lines.extend(lines);
4481            }
4482        }
4483
4484        let snapshot = TaxSnapshot {
4485            jurisdiction_count: jurisdictions.len(),
4486            code_count: codes.len(),
4487            jurisdictions,
4488            codes,
4489            tax_provisions: provisions,
4490            tax_lines,
4491            tax_returns: Vec::new(),
4492            withholding_records: Vec::new(),
4493            tax_anomaly_labels: Vec::new(),
4494        };
4495
4496        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
4497        stats.tax_code_count = snapshot.code_count;
4498        stats.tax_provision_count = snapshot.tax_provisions.len();
4499        stats.tax_line_count = snapshot.tax_lines.len();
4500
4501        info!(
4502            "Tax data generated: {} jurisdictions, {} codes, {} provisions",
4503            snapshot.jurisdiction_count,
4504            snapshot.code_count,
4505            snapshot.tax_provisions.len()
4506        );
4507        self.check_resources_with_log("post-tax")?;
4508
4509        Ok(snapshot)
4510    }
4511
4512    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
4513    fn phase_esg_generation(
4514        &mut self,
4515        document_flows: &DocumentFlowSnapshot,
4516        stats: &mut EnhancedGenerationStatistics,
4517    ) -> SynthResult<EsgSnapshot> {
4518        if !self.phase_config.generate_esg || !self.config.esg.enabled {
4519            debug!("Phase 21: Skipped (ESG generation disabled)");
4520            return Ok(EsgSnapshot::default());
4521        }
4522        info!("Phase 21: Generating ESG Data");
4523
4524        let seed = self.seed;
4525        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4526            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4527        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4528        let entity_id = self
4529            .config
4530            .companies
4531            .first()
4532            .map(|c| c.code.as_str())
4533            .unwrap_or("1000");
4534
4535        let esg_cfg = &self.config.esg;
4536        let mut snapshot = EsgSnapshot::default();
4537
4538        // Energy consumption (feeds into scope 1 & 2 emissions)
4539        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
4540            esg_cfg.environmental.energy.clone(),
4541            seed + 80,
4542        );
4543        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
4544
4545        // Water usage
4546        let facility_count = esg_cfg.environmental.energy.facility_count;
4547        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
4548        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
4549
4550        // Waste
4551        let mut waste_gen = datasynth_generators::WasteGenerator::new(
4552            seed + 82,
4553            esg_cfg.environmental.waste.diversion_target,
4554            facility_count,
4555        );
4556        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
4557
4558        // Emissions (scope 1, 2, 3)
4559        let mut emission_gen =
4560            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
4561
4562        // Build EnergyInput from energy_records
4563        let energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
4564            .iter()
4565            .map(|e| datasynth_generators::EnergyInput {
4566                facility_id: e.facility_id.clone(),
4567                energy_type: match e.energy_source {
4568                    EnergySourceType::NaturalGas => {
4569                        datasynth_generators::EnergyInputType::NaturalGas
4570                    }
4571                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
4572                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
4573                    _ => datasynth_generators::EnergyInputType::Electricity,
4574                },
4575                consumption_kwh: e.consumption_kwh,
4576                period: e.period,
4577            })
4578            .collect();
4579
4580        let mut emissions = Vec::new();
4581        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
4582        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
4583
4584        // Scope 3: use vendor spend data from actual payments
4585        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
4586            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4587            for payment in &document_flows.payments {
4588                if payment.is_vendor {
4589                    *totals
4590                        .entry(payment.business_partner_id.clone())
4591                        .or_default() += payment.amount;
4592                }
4593            }
4594            totals
4595        };
4596        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
4597            .master_data
4598            .vendors
4599            .iter()
4600            .map(|v| {
4601                let spend = vendor_payment_totals
4602                    .get(&v.vendor_id)
4603                    .copied()
4604                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
4605                datasynth_generators::VendorSpendInput {
4606                    vendor_id: v.vendor_id.clone(),
4607                    category: format!("{:?}", v.vendor_type).to_lowercase(),
4608                    spend,
4609                    country: v.country.clone(),
4610                }
4611            })
4612            .collect();
4613        if !vendor_spend.is_empty() {
4614            emissions.extend(emission_gen.generate_scope3_purchased_goods(
4615                entity_id,
4616                &vendor_spend,
4617                start_date,
4618                end_date,
4619            ));
4620        }
4621
4622        // Business travel & commuting (scope 3)
4623        let headcount = self.master_data.employees.len() as u32;
4624        if headcount > 0 {
4625            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
4626            emissions.extend(emission_gen.generate_scope3_business_travel(
4627                entity_id,
4628                travel_spend,
4629                start_date,
4630            ));
4631            emissions
4632                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
4633        }
4634
4635        snapshot.emission_count = emissions.len();
4636        snapshot.emissions = emissions;
4637        snapshot.energy = energy_records;
4638
4639        // Social: Workforce diversity, pay equity, safety
4640        let mut workforce_gen =
4641            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
4642        let total_headcount = headcount.max(100);
4643        snapshot.diversity =
4644            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
4645        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
4646        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
4647            entity_id,
4648            facility_count,
4649            start_date,
4650            end_date,
4651        );
4652
4653        // Compute safety metrics
4654        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
4655        let safety_metric = workforce_gen.compute_safety_metrics(
4656            entity_id,
4657            &snapshot.safety_incidents,
4658            total_hours,
4659            start_date,
4660        );
4661        snapshot.safety_metrics = vec![safety_metric];
4662
4663        // Governance
4664        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
4665            seed + 85,
4666            esg_cfg.governance.board_size,
4667            esg_cfg.governance.independence_target,
4668        );
4669        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
4670
4671        // Supplier ESG assessments
4672        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
4673            esg_cfg.supply_chain_esg.clone(),
4674            seed + 86,
4675        );
4676        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
4677            .master_data
4678            .vendors
4679            .iter()
4680            .map(|v| datasynth_generators::VendorInput {
4681                vendor_id: v.vendor_id.clone(),
4682                country: v.country.clone(),
4683                industry: format!("{:?}", v.vendor_type).to_lowercase(),
4684                quality_score: None,
4685            })
4686            .collect();
4687        snapshot.supplier_assessments =
4688            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
4689
4690        // Disclosures
4691        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
4692            seed + 87,
4693            esg_cfg.reporting.clone(),
4694            esg_cfg.climate_scenarios.clone(),
4695        );
4696        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
4697        snapshot.disclosures = disclosure_gen.generate_disclosures(
4698            entity_id,
4699            &snapshot.materiality,
4700            start_date,
4701            end_date,
4702        );
4703        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
4704        snapshot.disclosure_count = snapshot.disclosures.len();
4705
4706        // Anomaly injection
4707        if esg_cfg.anomaly_rate > 0.0 {
4708            let mut anomaly_injector =
4709                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
4710            let mut labels = Vec::new();
4711            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
4712            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
4713            labels.extend(
4714                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
4715            );
4716            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
4717            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
4718            snapshot.anomaly_labels = labels;
4719        }
4720
4721        stats.esg_emission_count = snapshot.emission_count;
4722        stats.esg_disclosure_count = snapshot.disclosure_count;
4723
4724        info!(
4725            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
4726            snapshot.emission_count,
4727            snapshot.disclosure_count,
4728            snapshot.supplier_assessments.len()
4729        );
4730        self.check_resources_with_log("post-esg")?;
4731
4732        Ok(snapshot)
4733    }
4734
4735    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
4736    fn phase_treasury_data(
4737        &mut self,
4738        document_flows: &DocumentFlowSnapshot,
4739        subledger: &SubledgerSnapshot,
4740        intercompany: &IntercompanySnapshot,
4741        stats: &mut EnhancedGenerationStatistics,
4742    ) -> SynthResult<TreasurySnapshot> {
4743        if !self.config.treasury.enabled {
4744            debug!("Phase 22: Skipped (treasury generation disabled)");
4745            return Ok(TreasurySnapshot::default());
4746        }
4747        info!("Phase 22: Generating Treasury Data");
4748
4749        let seed = self.seed;
4750        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4751            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4752        let currency = self
4753            .config
4754            .companies
4755            .first()
4756            .map(|c| c.currency.as_str())
4757            .unwrap_or("USD");
4758        let entity_id = self
4759            .config
4760            .companies
4761            .first()
4762            .map(|c| c.code.as_str())
4763            .unwrap_or("1000");
4764
4765        let mut snapshot = TreasurySnapshot::default();
4766
4767        // Generate debt instruments
4768        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
4769            self.config.treasury.debt.clone(),
4770            seed + 90,
4771        );
4772        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
4773
4774        // Generate hedging instruments (IR swaps for floating-rate debt)
4775        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
4776            self.config.treasury.hedging.clone(),
4777            seed + 91,
4778        );
4779        for debt in &snapshot.debt_instruments {
4780            if debt.rate_type == InterestRateType::Variable {
4781                let swap = hedge_gen.generate_ir_swap(
4782                    currency,
4783                    debt.principal,
4784                    debt.origination_date,
4785                    debt.maturity_date,
4786                );
4787                snapshot.hedging_instruments.push(swap);
4788            }
4789        }
4790
4791        // Build FX exposures from foreign-currency payments and generate
4792        // FX forwards + hedge relationship designations via generate() API.
4793        {
4794            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
4795            for payment in &document_flows.payments {
4796                if payment.currency != currency {
4797                    let entry = fx_map
4798                        .entry(payment.currency.clone())
4799                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
4800                    entry.0 += payment.amount;
4801                    // Use the latest settlement date among grouped payments
4802                    if payment.header.document_date > entry.1 {
4803                        entry.1 = payment.header.document_date;
4804                    }
4805                }
4806            }
4807            if !fx_map.is_empty() {
4808                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
4809                    .into_iter()
4810                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
4811                        datasynth_generators::treasury::FxExposure {
4812                            currency_pair: format!("{foreign_ccy}/{currency}"),
4813                            foreign_currency: foreign_ccy,
4814                            net_amount,
4815                            settlement_date,
4816                            description: "AP payment FX exposure".to_string(),
4817                        }
4818                    })
4819                    .collect();
4820                let (fx_instruments, fx_relationships) =
4821                    hedge_gen.generate(start_date, &fx_exposures);
4822                snapshot.hedging_instruments.extend(fx_instruments);
4823                snapshot.hedge_relationships.extend(fx_relationships);
4824            }
4825        }
4826
4827        // Inject anomalies if configured
4828        if self.config.treasury.anomaly_rate > 0.0 {
4829            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
4830                seed + 92,
4831                self.config.treasury.anomaly_rate,
4832            );
4833            let mut labels = Vec::new();
4834            labels.extend(
4835                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
4836            );
4837            snapshot.treasury_anomaly_labels = labels;
4838        }
4839
4840        // Generate cash positions from payment flows
4841        if self.config.treasury.cash_positioning.enabled {
4842            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
4843
4844            // AP payments as outflows
4845            for payment in &document_flows.payments {
4846                cash_flows.push(datasynth_generators::treasury::CashFlow {
4847                    date: payment.header.document_date,
4848                    account_id: format!("{entity_id}-MAIN"),
4849                    amount: payment.amount,
4850                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
4851                });
4852            }
4853
4854            // Customer receipts (from O2C chains) as inflows
4855            for chain in &document_flows.o2c_chains {
4856                if let Some(ref receipt) = chain.customer_receipt {
4857                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4858                        date: receipt.header.document_date,
4859                        account_id: format!("{entity_id}-MAIN"),
4860                        amount: receipt.amount,
4861                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4862                    });
4863                }
4864                // Remainder receipts (follow-up to partial payments)
4865                for receipt in &chain.remainder_receipts {
4866                    cash_flows.push(datasynth_generators::treasury::CashFlow {
4867                        date: receipt.header.document_date,
4868                        account_id: format!("{entity_id}-MAIN"),
4869                        amount: receipt.amount,
4870                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
4871                    });
4872                }
4873            }
4874
4875            if !cash_flows.is_empty() {
4876                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
4877                    self.config.treasury.cash_positioning.clone(),
4878                    seed + 93,
4879                );
4880                let account_id = format!("{entity_id}-MAIN");
4881                snapshot.cash_positions = cash_gen.generate(
4882                    entity_id,
4883                    &account_id,
4884                    currency,
4885                    &cash_flows,
4886                    start_date,
4887                    start_date + chrono::Months::new(self.config.global.period_months),
4888                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
4889                );
4890            }
4891        }
4892
4893        // Generate cash forecasts from AR/AP aging
4894        if self.config.treasury.cash_forecasting.enabled {
4895            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4896
4897            // Build AR aging items from subledger AR invoices
4898            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
4899                .ar_invoices
4900                .iter()
4901                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
4902                .map(|inv| {
4903                    let days_past_due = if inv.due_date < end_date {
4904                        (end_date - inv.due_date).num_days().max(0) as u32
4905                    } else {
4906                        0
4907                    };
4908                    datasynth_generators::treasury::ArAgingItem {
4909                        expected_date: inv.due_date,
4910                        amount: inv.amount_remaining,
4911                        days_past_due,
4912                        document_id: inv.invoice_number.clone(),
4913                    }
4914                })
4915                .collect();
4916
4917            // Build AP aging items from subledger AP invoices
4918            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
4919                .ap_invoices
4920                .iter()
4921                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
4922                .map(|inv| datasynth_generators::treasury::ApAgingItem {
4923                    payment_date: inv.due_date,
4924                    amount: inv.amount_remaining,
4925                    document_id: inv.invoice_number.clone(),
4926                })
4927                .collect();
4928
4929            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
4930                self.config.treasury.cash_forecasting.clone(),
4931                seed + 94,
4932            );
4933            let forecast = forecast_gen.generate(
4934                entity_id,
4935                currency,
4936                end_date,
4937                &ar_items,
4938                &ap_items,
4939                &[], // scheduled disbursements - empty for now
4940            );
4941            snapshot.cash_forecasts.push(forecast);
4942        }
4943
4944        // Generate cash pools and sweeps
4945        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
4946            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4947            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
4948                self.config.treasury.cash_pooling.clone(),
4949                seed + 95,
4950            );
4951
4952            // Create a pool from available accounts
4953            let account_ids: Vec<String> = snapshot
4954                .cash_positions
4955                .iter()
4956                .map(|cp| cp.bank_account_id.clone())
4957                .collect::<std::collections::HashSet<_>>()
4958                .into_iter()
4959                .collect();
4960
4961            if let Some(pool) =
4962                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
4963            {
4964                // Generate sweeps - build participant balances from last cash position per account
4965                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
4966                for cp in &snapshot.cash_positions {
4967                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
4968                }
4969
4970                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
4971                    latest_balances
4972                        .into_iter()
4973                        .filter(|(id, _)| pool.participant_accounts.contains(id))
4974                        .map(
4975                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
4976                                account_id: id,
4977                                balance,
4978                            },
4979                        )
4980                        .collect();
4981
4982                let sweeps =
4983                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
4984                snapshot.cash_pool_sweeps = sweeps;
4985                snapshot.cash_pools.push(pool);
4986            }
4987        }
4988
4989        // Generate bank guarantees
4990        if self.config.treasury.bank_guarantees.enabled {
4991            let vendor_names: Vec<String> = self
4992                .master_data
4993                .vendors
4994                .iter()
4995                .map(|v| v.name.clone())
4996                .collect();
4997            if !vendor_names.is_empty() {
4998                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
4999                    self.config.treasury.bank_guarantees.clone(),
5000                    seed + 96,
5001                );
5002                snapshot.bank_guarantees =
5003                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
5004            }
5005        }
5006
5007        // Generate netting runs from intercompany matched pairs
5008        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
5009            let entity_ids: Vec<String> = self
5010                .config
5011                .companies
5012                .iter()
5013                .map(|c| c.code.clone())
5014                .collect();
5015            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
5016                .matched_pairs
5017                .iter()
5018                .map(|mp| {
5019                    (
5020                        mp.seller_company.clone(),
5021                        mp.buyer_company.clone(),
5022                        mp.amount,
5023                    )
5024                })
5025                .collect();
5026            if entity_ids.len() >= 2 {
5027                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
5028                    self.config.treasury.netting.clone(),
5029                    seed + 97,
5030                );
5031                snapshot.netting_runs = netting_gen.generate(
5032                    &entity_ids,
5033                    currency,
5034                    start_date,
5035                    self.config.global.period_months,
5036                    &ic_amounts,
5037                );
5038            }
5039        }
5040
5041        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
5042        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
5043        stats.cash_position_count = snapshot.cash_positions.len();
5044        stats.cash_forecast_count = snapshot.cash_forecasts.len();
5045        stats.cash_pool_count = snapshot.cash_pools.len();
5046
5047        info!(
5048            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs",
5049            snapshot.debt_instruments.len(),
5050            snapshot.hedging_instruments.len(),
5051            snapshot.cash_positions.len(),
5052            snapshot.cash_forecasts.len(),
5053            snapshot.cash_pools.len(),
5054            snapshot.bank_guarantees.len(),
5055            snapshot.netting_runs.len(),
5056        );
5057        self.check_resources_with_log("post-treasury")?;
5058
5059        Ok(snapshot)
5060    }
5061
5062    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
5063    fn phase_project_accounting(
5064        &mut self,
5065        document_flows: &DocumentFlowSnapshot,
5066        hr: &HrSnapshot,
5067        stats: &mut EnhancedGenerationStatistics,
5068    ) -> SynthResult<ProjectAccountingSnapshot> {
5069        if !self.config.project_accounting.enabled {
5070            debug!("Phase 23: Skipped (project accounting disabled)");
5071            return Ok(ProjectAccountingSnapshot::default());
5072        }
5073        info!("Phase 23: Generating Project Accounting Data");
5074
5075        let seed = self.seed;
5076        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5077            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5078        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5079        let company_code = self
5080            .config
5081            .companies
5082            .first()
5083            .map(|c| c.code.as_str())
5084            .unwrap_or("1000");
5085
5086        let mut snapshot = ProjectAccountingSnapshot::default();
5087
5088        // Generate projects with WBS hierarchies
5089        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
5090            self.config.project_accounting.clone(),
5091            seed + 95,
5092        );
5093        let pool = project_gen.generate(company_code, start_date, end_date);
5094        snapshot.projects = pool.projects.clone();
5095
5096        // Link source documents to projects for cost allocation
5097        {
5098            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
5099                Vec::new();
5100
5101            // Time entries
5102            for te in &hr.time_entries {
5103                let total_hours = te.hours_regular + te.hours_overtime;
5104                if total_hours > 0.0 {
5105                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5106                        id: te.entry_id.clone(),
5107                        entity_id: company_code.to_string(),
5108                        date: te.date,
5109                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
5110                            .unwrap_or(rust_decimal::Decimal::ZERO),
5111                        source_type: CostSourceType::TimeEntry,
5112                        hours: Some(
5113                            rust_decimal::Decimal::from_f64_retain(total_hours)
5114                                .unwrap_or(rust_decimal::Decimal::ZERO),
5115                        ),
5116                    });
5117                }
5118            }
5119
5120            // Expense reports
5121            for er in &hr.expense_reports {
5122                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5123                    id: er.report_id.clone(),
5124                    entity_id: company_code.to_string(),
5125                    date: er.submission_date,
5126                    amount: er.total_amount,
5127                    source_type: CostSourceType::ExpenseReport,
5128                    hours: None,
5129                });
5130            }
5131
5132            // Purchase orders
5133            for po in &document_flows.purchase_orders {
5134                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5135                    id: po.header.document_id.clone(),
5136                    entity_id: company_code.to_string(),
5137                    date: po.header.document_date,
5138                    amount: po.total_net_amount,
5139                    source_type: CostSourceType::PurchaseOrder,
5140                    hours: None,
5141                });
5142            }
5143
5144            // Vendor invoices
5145            for vi in &document_flows.vendor_invoices {
5146                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
5147                    id: vi.header.document_id.clone(),
5148                    entity_id: company_code.to_string(),
5149                    date: vi.header.document_date,
5150                    amount: vi.payable_amount,
5151                    source_type: CostSourceType::VendorInvoice,
5152                    hours: None,
5153                });
5154            }
5155
5156            if !source_docs.is_empty() && !pool.projects.is_empty() {
5157                let mut cost_gen =
5158                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
5159                        self.config.project_accounting.cost_allocation.clone(),
5160                        seed + 99,
5161                    );
5162                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
5163            }
5164        }
5165
5166        // Generate change orders
5167        if self.config.project_accounting.change_orders.enabled {
5168            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
5169                self.config.project_accounting.change_orders.clone(),
5170                seed + 96,
5171            );
5172            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
5173        }
5174
5175        // Generate milestones
5176        if self.config.project_accounting.milestones.enabled {
5177            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
5178                self.config.project_accounting.milestones.clone(),
5179                seed + 97,
5180            );
5181            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
5182        }
5183
5184        // Generate earned value metrics (needs cost lines, so only if we have projects)
5185        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
5186            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
5187                self.config.project_accounting.earned_value.clone(),
5188                seed + 98,
5189            );
5190            snapshot.earned_value_metrics =
5191                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
5192        }
5193
5194        stats.project_count = snapshot.projects.len();
5195        stats.project_change_order_count = snapshot.change_orders.len();
5196        stats.project_cost_line_count = snapshot.cost_lines.len();
5197
5198        info!(
5199            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
5200            snapshot.projects.len(),
5201            snapshot.change_orders.len(),
5202            snapshot.milestones.len(),
5203            snapshot.earned_value_metrics.len()
5204        );
5205        self.check_resources_with_log("post-project-accounting")?;
5206
5207        Ok(snapshot)
5208    }
5209
5210    /// Phase 24: Generate process evolution and organizational events.
5211    fn phase_evolution_events(
5212        &mut self,
5213        stats: &mut EnhancedGenerationStatistics,
5214    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
5215        if !self.phase_config.generate_evolution_events {
5216            debug!("Phase 24: Skipped (evolution events disabled)");
5217            return Ok((Vec::new(), Vec::new()));
5218        }
5219        info!("Phase 24: Generating Process Evolution + Organizational Events");
5220
5221        let seed = self.seed;
5222        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5223            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5224        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5225
5226        // Process evolution events
5227        let mut proc_gen =
5228            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
5229                seed + 100,
5230            );
5231        let process_events = proc_gen.generate_events(start_date, end_date);
5232
5233        // Organizational events
5234        let company_codes: Vec<String> = self
5235            .config
5236            .companies
5237            .iter()
5238            .map(|c| c.code.clone())
5239            .collect();
5240        let mut org_gen =
5241            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
5242                seed + 101,
5243            );
5244        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
5245
5246        stats.process_evolution_event_count = process_events.len();
5247        stats.organizational_event_count = org_events.len();
5248
5249        info!(
5250            "Evolution events generated: {} process evolution, {} organizational",
5251            process_events.len(),
5252            org_events.len()
5253        );
5254        self.check_resources_with_log("post-evolution-events")?;
5255
5256        Ok((process_events, org_events))
5257    }
5258
5259    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
5260    /// data recovery, and regulatory changes).
5261    fn phase_disruption_events(
5262        &self,
5263        stats: &mut EnhancedGenerationStatistics,
5264    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
5265        if !self.config.organizational_events.enabled {
5266            debug!("Phase 24b: Skipped (organizational events disabled)");
5267            return Ok(Vec::new());
5268        }
5269        info!("Phase 24b: Generating Disruption Events");
5270
5271        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5272            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5273        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5274
5275        let company_codes: Vec<String> = self
5276            .config
5277            .companies
5278            .iter()
5279            .map(|c| c.code.clone())
5280            .collect();
5281
5282        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
5283        let events = gen.generate(start_date, end_date, &company_codes);
5284
5285        stats.disruption_event_count = events.len();
5286        info!("Disruption events generated: {} events", events.len());
5287        self.check_resources_with_log("post-disruption-events")?;
5288
5289        Ok(events)
5290    }
5291
5292    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
5293    ///
5294    /// Produces paired examples where each pair contains the original clean JE
5295    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
5296    /// split transaction). Useful for training anomaly detection models with
5297    /// known ground truth.
5298    fn phase_counterfactuals(
5299        &self,
5300        journal_entries: &[JournalEntry],
5301        stats: &mut EnhancedGenerationStatistics,
5302    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
5303        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
5304            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
5305            return Ok(Vec::new());
5306        }
5307        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
5308
5309        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
5310
5311        let mut gen = CounterfactualGenerator::new(self.seed + 110);
5312
5313        // Rotating set of specs to produce diverse mutation types
5314        let specs = [
5315            CounterfactualSpec::ScaleAmount { factor: 2.5 },
5316            CounterfactualSpec::ShiftDate { days: -14 },
5317            CounterfactualSpec::SelfApprove,
5318            CounterfactualSpec::SplitTransaction { split_count: 3 },
5319        ];
5320
5321        let pairs: Vec<_> = journal_entries
5322            .iter()
5323            .enumerate()
5324            .map(|(i, je)| {
5325                let spec = &specs[i % specs.len()];
5326                gen.generate(je, spec)
5327            })
5328            .collect();
5329
5330        stats.counterfactual_pair_count = pairs.len();
5331        info!(
5332            "Counterfactual pairs generated: {} pairs from {} journal entries",
5333            pairs.len(),
5334            journal_entries.len()
5335        );
5336        self.check_resources_with_log("post-counterfactuals")?;
5337
5338        Ok(pairs)
5339    }
5340
5341    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
5342    ///
5343    /// Uses the anomaly labels (from Phase 8) to determine which documents are
5344    /// fraudulent, then generates probabilistic red flags on all chain documents.
5345    /// Non-fraud documents also receive red flags at a lower rate (false positives)
5346    /// to produce realistic ML training data.
5347    fn phase_red_flags(
5348        &self,
5349        anomaly_labels: &AnomalyLabels,
5350        document_flows: &DocumentFlowSnapshot,
5351        stats: &mut EnhancedGenerationStatistics,
5352    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
5353        if !self.config.fraud.enabled {
5354            debug!("Phase 26: Skipped (fraud generation disabled)");
5355            return Ok(Vec::new());
5356        }
5357        info!("Phase 26: Generating Fraud Red-Flag Indicators");
5358
5359        use datasynth_generators::fraud::RedFlagGenerator;
5360
5361        let generator = RedFlagGenerator::new();
5362        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
5363
5364        // Build a set of document IDs that are known-fraudulent from anomaly labels.
5365        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
5366            .labels
5367            .iter()
5368            .filter(|label| label.anomaly_type.is_intentional())
5369            .map(|label| label.document_id.as_str())
5370            .collect();
5371
5372        let mut flags = Vec::new();
5373
5374        // Iterate P2P chains: use the purchase order document ID as the chain key.
5375        for chain in &document_flows.p2p_chains {
5376            let doc_id = &chain.purchase_order.header.document_id;
5377            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5378            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5379        }
5380
5381        // Iterate O2C chains: use the sales order document ID as the chain key.
5382        for chain in &document_flows.o2c_chains {
5383            let doc_id = &chain.sales_order.header.document_id;
5384            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
5385            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
5386        }
5387
5388        stats.red_flag_count = flags.len();
5389        info!(
5390            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
5391            flags.len(),
5392            document_flows.p2p_chains.len(),
5393            document_flows.o2c_chains.len(),
5394            fraud_doc_ids.len()
5395        );
5396        self.check_resources_with_log("post-red-flags")?;
5397
5398        Ok(flags)
5399    }
5400
5401    /// Phase 26b: Generate collusion rings from employee/vendor pools.
5402    ///
5403    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
5404    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
5405    /// advance them over the simulation period.
5406    fn phase_collusion_rings(
5407        &mut self,
5408        stats: &mut EnhancedGenerationStatistics,
5409    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
5410        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
5411            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
5412            return Ok(Vec::new());
5413        }
5414        info!("Phase 26b: Generating Collusion Rings");
5415
5416        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5417            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5418        let months = self.config.global.period_months;
5419
5420        let employee_ids: Vec<String> = self
5421            .master_data
5422            .employees
5423            .iter()
5424            .map(|e| e.employee_id.clone())
5425            .collect();
5426        let vendor_ids: Vec<String> = self
5427            .master_data
5428            .vendors
5429            .iter()
5430            .map(|v| v.vendor_id.clone())
5431            .collect();
5432
5433        let mut generator =
5434            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
5435        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
5436
5437        stats.collusion_ring_count = rings.len();
5438        info!(
5439            "Collusion rings generated: {} rings, total members: {}",
5440            rings.len(),
5441            rings
5442                .iter()
5443                .map(datasynth_generators::fraud::CollusionRing::size)
5444                .sum::<usize>()
5445        );
5446        self.check_resources_with_log("post-collusion-rings")?;
5447
5448        Ok(rings)
5449    }
5450
5451    /// Phase 27: Generate bi-temporal version chains for vendor entities.
5452    ///
5453    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
5454    /// master data changes over time, supporting bi-temporal audit queries.
5455    fn phase_temporal_attributes(
5456        &mut self,
5457        stats: &mut EnhancedGenerationStatistics,
5458    ) -> SynthResult<
5459        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
5460    > {
5461        if !self.config.temporal_attributes.enabled {
5462            debug!("Phase 27: Skipped (temporal attributes disabled)");
5463            return Ok(Vec::new());
5464        }
5465        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
5466
5467        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5468            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5469
5470        // Build a TemporalAttributeConfig from the user's config.
5471        // Since Phase 27 is already gated on temporal_attributes.enabled,
5472        // default to enabling version chains so users get actual mutations.
5473        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
5474            || self.config.temporal_attributes.enabled;
5475        let temporal_config = {
5476            let ta = &self.config.temporal_attributes;
5477            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
5478                .enabled(ta.enabled)
5479                .closed_probability(ta.valid_time.closed_probability)
5480                .avg_validity_days(ta.valid_time.avg_validity_days)
5481                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
5482                .with_version_chains(if generate_version_chains {
5483                    ta.avg_versions_per_entity
5484                } else {
5485                    1.0
5486                })
5487                .build()
5488        };
5489        // Apply backdating settings if configured
5490        let temporal_config = if self
5491            .config
5492            .temporal_attributes
5493            .transaction_time
5494            .allow_backdating
5495        {
5496            let mut c = temporal_config;
5497            c.transaction_time.allow_backdating = true;
5498            c.transaction_time.backdating_probability = self
5499                .config
5500                .temporal_attributes
5501                .transaction_time
5502                .backdating_probability;
5503            c.transaction_time.max_backdate_days = self
5504                .config
5505                .temporal_attributes
5506                .transaction_time
5507                .max_backdate_days;
5508            c
5509        } else {
5510            temporal_config
5511        };
5512        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
5513            temporal_config,
5514            self.seed + 130,
5515            start_date,
5516        );
5517
5518        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
5519            self.seed + 130,
5520            datasynth_core::GeneratorType::Vendor,
5521        );
5522
5523        let chains: Vec<_> = self
5524            .master_data
5525            .vendors
5526            .iter()
5527            .map(|vendor| {
5528                let id = uuid_factory.next();
5529                gen.generate_version_chain(vendor.clone(), id)
5530            })
5531            .collect();
5532
5533        stats.temporal_version_chain_count = chains.len();
5534        info!("Temporal version chains generated: {} chains", chains.len());
5535        self.check_resources_with_log("post-temporal-attributes")?;
5536
5537        Ok(chains)
5538    }
5539
5540    /// Phase 28: Build entity relationship graph and cross-process links.
5541    ///
5542    /// Part 1 (gated on `relationship_strength.enabled`): builds an
5543    /// `EntityGraph` from master-data vendor/customer entities and
5544    /// journal-entry-derived transaction summaries.
5545    ///
5546    /// Part 2 (gated on `cross_process_links.enabled`): extracts
5547    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
5548    /// generates inventory-movement cross-process links.
5549    fn phase_entity_relationships(
5550        &self,
5551        journal_entries: &[JournalEntry],
5552        document_flows: &DocumentFlowSnapshot,
5553        stats: &mut EnhancedGenerationStatistics,
5554    ) -> SynthResult<(
5555        Option<datasynth_core::models::EntityGraph>,
5556        Vec<datasynth_core::models::CrossProcessLink>,
5557    )> {
5558        use datasynth_generators::relationships::{
5559            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
5560            TransactionSummary,
5561        };
5562
5563        let rs_enabled = self.config.relationship_strength.enabled;
5564        let cpl_enabled = self.config.cross_process_links.enabled
5565            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
5566
5567        if !rs_enabled && !cpl_enabled {
5568            debug!(
5569                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
5570            );
5571            return Ok((None, Vec::new()));
5572        }
5573
5574        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
5575
5576        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5577            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5578
5579        let company_code = self
5580            .config
5581            .companies
5582            .first()
5583            .map(|c| c.code.as_str())
5584            .unwrap_or("1000");
5585
5586        // Build the generator with matching config flags
5587        let gen_config = EntityGraphConfig {
5588            enabled: rs_enabled,
5589            cross_process: datasynth_generators::relationships::CrossProcessConfig {
5590                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
5591                enable_return_flows: false,
5592                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
5593                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
5594                // Use higher link rate for small datasets to avoid probabilistic empty results
5595                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
5596                    1.0
5597                } else {
5598                    0.30
5599                },
5600                ..Default::default()
5601            },
5602            strength_config: datasynth_generators::relationships::StrengthConfig {
5603                transaction_volume_weight: self
5604                    .config
5605                    .relationship_strength
5606                    .calculation
5607                    .transaction_volume_weight,
5608                transaction_count_weight: self
5609                    .config
5610                    .relationship_strength
5611                    .calculation
5612                    .transaction_count_weight,
5613                duration_weight: self
5614                    .config
5615                    .relationship_strength
5616                    .calculation
5617                    .relationship_duration_weight,
5618                recency_weight: self.config.relationship_strength.calculation.recency_weight,
5619                mutual_connections_weight: self
5620                    .config
5621                    .relationship_strength
5622                    .calculation
5623                    .mutual_connections_weight,
5624                recency_half_life_days: self
5625                    .config
5626                    .relationship_strength
5627                    .calculation
5628                    .recency_half_life_days,
5629            },
5630            ..Default::default()
5631        };
5632
5633        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
5634
5635        // --- Part 1: Entity Relationship Graph ---
5636        let entity_graph = if rs_enabled {
5637            // Build EntitySummary lists from master data
5638            let vendor_summaries: Vec<EntitySummary> = self
5639                .master_data
5640                .vendors
5641                .iter()
5642                .map(|v| {
5643                    EntitySummary::new(
5644                        &v.vendor_id,
5645                        &v.name,
5646                        datasynth_core::models::GraphEntityType::Vendor,
5647                        start_date,
5648                    )
5649                })
5650                .collect();
5651
5652            let customer_summaries: Vec<EntitySummary> = self
5653                .master_data
5654                .customers
5655                .iter()
5656                .map(|c| {
5657                    EntitySummary::new(
5658                        &c.customer_id,
5659                        &c.name,
5660                        datasynth_core::models::GraphEntityType::Customer,
5661                        start_date,
5662                    )
5663                })
5664                .collect();
5665
5666            // Build transaction summaries from journal entries.
5667            // Key = (company_code, trading_partner) for entries that have a
5668            // trading partner.  This captures intercompany flows and any JE
5669            // whose line items carry a trading_partner reference.
5670            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
5671                std::collections::HashMap::new();
5672
5673            for je in journal_entries {
5674                let cc = je.header.company_code.clone();
5675                let posting_date = je.header.posting_date;
5676                for line in &je.lines {
5677                    if let Some(ref tp) = line.trading_partner {
5678                        let amount = if line.debit_amount > line.credit_amount {
5679                            line.debit_amount
5680                        } else {
5681                            line.credit_amount
5682                        };
5683                        let entry = txn_summaries
5684                            .entry((cc.clone(), tp.clone()))
5685                            .or_insert_with(|| TransactionSummary {
5686                                total_volume: rust_decimal::Decimal::ZERO,
5687                                transaction_count: 0,
5688                                first_transaction_date: posting_date,
5689                                last_transaction_date: posting_date,
5690                                related_entities: std::collections::HashSet::new(),
5691                            });
5692                        entry.total_volume += amount;
5693                        entry.transaction_count += 1;
5694                        if posting_date < entry.first_transaction_date {
5695                            entry.first_transaction_date = posting_date;
5696                        }
5697                        if posting_date > entry.last_transaction_date {
5698                            entry.last_transaction_date = posting_date;
5699                        }
5700                        entry.related_entities.insert(cc.clone());
5701                    }
5702                }
5703            }
5704
5705            // Also extract transaction relationships from document flow chains.
5706            // P2P chains: Company → Vendor relationships
5707            for chain in &document_flows.p2p_chains {
5708                let cc = chain.purchase_order.header.company_code.clone();
5709                let vendor_id = chain.purchase_order.vendor_id.clone();
5710                let po_date = chain.purchase_order.header.document_date;
5711                let amount = chain.purchase_order.total_net_amount;
5712
5713                let entry = txn_summaries
5714                    .entry((cc.clone(), vendor_id))
5715                    .or_insert_with(|| TransactionSummary {
5716                        total_volume: rust_decimal::Decimal::ZERO,
5717                        transaction_count: 0,
5718                        first_transaction_date: po_date,
5719                        last_transaction_date: po_date,
5720                        related_entities: std::collections::HashSet::new(),
5721                    });
5722                entry.total_volume += amount;
5723                entry.transaction_count += 1;
5724                if po_date < entry.first_transaction_date {
5725                    entry.first_transaction_date = po_date;
5726                }
5727                if po_date > entry.last_transaction_date {
5728                    entry.last_transaction_date = po_date;
5729                }
5730                entry.related_entities.insert(cc);
5731            }
5732
5733            // O2C chains: Company → Customer relationships
5734            for chain in &document_flows.o2c_chains {
5735                let cc = chain.sales_order.header.company_code.clone();
5736                let customer_id = chain.sales_order.customer_id.clone();
5737                let so_date = chain.sales_order.header.document_date;
5738                let amount = chain.sales_order.total_net_amount;
5739
5740                let entry = txn_summaries
5741                    .entry((cc.clone(), customer_id))
5742                    .or_insert_with(|| TransactionSummary {
5743                        total_volume: rust_decimal::Decimal::ZERO,
5744                        transaction_count: 0,
5745                        first_transaction_date: so_date,
5746                        last_transaction_date: so_date,
5747                        related_entities: std::collections::HashSet::new(),
5748                    });
5749                entry.total_volume += amount;
5750                entry.transaction_count += 1;
5751                if so_date < entry.first_transaction_date {
5752                    entry.first_transaction_date = so_date;
5753                }
5754                if so_date > entry.last_transaction_date {
5755                    entry.last_transaction_date = so_date;
5756                }
5757                entry.related_entities.insert(cc);
5758            }
5759
5760            let as_of_date = journal_entries
5761                .last()
5762                .map(|je| je.header.posting_date)
5763                .unwrap_or(start_date);
5764
5765            let graph = gen.generate_entity_graph(
5766                company_code,
5767                as_of_date,
5768                &vendor_summaries,
5769                &customer_summaries,
5770                &txn_summaries,
5771            );
5772
5773            info!(
5774                "Entity relationship graph: {} nodes, {} edges",
5775                graph.nodes.len(),
5776                graph.edges.len()
5777            );
5778            stats.entity_relationship_node_count = graph.nodes.len();
5779            stats.entity_relationship_edge_count = graph.edges.len();
5780            Some(graph)
5781        } else {
5782            None
5783        };
5784
5785        // --- Part 2: Cross-Process Links ---
5786        let cross_process_links = if cpl_enabled {
5787            // Build GoodsReceiptRef from P2P chains
5788            let gr_refs: Vec<GoodsReceiptRef> = document_flows
5789                .p2p_chains
5790                .iter()
5791                .flat_map(|chain| {
5792                    let vendor_id = chain.purchase_order.vendor_id.clone();
5793                    let cc = chain.purchase_order.header.company_code.clone();
5794                    chain.goods_receipts.iter().flat_map(move |gr| {
5795                        gr.items.iter().filter_map({
5796                            let doc_id = gr.header.document_id.clone();
5797                            let v_id = vendor_id.clone();
5798                            let company = cc.clone();
5799                            let receipt_date = gr.header.document_date;
5800                            move |item| {
5801                                item.base
5802                                    .material_id
5803                                    .as_ref()
5804                                    .map(|mat_id| GoodsReceiptRef {
5805                                        document_id: doc_id.clone(),
5806                                        material_id: mat_id.clone(),
5807                                        quantity: item.base.quantity,
5808                                        receipt_date,
5809                                        vendor_id: v_id.clone(),
5810                                        company_code: company.clone(),
5811                                    })
5812                            }
5813                        })
5814                    })
5815                })
5816                .collect();
5817
5818            // Build DeliveryRef from O2C chains
5819            let del_refs: Vec<DeliveryRef> = document_flows
5820                .o2c_chains
5821                .iter()
5822                .flat_map(|chain| {
5823                    let customer_id = chain.sales_order.customer_id.clone();
5824                    let cc = chain.sales_order.header.company_code.clone();
5825                    chain.deliveries.iter().flat_map(move |del| {
5826                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
5827                        del.items.iter().filter_map({
5828                            let doc_id = del.header.document_id.clone();
5829                            let c_id = customer_id.clone();
5830                            let company = cc.clone();
5831                            move |item| {
5832                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
5833                                    document_id: doc_id.clone(),
5834                                    material_id: mat_id.clone(),
5835                                    quantity: item.base.quantity,
5836                                    delivery_date,
5837                                    customer_id: c_id.clone(),
5838                                    company_code: company.clone(),
5839                                })
5840                            }
5841                        })
5842                    })
5843                })
5844                .collect();
5845
5846            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
5847            info!("Cross-process links generated: {} links", links.len());
5848            stats.cross_process_link_count = links.len();
5849            links
5850        } else {
5851            Vec::new()
5852        };
5853
5854        self.check_resources_with_log("post-entity-relationships")?;
5855        Ok((entity_graph, cross_process_links))
5856    }
5857
5858    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
5859    fn phase_industry_data(
5860        &self,
5861        stats: &mut EnhancedGenerationStatistics,
5862    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
5863        if !self.config.industry_specific.enabled {
5864            return None;
5865        }
5866        info!("Phase 29: Generating industry-specific data");
5867        let output = datasynth_generators::industry::factory::generate_industry_output(
5868            self.config.global.industry,
5869        );
5870        stats.industry_gl_account_count = output.gl_accounts.len();
5871        info!(
5872            "Industry data generated: {} GL accounts for {:?}",
5873            output.gl_accounts.len(),
5874            self.config.global.industry
5875        );
5876        Some(output)
5877    }
5878
5879    /// Phase 3b: Generate opening balances for each company.
5880    fn phase_opening_balances(
5881        &mut self,
5882        coa: &Arc<ChartOfAccounts>,
5883        stats: &mut EnhancedGenerationStatistics,
5884    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
5885        if !self.config.balance.generate_opening_balances {
5886            debug!("Phase 3b: Skipped (opening balance generation disabled)");
5887            return Ok(Vec::new());
5888        }
5889        info!("Phase 3b: Generating Opening Balances");
5890
5891        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5892            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5893        let fiscal_year = start_date.year();
5894
5895        let industry = match self.config.global.industry {
5896            IndustrySector::Manufacturing => IndustryType::Manufacturing,
5897            IndustrySector::Retail => IndustryType::Retail,
5898            IndustrySector::FinancialServices => IndustryType::Financial,
5899            IndustrySector::Healthcare => IndustryType::Healthcare,
5900            IndustrySector::Technology => IndustryType::Technology,
5901            _ => IndustryType::Manufacturing,
5902        };
5903
5904        let config = datasynth_generators::OpeningBalanceConfig {
5905            industry,
5906            ..Default::default()
5907        };
5908        let mut gen =
5909            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
5910
5911        let mut results = Vec::new();
5912        for company in &self.config.companies {
5913            let spec = OpeningBalanceSpec::new(
5914                company.code.clone(),
5915                start_date,
5916                fiscal_year,
5917                company.currency.clone(),
5918                rust_decimal::Decimal::new(10_000_000, 0),
5919                industry,
5920            );
5921            let ob = gen.generate(&spec, coa, start_date, &company.code);
5922            results.push(ob);
5923        }
5924
5925        stats.opening_balance_count = results.len();
5926        info!("Opening balances generated: {} companies", results.len());
5927        self.check_resources_with_log("post-opening-balances")?;
5928
5929        Ok(results)
5930    }
5931
5932    /// Phase 9b: Reconcile GL control accounts to subledger balances.
5933    fn phase_subledger_reconciliation(
5934        &mut self,
5935        subledger: &SubledgerSnapshot,
5936        entries: &[JournalEntry],
5937        stats: &mut EnhancedGenerationStatistics,
5938    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
5939        if !self.config.balance.reconcile_subledgers {
5940            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
5941            return Ok(Vec::new());
5942        }
5943        info!("Phase 9b: Reconciling GL to subledger balances");
5944
5945        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5946            .map(|d| d + chrono::Months::new(self.config.global.period_months))
5947            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5948
5949        // Build GL balance map from journal entries using a balance tracker
5950        let tracker_config = BalanceTrackerConfig {
5951            validate_on_each_entry: false,
5952            track_history: false,
5953            fail_on_validation_error: false,
5954            ..Default::default()
5955        };
5956        let recon_currency = self
5957            .config
5958            .companies
5959            .first()
5960            .map(|c| c.currency.clone())
5961            .unwrap_or_else(|| "USD".to_string());
5962        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
5963        let validation_errors = tracker.apply_entries(entries);
5964        if !validation_errors.is_empty() {
5965            warn!(
5966                error_count = validation_errors.len(),
5967                "Balance tracker encountered validation errors during subledger reconciliation"
5968            );
5969            for err in &validation_errors {
5970                debug!("Balance validation error: {:?}", err);
5971            }
5972        }
5973
5974        let mut engine = datasynth_generators::ReconciliationEngine::new(
5975            datasynth_generators::ReconciliationConfig::default(),
5976        );
5977
5978        let mut results = Vec::new();
5979        let company_code = self
5980            .config
5981            .companies
5982            .first()
5983            .map(|c| c.code.as_str())
5984            .unwrap_or("1000");
5985
5986        // Reconcile AR
5987        if !subledger.ar_invoices.is_empty() {
5988            let gl_balance = tracker
5989                .get_account_balance(
5990                    company_code,
5991                    datasynth_core::accounts::control_accounts::AR_CONTROL,
5992                )
5993                .map(|b| b.closing_balance)
5994                .unwrap_or_default();
5995            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
5996            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
5997        }
5998
5999        // Reconcile AP
6000        if !subledger.ap_invoices.is_empty() {
6001            let gl_balance = tracker
6002                .get_account_balance(
6003                    company_code,
6004                    datasynth_core::accounts::control_accounts::AP_CONTROL,
6005                )
6006                .map(|b| b.closing_balance)
6007                .unwrap_or_default();
6008            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
6009            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
6010        }
6011
6012        // Reconcile FA
6013        if !subledger.fa_records.is_empty() {
6014            let gl_asset_balance = tracker
6015                .get_account_balance(
6016                    company_code,
6017                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
6018                )
6019                .map(|b| b.closing_balance)
6020                .unwrap_or_default();
6021            let gl_accum_depr_balance = tracker
6022                .get_account_balance(
6023                    company_code,
6024                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
6025                )
6026                .map(|b| b.closing_balance)
6027                .unwrap_or_default();
6028            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
6029                subledger.fa_records.iter().collect();
6030            let (asset_recon, depr_recon) = engine.reconcile_fa(
6031                company_code,
6032                end_date,
6033                gl_asset_balance,
6034                gl_accum_depr_balance,
6035                &fa_refs,
6036            );
6037            results.push(asset_recon);
6038            results.push(depr_recon);
6039        }
6040
6041        // Reconcile Inventory
6042        if !subledger.inventory_positions.is_empty() {
6043            let gl_balance = tracker
6044                .get_account_balance(
6045                    company_code,
6046                    datasynth_core::accounts::control_accounts::INVENTORY,
6047                )
6048                .map(|b| b.closing_balance)
6049                .unwrap_or_default();
6050            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
6051                subledger.inventory_positions.iter().collect();
6052            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
6053        }
6054
6055        stats.subledger_reconciliation_count = results.len();
6056        info!(
6057            "Subledger reconciliation complete: {} reconciliations",
6058            results.len()
6059        );
6060        self.check_resources_with_log("post-subledger-reconciliation")?;
6061
6062        Ok(results)
6063    }
6064
6065    /// Generate the chart of accounts.
6066    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
6067        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
6068
6069        let coa_framework = self.resolve_coa_framework();
6070
6071        let mut gen = ChartOfAccountsGenerator::new(
6072            self.config.chart_of_accounts.complexity,
6073            self.config.global.industry,
6074            self.seed,
6075        )
6076        .with_coa_framework(coa_framework);
6077
6078        let coa = Arc::new(gen.generate());
6079        self.coa = Some(Arc::clone(&coa));
6080
6081        if let Some(pb) = pb {
6082            pb.finish_with_message("Chart of Accounts complete");
6083        }
6084
6085        Ok(coa)
6086    }
6087
6088    /// Generate master data entities.
6089    fn generate_master_data(&mut self) -> SynthResult<()> {
6090        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6091            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6092        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6093
6094        let total = self.config.companies.len() as u64 * 5; // 5 entity types
6095        let pb = self.create_progress_bar(total, "Generating Master Data");
6096
6097        // Resolve country pack once for all companies (uses primary company's country)
6098        let pack = self.primary_pack().clone();
6099
6100        // Capture config values needed inside the parallel closure
6101        let vendors_per_company = self.phase_config.vendors_per_company;
6102        let customers_per_company = self.phase_config.customers_per_company;
6103        let materials_per_company = self.phase_config.materials_per_company;
6104        let assets_per_company = self.phase_config.assets_per_company;
6105        let coa_framework = self.resolve_coa_framework();
6106
6107        // Generate all master data in parallel across companies.
6108        // Each company's data is independent, making this embarrassingly parallel.
6109        let per_company_results: Vec<_> = self
6110            .config
6111            .companies
6112            .par_iter()
6113            .enumerate()
6114            .map(|(i, company)| {
6115                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
6116                let pack = pack.clone();
6117
6118                // Generate vendors (offset counter so IDs are globally unique across companies)
6119                let mut vendor_gen = VendorGenerator::new(company_seed);
6120                vendor_gen.set_country_pack(pack.clone());
6121                vendor_gen.set_coa_framework(coa_framework);
6122                vendor_gen.set_counter_offset(i * vendors_per_company);
6123                let vendor_pool =
6124                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
6125
6126                // Generate customers (offset counter so IDs are globally unique across companies)
6127                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
6128                customer_gen.set_country_pack(pack.clone());
6129                customer_gen.set_coa_framework(coa_framework);
6130                customer_gen.set_counter_offset(i * customers_per_company);
6131                let customer_pool = customer_gen.generate_customer_pool(
6132                    customers_per_company,
6133                    &company.code,
6134                    start_date,
6135                );
6136
6137                // Generate materials (offset counter so IDs are globally unique across companies)
6138                let mut material_gen = MaterialGenerator::new(company_seed + 200);
6139                material_gen.set_country_pack(pack.clone());
6140                material_gen.set_counter_offset(i * materials_per_company);
6141                let material_pool = material_gen.generate_material_pool(
6142                    materials_per_company,
6143                    &company.code,
6144                    start_date,
6145                );
6146
6147                // Generate fixed assets
6148                let mut asset_gen = AssetGenerator::new(company_seed + 300);
6149                let asset_pool = asset_gen.generate_asset_pool(
6150                    assets_per_company,
6151                    &company.code,
6152                    (start_date, end_date),
6153                );
6154
6155                // Generate employees
6156                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
6157                employee_gen.set_country_pack(pack);
6158                let employee_pool =
6159                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
6160
6161                (
6162                    vendor_pool.vendors,
6163                    customer_pool.customers,
6164                    material_pool.materials,
6165                    asset_pool.assets,
6166                    employee_pool.employees,
6167                )
6168            })
6169            .collect();
6170
6171        // Aggregate results from all companies
6172        for (vendors, customers, materials, assets, employees) in per_company_results {
6173            self.master_data.vendors.extend(vendors);
6174            self.master_data.customers.extend(customers);
6175            self.master_data.materials.extend(materials);
6176            self.master_data.assets.extend(assets);
6177            self.master_data.employees.extend(employees);
6178        }
6179
6180        if let Some(pb) = &pb {
6181            pb.inc(total);
6182        }
6183        if let Some(pb) = pb {
6184            pb.finish_with_message("Master data generation complete");
6185        }
6186
6187        Ok(())
6188    }
6189
6190    /// Generate document flows (P2P and O2C).
6191    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
6192        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6193            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6194
6195        // Generate P2P chains
6196        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
6197        let months = (self.config.global.period_months as usize).max(1);
6198        let p2p_count = self
6199            .phase_config
6200            .p2p_chains
6201            .min(self.master_data.vendors.len() * 2 * months);
6202        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
6203
6204        // Convert P2P config from schema to generator config
6205        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
6206        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
6207        p2p_gen.set_country_pack(self.primary_pack().clone());
6208
6209        for i in 0..p2p_count {
6210            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
6211            let materials: Vec<&Material> = self
6212                .master_data
6213                .materials
6214                .iter()
6215                .skip(i % self.master_data.materials.len().max(1))
6216                .take(2.min(self.master_data.materials.len()))
6217                .collect();
6218
6219            if materials.is_empty() {
6220                continue;
6221            }
6222
6223            let company = &self.config.companies[i % self.config.companies.len()];
6224            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
6225            let fiscal_period = po_date.month() as u8;
6226            let created_by = if self.master_data.employees.is_empty() {
6227                "SYSTEM"
6228            } else {
6229                self.master_data.employees[i % self.master_data.employees.len()]
6230                    .user_id
6231                    .as_str()
6232            };
6233
6234            let chain = p2p_gen.generate_chain(
6235                &company.code,
6236                vendor,
6237                &materials,
6238                po_date,
6239                start_date.year() as u16,
6240                fiscal_period,
6241                created_by,
6242            );
6243
6244            // Flatten documents
6245            flows.purchase_orders.push(chain.purchase_order.clone());
6246            flows.goods_receipts.extend(chain.goods_receipts.clone());
6247            if let Some(vi) = &chain.vendor_invoice {
6248                flows.vendor_invoices.push(vi.clone());
6249            }
6250            if let Some(payment) = &chain.payment {
6251                flows.payments.push(payment.clone());
6252            }
6253            for remainder in &chain.remainder_payments {
6254                flows.payments.push(remainder.clone());
6255            }
6256            flows.p2p_chains.push(chain);
6257
6258            if let Some(pb) = &pb {
6259                pb.inc(1);
6260            }
6261        }
6262
6263        if let Some(pb) = pb {
6264            pb.finish_with_message("P2P document flows complete");
6265        }
6266
6267        // Generate O2C chains
6268        // Cap at ~2 SOs per customer per month to keep order volume realistic
6269        let o2c_count = self
6270            .phase_config
6271            .o2c_chains
6272            .min(self.master_data.customers.len() * 2 * months);
6273        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
6274
6275        // Convert O2C config from schema to generator config
6276        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
6277        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
6278        o2c_gen.set_country_pack(self.primary_pack().clone());
6279
6280        for i in 0..o2c_count {
6281            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
6282            let materials: Vec<&Material> = self
6283                .master_data
6284                .materials
6285                .iter()
6286                .skip(i % self.master_data.materials.len().max(1))
6287                .take(2.min(self.master_data.materials.len()))
6288                .collect();
6289
6290            if materials.is_empty() {
6291                continue;
6292            }
6293
6294            let company = &self.config.companies[i % self.config.companies.len()];
6295            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
6296            let fiscal_period = so_date.month() as u8;
6297            let created_by = if self.master_data.employees.is_empty() {
6298                "SYSTEM"
6299            } else {
6300                self.master_data.employees[i % self.master_data.employees.len()]
6301                    .user_id
6302                    .as_str()
6303            };
6304
6305            let chain = o2c_gen.generate_chain(
6306                &company.code,
6307                customer,
6308                &materials,
6309                so_date,
6310                start_date.year() as u16,
6311                fiscal_period,
6312                created_by,
6313            );
6314
6315            // Flatten documents
6316            flows.sales_orders.push(chain.sales_order.clone());
6317            flows.deliveries.extend(chain.deliveries.clone());
6318            if let Some(ci) = &chain.customer_invoice {
6319                flows.customer_invoices.push(ci.clone());
6320            }
6321            if let Some(receipt) = &chain.customer_receipt {
6322                flows.payments.push(receipt.clone());
6323            }
6324            // Extract remainder receipts (follow-up to partial payments)
6325            for receipt in &chain.remainder_receipts {
6326                flows.payments.push(receipt.clone());
6327            }
6328            flows.o2c_chains.push(chain);
6329
6330            if let Some(pb) = &pb {
6331                pb.inc(1);
6332            }
6333        }
6334
6335        if let Some(pb) = pb {
6336            pb.finish_with_message("O2C document flows complete");
6337        }
6338
6339        Ok(())
6340    }
6341
6342    /// Generate journal entries using parallel generation across multiple cores.
6343    fn generate_journal_entries(
6344        &mut self,
6345        coa: &Arc<ChartOfAccounts>,
6346    ) -> SynthResult<Vec<JournalEntry>> {
6347        use datasynth_core::traits::ParallelGenerator;
6348
6349        let total = self.calculate_total_transactions();
6350        let pb = self.create_progress_bar(total, "Generating Journal Entries");
6351
6352        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6353            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6354        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6355
6356        let company_codes: Vec<String> = self
6357            .config
6358            .companies
6359            .iter()
6360            .map(|c| c.code.clone())
6361            .collect();
6362
6363        let generator = JournalEntryGenerator::new_with_params(
6364            self.config.transactions.clone(),
6365            Arc::clone(coa),
6366            company_codes,
6367            start_date,
6368            end_date,
6369            self.seed,
6370        );
6371
6372        // Connect generated master data to ensure JEs reference real entities
6373        // Enable persona-based error injection for realistic human behavior
6374        // Pass fraud configuration for fraud injection
6375        let je_pack = self.primary_pack();
6376
6377        let mut generator = generator
6378            .with_master_data(
6379                &self.master_data.vendors,
6380                &self.master_data.customers,
6381                &self.master_data.materials,
6382            )
6383            .with_country_pack_names(je_pack)
6384            .with_country_pack_temporal(
6385                self.config.temporal_patterns.clone(),
6386                self.seed + 200,
6387                je_pack,
6388            )
6389            .with_persona_errors(true)
6390            .with_fraud_config(self.config.fraud.clone());
6391
6392        // Apply temporal drift if configured
6393        if self.config.temporal.enabled {
6394            let drift_config = self.config.temporal.to_core_config();
6395            generator = generator.with_drift_config(drift_config, self.seed + 100);
6396        }
6397
6398        // Check memory limit at start
6399        self.check_memory_limit()?;
6400
6401        // Determine parallelism: use available cores, but cap at total entries
6402        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
6403
6404        // Use parallel generation for datasets with 10K+ entries.
6405        // Below this threshold, the statistical properties of a single-seeded
6406        // generator (e.g. Benford compliance) are better preserved.
6407        let entries = if total >= 10_000 && num_threads > 1 {
6408            // Parallel path: split the generator across cores and generate in parallel.
6409            // Each sub-generator gets a unique seed for deterministic, independent generation.
6410            let sub_generators = generator.split(num_threads);
6411            let entries_per_thread = total as usize / num_threads;
6412            let remainder = total as usize % num_threads;
6413
6414            let batches: Vec<Vec<JournalEntry>> = sub_generators
6415                .into_par_iter()
6416                .enumerate()
6417                .map(|(i, mut gen)| {
6418                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
6419                    gen.generate_batch(count)
6420                })
6421                .collect();
6422
6423            // Merge all batches into a single Vec
6424            let entries = JournalEntryGenerator::merge_results(batches);
6425
6426            if let Some(pb) = &pb {
6427                pb.inc(total);
6428            }
6429            entries
6430        } else {
6431            // Sequential path for small datasets (< 1000 entries)
6432            let mut entries = Vec::with_capacity(total as usize);
6433            for _ in 0..total {
6434                let entry = generator.generate();
6435                entries.push(entry);
6436                if let Some(pb) = &pb {
6437                    pb.inc(1);
6438                }
6439            }
6440            entries
6441        };
6442
6443        if let Some(pb) = pb {
6444            pb.finish_with_message("Journal entries complete");
6445        }
6446
6447        Ok(entries)
6448    }
6449
6450    /// Generate journal entries from document flows.
6451    ///
6452    /// This creates proper GL entries for each document in the P2P and O2C flows,
6453    /// ensuring that document activity is reflected in the general ledger.
6454    fn generate_jes_from_document_flows(
6455        &mut self,
6456        flows: &DocumentFlowSnapshot,
6457    ) -> SynthResult<Vec<JournalEntry>> {
6458        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
6459        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
6460
6461        let je_config = match self.resolve_coa_framework() {
6462            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
6463            CoAFramework::GermanSkr04 => {
6464                let fa = datasynth_core::FrameworkAccounts::german_gaap();
6465                DocumentFlowJeConfig::from(&fa)
6466            }
6467            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
6468        };
6469
6470        let populate_fec = je_config.populate_fec_fields;
6471        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
6472
6473        // Build auxiliary account lookup from vendor/customer master data so that
6474        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
6475        // PCG "4010001") instead of raw partner IDs.
6476        if populate_fec {
6477            let mut aux_lookup = std::collections::HashMap::new();
6478            for vendor in &self.master_data.vendors {
6479                if let Some(ref aux) = vendor.auxiliary_gl_account {
6480                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
6481                }
6482            }
6483            for customer in &self.master_data.customers {
6484                if let Some(ref aux) = customer.auxiliary_gl_account {
6485                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
6486                }
6487            }
6488            if !aux_lookup.is_empty() {
6489                generator.set_auxiliary_account_lookup(aux_lookup);
6490            }
6491        }
6492
6493        let mut entries = Vec::new();
6494
6495        // Generate JEs from P2P chains
6496        for chain in &flows.p2p_chains {
6497            let chain_entries = generator.generate_from_p2p_chain(chain);
6498            entries.extend(chain_entries);
6499            if let Some(pb) = &pb {
6500                pb.inc(1);
6501            }
6502        }
6503
6504        // Generate JEs from O2C chains
6505        for chain in &flows.o2c_chains {
6506            let chain_entries = generator.generate_from_o2c_chain(chain);
6507            entries.extend(chain_entries);
6508            if let Some(pb) = &pb {
6509                pb.inc(1);
6510            }
6511        }
6512
6513        if let Some(pb) = pb {
6514            pb.finish_with_message(format!(
6515                "Generated {} JEs from document flows",
6516                entries.len()
6517            ));
6518        }
6519
6520        Ok(entries)
6521    }
6522
6523    /// Generate journal entries from payroll runs.
6524    ///
6525    /// Creates one JE per payroll run:
6526    /// - DR Salaries & Wages (6100) for gross pay
6527    /// - CR Payroll Clearing (9100) for gross pay
6528    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
6529        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
6530
6531        let mut jes = Vec::with_capacity(payroll_runs.len());
6532
6533        for run in payroll_runs {
6534            let mut je = JournalEntry::new_simple(
6535                format!("JE-PAYROLL-{}", run.payroll_id),
6536                run.company_code.clone(),
6537                run.run_date,
6538                format!("Payroll {}", run.payroll_id),
6539            );
6540
6541            // Debit Salaries & Wages for gross pay
6542            je.add_line(JournalEntryLine {
6543                line_number: 1,
6544                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
6545                debit_amount: run.total_gross,
6546                reference: Some(run.payroll_id.clone()),
6547                text: Some(format!(
6548                    "Payroll {} ({} employees)",
6549                    run.payroll_id, run.employee_count
6550                )),
6551                ..Default::default()
6552            });
6553
6554            // Credit Payroll Clearing for gross pay
6555            je.add_line(JournalEntryLine {
6556                line_number: 2,
6557                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
6558                credit_amount: run.total_gross,
6559                reference: Some(run.payroll_id.clone()),
6560                ..Default::default()
6561            });
6562
6563            jes.push(je);
6564        }
6565
6566        jes
6567    }
6568
6569    /// Generate journal entries from production orders.
6570    ///
6571    /// Creates one JE per completed production order:
6572    /// - DR Raw Materials (5100) for material consumption (actual_cost)
6573    /// - CR Inventory (1200) for material consumption
6574    fn generate_manufacturing_jes(production_orders: &[ProductionOrder]) -> Vec<JournalEntry> {
6575        use datasynth_core::accounts::{control_accounts, expense_accounts};
6576        use datasynth_core::models::ProductionOrderStatus;
6577
6578        let mut jes = Vec::new();
6579
6580        for order in production_orders {
6581            // Only generate JEs for completed or closed orders
6582            if !matches!(
6583                order.status,
6584                ProductionOrderStatus::Completed | ProductionOrderStatus::Closed
6585            ) {
6586                continue;
6587            }
6588
6589            let mut je = JournalEntry::new_simple(
6590                format!("JE-MFG-{}", order.order_id),
6591                order.company_code.clone(),
6592                order.actual_end.unwrap_or(order.planned_end),
6593                format!(
6594                    "Production Order {} - {}",
6595                    order.order_id, order.material_description
6596                ),
6597            );
6598
6599            // Debit Raw Materials / Manufacturing expense for actual cost
6600            je.add_line(JournalEntryLine {
6601                line_number: 1,
6602                gl_account: expense_accounts::RAW_MATERIALS.to_string(),
6603                debit_amount: order.actual_cost,
6604                reference: Some(order.order_id.clone()),
6605                text: Some(format!(
6606                    "Material consumption for {}",
6607                    order.material_description
6608                )),
6609                quantity: Some(order.actual_quantity),
6610                unit: Some("EA".to_string()),
6611                ..Default::default()
6612            });
6613
6614            // Credit Inventory for material consumption
6615            je.add_line(JournalEntryLine {
6616                line_number: 2,
6617                gl_account: control_accounts::INVENTORY.to_string(),
6618                credit_amount: order.actual_cost,
6619                reference: Some(order.order_id.clone()),
6620                ..Default::default()
6621            });
6622
6623            jes.push(je);
6624        }
6625
6626        jes
6627    }
6628
6629    /// Link document flows to subledger records.
6630    ///
6631    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
6632    /// ensuring subledger data is coherent with document flow data.
6633    fn link_document_flows_to_subledgers(
6634        &mut self,
6635        flows: &DocumentFlowSnapshot,
6636    ) -> SynthResult<SubledgerSnapshot> {
6637        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
6638        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
6639
6640        // Build vendor/customer name maps from master data for realistic subledger names
6641        let vendor_names: std::collections::HashMap<String, String> = self
6642            .master_data
6643            .vendors
6644            .iter()
6645            .map(|v| (v.vendor_id.clone(), v.name.clone()))
6646            .collect();
6647        let customer_names: std::collections::HashMap<String, String> = self
6648            .master_data
6649            .customers
6650            .iter()
6651            .map(|c| (c.customer_id.clone(), c.name.clone()))
6652            .collect();
6653
6654        let mut linker = DocumentFlowLinker::new()
6655            .with_vendor_names(vendor_names)
6656            .with_customer_names(customer_names);
6657
6658        // Convert vendor invoices to AP invoices
6659        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
6660        if let Some(pb) = &pb {
6661            pb.inc(flows.vendor_invoices.len() as u64);
6662        }
6663
6664        // Convert customer invoices to AR invoices
6665        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
6666        if let Some(pb) = &pb {
6667            pb.inc(flows.customer_invoices.len() as u64);
6668        }
6669
6670        if let Some(pb) = pb {
6671            pb.finish_with_message(format!(
6672                "Linked {} AP and {} AR invoices",
6673                ap_invoices.len(),
6674                ar_invoices.len()
6675            ));
6676        }
6677
6678        Ok(SubledgerSnapshot {
6679            ap_invoices,
6680            ar_invoices,
6681            fa_records: Vec::new(),
6682            inventory_positions: Vec::new(),
6683            inventory_movements: Vec::new(),
6684        })
6685    }
6686
6687    /// Generate OCPM events from document flows.
6688    ///
6689    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
6690    /// capturing the object-centric process perspective.
6691    #[allow(clippy::too_many_arguments)]
6692    fn generate_ocpm_events(
6693        &mut self,
6694        flows: &DocumentFlowSnapshot,
6695        sourcing: &SourcingSnapshot,
6696        hr: &HrSnapshot,
6697        manufacturing: &ManufacturingSnapshot,
6698        banking: &BankingSnapshot,
6699        audit: &AuditSnapshot,
6700        financial_reporting: &FinancialReportingSnapshot,
6701    ) -> SynthResult<OcpmSnapshot> {
6702        let total_chains = flows.p2p_chains.len()
6703            + flows.o2c_chains.len()
6704            + sourcing.sourcing_projects.len()
6705            + hr.payroll_runs.len()
6706            + manufacturing.production_orders.len()
6707            + banking.customers.len()
6708            + audit.engagements.len()
6709            + financial_reporting.bank_reconciliations.len();
6710        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
6711
6712        // Create OCPM event log with standard types
6713        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
6714        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
6715
6716        // Configure the OCPM generator
6717        let ocpm_config = OcpmGeneratorConfig {
6718            generate_p2p: true,
6719            generate_o2c: true,
6720            generate_s2c: !sourcing.sourcing_projects.is_empty(),
6721            generate_h2r: !hr.payroll_runs.is_empty(),
6722            generate_mfg: !manufacturing.production_orders.is_empty(),
6723            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
6724            generate_bank: !banking.customers.is_empty(),
6725            generate_audit: !audit.engagements.is_empty(),
6726            happy_path_rate: 0.75,
6727            exception_path_rate: 0.20,
6728            error_path_rate: 0.05,
6729            add_duration_variability: true,
6730            duration_std_dev_factor: 0.3,
6731        };
6732        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
6733        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
6734
6735        // Get available users for resource assignment
6736        let available_users: Vec<String> = self
6737            .master_data
6738            .employees
6739            .iter()
6740            .take(20)
6741            .map(|e| e.user_id.clone())
6742            .collect();
6743
6744        // Deterministic base date from config (avoids Utc::now() non-determinism)
6745        let fallback_date =
6746            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
6747        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6748            .unwrap_or(fallback_date);
6749        let base_midnight = base_date
6750            .and_hms_opt(0, 0, 0)
6751            .expect("midnight is always valid");
6752        let base_datetime =
6753            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
6754
6755        // Helper closure to add case results to event log
6756        let add_result = |event_log: &mut OcpmEventLog,
6757                          result: datasynth_ocpm::CaseGenerationResult| {
6758            for event in result.events {
6759                event_log.add_event(event);
6760            }
6761            for object in result.objects {
6762                event_log.add_object(object);
6763            }
6764            for relationship in result.relationships {
6765                event_log.add_relationship(relationship);
6766            }
6767            for corr in result.correlation_events {
6768                event_log.add_correlation_event(corr);
6769            }
6770            event_log.add_case(result.case_trace);
6771        };
6772
6773        // Generate events from P2P chains
6774        for chain in &flows.p2p_chains {
6775            let po = &chain.purchase_order;
6776            let documents = P2pDocuments::new(
6777                &po.header.document_id,
6778                &po.vendor_id,
6779                &po.header.company_code,
6780                po.total_net_amount,
6781                &po.header.currency,
6782                &ocpm_uuid_factory,
6783            )
6784            .with_goods_receipt(
6785                chain
6786                    .goods_receipts
6787                    .first()
6788                    .map(|gr| gr.header.document_id.as_str())
6789                    .unwrap_or(""),
6790                &ocpm_uuid_factory,
6791            )
6792            .with_invoice(
6793                chain
6794                    .vendor_invoice
6795                    .as_ref()
6796                    .map(|vi| vi.header.document_id.as_str())
6797                    .unwrap_or(""),
6798                &ocpm_uuid_factory,
6799            )
6800            .with_payment(
6801                chain
6802                    .payment
6803                    .as_ref()
6804                    .map(|p| p.header.document_id.as_str())
6805                    .unwrap_or(""),
6806                &ocpm_uuid_factory,
6807            );
6808
6809            let start_time =
6810                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
6811            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
6812            add_result(&mut event_log, result);
6813
6814            if let Some(pb) = &pb {
6815                pb.inc(1);
6816            }
6817        }
6818
6819        // Generate events from O2C chains
6820        for chain in &flows.o2c_chains {
6821            let so = &chain.sales_order;
6822            let documents = O2cDocuments::new(
6823                &so.header.document_id,
6824                &so.customer_id,
6825                &so.header.company_code,
6826                so.total_net_amount,
6827                &so.header.currency,
6828                &ocpm_uuid_factory,
6829            )
6830            .with_delivery(
6831                chain
6832                    .deliveries
6833                    .first()
6834                    .map(|d| d.header.document_id.as_str())
6835                    .unwrap_or(""),
6836                &ocpm_uuid_factory,
6837            )
6838            .with_invoice(
6839                chain
6840                    .customer_invoice
6841                    .as_ref()
6842                    .map(|ci| ci.header.document_id.as_str())
6843                    .unwrap_or(""),
6844                &ocpm_uuid_factory,
6845            )
6846            .with_receipt(
6847                chain
6848                    .customer_receipt
6849                    .as_ref()
6850                    .map(|r| r.header.document_id.as_str())
6851                    .unwrap_or(""),
6852                &ocpm_uuid_factory,
6853            );
6854
6855            let start_time =
6856                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
6857            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
6858            add_result(&mut event_log, result);
6859
6860            if let Some(pb) = &pb {
6861                pb.inc(1);
6862            }
6863        }
6864
6865        // Generate events from S2C sourcing projects
6866        for project in &sourcing.sourcing_projects {
6867            // Find vendor from contracts or qualifications
6868            let vendor_id = sourcing
6869                .contracts
6870                .iter()
6871                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6872                .map(|c| c.vendor_id.clone())
6873                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
6874                .or_else(|| {
6875                    self.master_data
6876                        .vendors
6877                        .first()
6878                        .map(|v| v.vendor_id.clone())
6879                })
6880                .unwrap_or_else(|| "V000".to_string());
6881            let mut docs = S2cDocuments::new(
6882                &project.project_id,
6883                &vendor_id,
6884                &project.company_code,
6885                project.estimated_annual_spend,
6886                &ocpm_uuid_factory,
6887            );
6888            // Link RFx if available
6889            if let Some(rfx) = sourcing
6890                .rfx_events
6891                .iter()
6892                .find(|r| r.sourcing_project_id == project.project_id)
6893            {
6894                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
6895                // Link winning bid (status == Accepted)
6896                if let Some(bid) = sourcing.bids.iter().find(|b| {
6897                    b.rfx_id == rfx.rfx_id
6898                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
6899                }) {
6900                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
6901                }
6902            }
6903            // Link contract
6904            if let Some(contract) = sourcing
6905                .contracts
6906                .iter()
6907                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
6908            {
6909                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
6910            }
6911            let start_time = base_datetime - chrono::Duration::days(90);
6912            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
6913            add_result(&mut event_log, result);
6914
6915            if let Some(pb) = &pb {
6916                pb.inc(1);
6917            }
6918        }
6919
6920        // Generate events from H2R payroll runs
6921        for run in &hr.payroll_runs {
6922            // Use first matching payroll line item's employee, or fallback
6923            let employee_id = hr
6924                .payroll_line_items
6925                .iter()
6926                .find(|li| li.payroll_id == run.payroll_id)
6927                .map(|li| li.employee_id.as_str())
6928                .unwrap_or("EMP000");
6929            let docs = H2rDocuments::new(
6930                &run.payroll_id,
6931                employee_id,
6932                &run.company_code,
6933                run.total_gross,
6934                &ocpm_uuid_factory,
6935            )
6936            .with_time_entries(
6937                hr.time_entries
6938                    .iter()
6939                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
6940                    .take(5)
6941                    .map(|t| t.entry_id.as_str())
6942                    .collect(),
6943            );
6944            let start_time = base_datetime - chrono::Duration::days(30);
6945            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
6946            add_result(&mut event_log, result);
6947
6948            if let Some(pb) = &pb {
6949                pb.inc(1);
6950            }
6951        }
6952
6953        // Generate events from MFG production orders
6954        for order in &manufacturing.production_orders {
6955            let mut docs = MfgDocuments::new(
6956                &order.order_id,
6957                &order.material_id,
6958                &order.company_code,
6959                order.planned_quantity,
6960                &ocpm_uuid_factory,
6961            )
6962            .with_operations(
6963                order
6964                    .operations
6965                    .iter()
6966                    .map(|o| format!("OP-{:04}", o.operation_number))
6967                    .collect::<Vec<_>>()
6968                    .iter()
6969                    .map(std::string::String::as_str)
6970                    .collect(),
6971            );
6972            // Link quality inspection if available (via reference_id matching order_id)
6973            if let Some(insp) = manufacturing
6974                .quality_inspections
6975                .iter()
6976                .find(|i| i.reference_id == order.order_id)
6977            {
6978                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
6979            }
6980            // Link cycle count if available (match by material_id in items)
6981            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
6982                cc.items
6983                    .iter()
6984                    .any(|item| item.material_id == order.material_id)
6985            }) {
6986                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
6987            }
6988            let start_time = base_datetime - chrono::Duration::days(60);
6989            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
6990            add_result(&mut event_log, result);
6991
6992            if let Some(pb) = &pb {
6993                pb.inc(1);
6994            }
6995        }
6996
6997        // Generate events from Banking customers
6998        for customer in &banking.customers {
6999            let customer_id_str = customer.customer_id.to_string();
7000            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
7001            // Link accounts (primary_owner_id matches customer_id)
7002            if let Some(account) = banking
7003                .accounts
7004                .iter()
7005                .find(|a| a.primary_owner_id == customer.customer_id)
7006            {
7007                let account_id_str = account.account_id.to_string();
7008                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
7009                // Link transactions for this account
7010                let txn_strs: Vec<String> = banking
7011                    .transactions
7012                    .iter()
7013                    .filter(|t| t.account_id == account.account_id)
7014                    .take(10)
7015                    .map(|t| t.transaction_id.to_string())
7016                    .collect();
7017                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
7018                let txn_amounts: Vec<rust_decimal::Decimal> = banking
7019                    .transactions
7020                    .iter()
7021                    .filter(|t| t.account_id == account.account_id)
7022                    .take(10)
7023                    .map(|t| t.amount)
7024                    .collect();
7025                if !txn_ids.is_empty() {
7026                    docs = docs.with_transactions(txn_ids, txn_amounts);
7027                }
7028            }
7029            let start_time = base_datetime - chrono::Duration::days(180);
7030            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
7031            add_result(&mut event_log, result);
7032
7033            if let Some(pb) = &pb {
7034                pb.inc(1);
7035            }
7036        }
7037
7038        // Generate events from Audit engagements
7039        for engagement in &audit.engagements {
7040            let engagement_id_str = engagement.engagement_id.to_string();
7041            let docs = AuditDocuments::new(
7042                &engagement_id_str,
7043                &engagement.client_entity_id,
7044                &ocpm_uuid_factory,
7045            )
7046            .with_workpapers(
7047                audit
7048                    .workpapers
7049                    .iter()
7050                    .filter(|w| w.engagement_id == engagement.engagement_id)
7051                    .take(10)
7052                    .map(|w| w.workpaper_id.to_string())
7053                    .collect::<Vec<_>>()
7054                    .iter()
7055                    .map(std::string::String::as_str)
7056                    .collect(),
7057            )
7058            .with_evidence(
7059                audit
7060                    .evidence
7061                    .iter()
7062                    .filter(|e| e.engagement_id == engagement.engagement_id)
7063                    .take(10)
7064                    .map(|e| e.evidence_id.to_string())
7065                    .collect::<Vec<_>>()
7066                    .iter()
7067                    .map(std::string::String::as_str)
7068                    .collect(),
7069            )
7070            .with_risks(
7071                audit
7072                    .risk_assessments
7073                    .iter()
7074                    .filter(|r| r.engagement_id == engagement.engagement_id)
7075                    .take(5)
7076                    .map(|r| r.risk_id.to_string())
7077                    .collect::<Vec<_>>()
7078                    .iter()
7079                    .map(std::string::String::as_str)
7080                    .collect(),
7081            )
7082            .with_findings(
7083                audit
7084                    .findings
7085                    .iter()
7086                    .filter(|f| f.engagement_id == engagement.engagement_id)
7087                    .take(5)
7088                    .map(|f| f.finding_id.to_string())
7089                    .collect::<Vec<_>>()
7090                    .iter()
7091                    .map(std::string::String::as_str)
7092                    .collect(),
7093            )
7094            .with_judgments(
7095                audit
7096                    .judgments
7097                    .iter()
7098                    .filter(|j| j.engagement_id == engagement.engagement_id)
7099                    .take(5)
7100                    .map(|j| j.judgment_id.to_string())
7101                    .collect::<Vec<_>>()
7102                    .iter()
7103                    .map(std::string::String::as_str)
7104                    .collect(),
7105            );
7106            let start_time = base_datetime - chrono::Duration::days(120);
7107            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
7108            add_result(&mut event_log, result);
7109
7110            if let Some(pb) = &pb {
7111                pb.inc(1);
7112            }
7113        }
7114
7115        // Generate events from Bank Reconciliations
7116        for recon in &financial_reporting.bank_reconciliations {
7117            let docs = BankReconDocuments::new(
7118                &recon.reconciliation_id,
7119                &recon.bank_account_id,
7120                &recon.company_code,
7121                recon.bank_ending_balance,
7122                &ocpm_uuid_factory,
7123            )
7124            .with_statement_lines(
7125                recon
7126                    .statement_lines
7127                    .iter()
7128                    .take(20)
7129                    .map(|l| l.line_id.as_str())
7130                    .collect(),
7131            )
7132            .with_reconciling_items(
7133                recon
7134                    .reconciling_items
7135                    .iter()
7136                    .take(10)
7137                    .map(|i| i.item_id.as_str())
7138                    .collect(),
7139            );
7140            let start_time = base_datetime - chrono::Duration::days(30);
7141            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
7142            add_result(&mut event_log, result);
7143
7144            if let Some(pb) = &pb {
7145                pb.inc(1);
7146            }
7147        }
7148
7149        // Compute process variants
7150        event_log.compute_variants();
7151
7152        let summary = event_log.summary();
7153
7154        if let Some(pb) = pb {
7155            pb.finish_with_message(format!(
7156                "Generated {} OCPM events, {} objects",
7157                summary.event_count, summary.object_count
7158            ));
7159        }
7160
7161        Ok(OcpmSnapshot {
7162            event_count: summary.event_count,
7163            object_count: summary.object_count,
7164            case_count: summary.case_count,
7165            event_log: Some(event_log),
7166        })
7167    }
7168
7169    /// Inject anomalies into journal entries.
7170    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
7171        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
7172
7173        // Read anomaly rates from config instead of using hardcoded values.
7174        // Priority: anomaly_injection config > fraud config > default 0.02
7175        let total_rate = if self.config.anomaly_injection.enabled {
7176            self.config.anomaly_injection.rates.total_rate
7177        } else if self.config.fraud.enabled {
7178            self.config.fraud.fraud_rate
7179        } else {
7180            0.02
7181        };
7182
7183        let fraud_rate = if self.config.anomaly_injection.enabled {
7184            self.config.anomaly_injection.rates.fraud_rate
7185        } else {
7186            AnomalyRateConfig::default().fraud_rate
7187        };
7188
7189        let error_rate = if self.config.anomaly_injection.enabled {
7190            self.config.anomaly_injection.rates.error_rate
7191        } else {
7192            AnomalyRateConfig::default().error_rate
7193        };
7194
7195        let process_issue_rate = if self.config.anomaly_injection.enabled {
7196            self.config.anomaly_injection.rates.process_rate
7197        } else {
7198            AnomalyRateConfig::default().process_issue_rate
7199        };
7200
7201        let anomaly_config = AnomalyInjectorConfig {
7202            rates: AnomalyRateConfig {
7203                total_rate,
7204                fraud_rate,
7205                error_rate,
7206                process_issue_rate,
7207                ..Default::default()
7208            },
7209            seed: self.seed + 5000,
7210            ..Default::default()
7211        };
7212
7213        let mut injector = AnomalyInjector::new(anomaly_config);
7214        let result = injector.process_entries(entries);
7215
7216        if let Some(pb) = &pb {
7217            pb.inc(entries.len() as u64);
7218            pb.finish_with_message("Anomaly injection complete");
7219        }
7220
7221        let mut by_type = HashMap::new();
7222        for label in &result.labels {
7223            *by_type
7224                .entry(format!("{:?}", label.anomaly_type))
7225                .or_insert(0) += 1;
7226        }
7227
7228        Ok(AnomalyLabels {
7229            labels: result.labels,
7230            summary: Some(result.summary),
7231            by_type,
7232        })
7233    }
7234
7235    /// Validate journal entries using running balance tracker.
7236    ///
7237    /// Applies all entries to the balance tracker and validates:
7238    /// - Each entry is internally balanced (debits = credits)
7239    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
7240    ///
7241    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
7242    /// excluded from balance validation as they may be intentionally unbalanced.
7243    fn validate_journal_entries(
7244        &mut self,
7245        entries: &[JournalEntry],
7246    ) -> SynthResult<BalanceValidationResult> {
7247        // Filter out entries with human errors as they may be intentionally unbalanced
7248        let clean_entries: Vec<&JournalEntry> = entries
7249            .iter()
7250            .filter(|e| {
7251                e.header
7252                    .header_text
7253                    .as_ref()
7254                    .map(|t| !t.contains("[HUMAN_ERROR:"))
7255                    .unwrap_or(true)
7256            })
7257            .collect();
7258
7259        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
7260
7261        // Configure tracker to not fail on errors (collect them instead)
7262        let config = BalanceTrackerConfig {
7263            validate_on_each_entry: false,   // We'll validate at the end
7264            track_history: false,            // Skip history for performance
7265            fail_on_validation_error: false, // Collect errors, don't fail
7266            ..Default::default()
7267        };
7268        let validation_currency = self
7269            .config
7270            .companies
7271            .first()
7272            .map(|c| c.currency.clone())
7273            .unwrap_or_else(|| "USD".to_string());
7274
7275        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
7276
7277        // Apply clean entries (without human errors)
7278        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
7279        let errors = tracker.apply_entries(&clean_refs);
7280
7281        if let Some(pb) = &pb {
7282            pb.inc(entries.len() as u64);
7283        }
7284
7285        // Check if any entries were unbalanced
7286        // Note: When fail_on_validation_error is false, errors are stored in tracker
7287        let has_unbalanced = tracker
7288            .get_validation_errors()
7289            .iter()
7290            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
7291
7292        // Validate balance sheet for each company
7293        // Include both returned errors and collected validation errors
7294        let mut all_errors = errors;
7295        all_errors.extend(tracker.get_validation_errors().iter().cloned());
7296        let company_codes: Vec<String> = self
7297            .config
7298            .companies
7299            .iter()
7300            .map(|c| c.code.clone())
7301            .collect();
7302
7303        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7304            .map(|d| d + chrono::Months::new(self.config.global.period_months))
7305            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7306
7307        for company_code in &company_codes {
7308            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
7309                all_errors.push(e);
7310            }
7311        }
7312
7313        // Get statistics after all mutable operations are done
7314        let stats = tracker.get_statistics();
7315
7316        // Determine if balanced overall
7317        let is_balanced = all_errors.is_empty();
7318
7319        if let Some(pb) = pb {
7320            let msg = if is_balanced {
7321                "Balance validation passed"
7322            } else {
7323                "Balance validation completed with errors"
7324            };
7325            pb.finish_with_message(msg);
7326        }
7327
7328        Ok(BalanceValidationResult {
7329            validated: true,
7330            is_balanced,
7331            entries_processed: stats.entries_processed,
7332            total_debits: stats.total_debits,
7333            total_credits: stats.total_credits,
7334            accounts_tracked: stats.accounts_tracked,
7335            companies_tracked: stats.companies_tracked,
7336            validation_errors: all_errors,
7337            has_unbalanced_entries: has_unbalanced,
7338        })
7339    }
7340
7341    /// Inject data quality variations into journal entries.
7342    ///
7343    /// Applies typos, missing values, and format variations to make
7344    /// the synthetic data more realistic for testing data cleaning pipelines.
7345    fn inject_data_quality(
7346        &mut self,
7347        entries: &mut [JournalEntry],
7348    ) -> SynthResult<DataQualityStats> {
7349        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
7350
7351        // Build config from user-specified schema settings when data_quality is enabled;
7352        // otherwise fall back to the low-rate minimal() preset.
7353        let config = if self.config.data_quality.enabled {
7354            let dq = &self.config.data_quality;
7355            DataQualityConfig {
7356                enable_missing_values: dq.missing_values.enabled,
7357                missing_values: datasynth_generators::MissingValueConfig {
7358                    global_rate: dq.effective_missing_rate(),
7359                    ..Default::default()
7360                },
7361                enable_format_variations: dq.format_variations.enabled,
7362                format_variations: datasynth_generators::FormatVariationConfig {
7363                    date_variation_rate: dq.format_variations.dates.rate,
7364                    amount_variation_rate: dq.format_variations.amounts.rate,
7365                    identifier_variation_rate: dq.format_variations.identifiers.rate,
7366                    ..Default::default()
7367                },
7368                enable_duplicates: dq.duplicates.enabled,
7369                duplicates: datasynth_generators::DuplicateConfig {
7370                    duplicate_rate: dq.effective_duplicate_rate(),
7371                    ..Default::default()
7372                },
7373                enable_typos: dq.typos.enabled,
7374                typos: datasynth_generators::TypoConfig {
7375                    char_error_rate: dq.effective_typo_rate(),
7376                    ..Default::default()
7377                },
7378                enable_encoding_issues: dq.encoding_issues.enabled,
7379                encoding_issue_rate: dq.encoding_issues.rate,
7380                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
7381                track_statistics: true,
7382            }
7383        } else {
7384            DataQualityConfig::minimal()
7385        };
7386        let mut injector = DataQualityInjector::new(config);
7387
7388        // Wire country pack for locale-aware format baselines
7389        injector.set_country_pack(self.primary_pack().clone());
7390
7391        // Build context for missing value decisions
7392        let context = HashMap::new();
7393
7394        for entry in entries.iter_mut() {
7395            // Process header_text field (common target for typos)
7396            if let Some(text) = &entry.header.header_text {
7397                let processed = injector.process_text_field(
7398                    "header_text",
7399                    text,
7400                    &entry.header.document_id.to_string(),
7401                    &context,
7402                );
7403                match processed {
7404                    Some(new_text) if new_text != *text => {
7405                        entry.header.header_text = Some(new_text);
7406                    }
7407                    None => {
7408                        entry.header.header_text = None; // Missing value
7409                    }
7410                    _ => {}
7411                }
7412            }
7413
7414            // Process reference field
7415            if let Some(ref_text) = &entry.header.reference {
7416                let processed = injector.process_text_field(
7417                    "reference",
7418                    ref_text,
7419                    &entry.header.document_id.to_string(),
7420                    &context,
7421                );
7422                match processed {
7423                    Some(new_text) if new_text != *ref_text => {
7424                        entry.header.reference = Some(new_text);
7425                    }
7426                    None => {
7427                        entry.header.reference = None;
7428                    }
7429                    _ => {}
7430                }
7431            }
7432
7433            // Process user_persona field (potential for typos in user IDs)
7434            let user_persona = entry.header.user_persona.clone();
7435            if let Some(processed) = injector.process_text_field(
7436                "user_persona",
7437                &user_persona,
7438                &entry.header.document_id.to_string(),
7439                &context,
7440            ) {
7441                if processed != user_persona {
7442                    entry.header.user_persona = processed;
7443                }
7444            }
7445
7446            // Process line items
7447            for line in &mut entry.lines {
7448                // Process line description if present
7449                if let Some(ref text) = line.line_text {
7450                    let processed = injector.process_text_field(
7451                        "line_text",
7452                        text,
7453                        &entry.header.document_id.to_string(),
7454                        &context,
7455                    );
7456                    match processed {
7457                        Some(new_text) if new_text != *text => {
7458                            line.line_text = Some(new_text);
7459                        }
7460                        None => {
7461                            line.line_text = None;
7462                        }
7463                        _ => {}
7464                    }
7465                }
7466
7467                // Process cost_center if present
7468                if let Some(cc) = &line.cost_center {
7469                    let processed = injector.process_text_field(
7470                        "cost_center",
7471                        cc,
7472                        &entry.header.document_id.to_string(),
7473                        &context,
7474                    );
7475                    match processed {
7476                        Some(new_cc) if new_cc != *cc => {
7477                            line.cost_center = Some(new_cc);
7478                        }
7479                        None => {
7480                            line.cost_center = None;
7481                        }
7482                        _ => {}
7483                    }
7484                }
7485            }
7486
7487            if let Some(pb) = &pb {
7488                pb.inc(1);
7489            }
7490        }
7491
7492        if let Some(pb) = pb {
7493            pb.finish_with_message("Data quality injection complete");
7494        }
7495
7496        Ok(injector.stats().clone())
7497    }
7498
7499    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
7500    ///
7501    /// Creates complete audit documentation for each company in the configuration,
7502    /// following ISA standards:
7503    /// - ISA 210/220: Engagement acceptance and terms
7504    /// - ISA 230: Audit documentation (workpapers)
7505    /// - ISA 265: Control deficiencies (findings)
7506    /// - ISA 315/330: Risk assessment and response
7507    /// - ISA 500: Audit evidence
7508    /// - ISA 200: Professional judgment
7509    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
7510        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7511            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7512        let fiscal_year = start_date.year() as u16;
7513        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
7514
7515        // Calculate rough total revenue from entries for materiality
7516        let total_revenue: rust_decimal::Decimal = entries
7517            .iter()
7518            .flat_map(|e| e.lines.iter())
7519            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
7520            .map(|l| l.credit_amount)
7521            .sum();
7522
7523        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
7524        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
7525
7526        let mut snapshot = AuditSnapshot::default();
7527
7528        // Initialize generators
7529        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
7530        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
7531        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
7532        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
7533        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
7534        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
7535
7536        // Get list of accounts from CoA for risk assessment
7537        let accounts: Vec<String> = self
7538            .coa
7539            .as_ref()
7540            .map(|coa| {
7541                coa.get_postable_accounts()
7542                    .iter()
7543                    .map(|acc| acc.account_code().to_string())
7544                    .collect()
7545            })
7546            .unwrap_or_default();
7547
7548        // Generate engagements for each company
7549        for (i, company) in self.config.companies.iter().enumerate() {
7550            // Calculate company-specific revenue (proportional to volume weight)
7551            let company_revenue = total_revenue
7552                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
7553
7554            // Generate engagements for this company
7555            let engagements_for_company =
7556                self.phase_config.audit_engagements / self.config.companies.len().max(1);
7557            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
7558                1
7559            } else {
7560                0
7561            };
7562
7563            for _eng_idx in 0..(engagements_for_company + extra) {
7564                // Generate the engagement
7565                let mut engagement = engagement_gen.generate_engagement(
7566                    &company.code,
7567                    &company.name,
7568                    fiscal_year,
7569                    period_end,
7570                    company_revenue,
7571                    None, // Use default engagement type
7572                );
7573
7574                // Replace synthetic team IDs with real employee IDs from master data
7575                if !self.master_data.employees.is_empty() {
7576                    let emp_count = self.master_data.employees.len();
7577                    // Use employee IDs deterministically based on engagement index
7578                    let base = (i * 10 + _eng_idx) % emp_count;
7579                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
7580                        .employee_id
7581                        .clone();
7582                    engagement.engagement_manager_id = self.master_data.employees
7583                        [(base + 1) % emp_count]
7584                        .employee_id
7585                        .clone();
7586                    let real_team: Vec<String> = engagement
7587                        .team_member_ids
7588                        .iter()
7589                        .enumerate()
7590                        .map(|(j, _)| {
7591                            self.master_data.employees[(base + 2 + j) % emp_count]
7592                                .employee_id
7593                                .clone()
7594                        })
7595                        .collect();
7596                    engagement.team_member_ids = real_team;
7597                }
7598
7599                if let Some(pb) = &pb {
7600                    pb.inc(1);
7601                }
7602
7603                // Get team members from the engagement
7604                let team_members: Vec<String> = engagement.team_member_ids.clone();
7605
7606                // Generate workpapers for the engagement
7607                let workpapers =
7608                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
7609
7610                for wp in &workpapers {
7611                    if let Some(pb) = &pb {
7612                        pb.inc(1);
7613                    }
7614
7615                    // Generate evidence for each workpaper
7616                    let evidence = evidence_gen.generate_evidence_for_workpaper(
7617                        wp,
7618                        &team_members,
7619                        wp.preparer_date,
7620                    );
7621
7622                    for _ in &evidence {
7623                        if let Some(pb) = &pb {
7624                            pb.inc(1);
7625                        }
7626                    }
7627
7628                    snapshot.evidence.extend(evidence);
7629                }
7630
7631                // Generate risk assessments for the engagement
7632                let risks =
7633                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
7634
7635                for _ in &risks {
7636                    if let Some(pb) = &pb {
7637                        pb.inc(1);
7638                    }
7639                }
7640                snapshot.risk_assessments.extend(risks);
7641
7642                // Generate findings for the engagement
7643                let findings = finding_gen.generate_findings_for_engagement(
7644                    &engagement,
7645                    &workpapers,
7646                    &team_members,
7647                );
7648
7649                for _ in &findings {
7650                    if let Some(pb) = &pb {
7651                        pb.inc(1);
7652                    }
7653                }
7654                snapshot.findings.extend(findings);
7655
7656                // Generate professional judgments for the engagement
7657                let judgments =
7658                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
7659
7660                for _ in &judgments {
7661                    if let Some(pb) = &pb {
7662                        pb.inc(1);
7663                    }
7664                }
7665                snapshot.judgments.extend(judgments);
7666
7667                // Add workpapers after findings since findings need them
7668                snapshot.workpapers.extend(workpapers);
7669                snapshot.engagements.push(engagement);
7670            }
7671        }
7672
7673        if let Some(pb) = pb {
7674            pb.finish_with_message(format!(
7675                "Audit data: {} engagements, {} workpapers, {} evidence",
7676                snapshot.engagements.len(),
7677                snapshot.workpapers.len(),
7678                snapshot.evidence.len()
7679            ));
7680        }
7681
7682        Ok(snapshot)
7683    }
7684
7685    /// Export journal entries as graph data for ML training and network reconstruction.
7686    ///
7687    /// Builds a transaction graph where:
7688    /// - Nodes are GL accounts
7689    /// - Edges are money flows from credit to debit accounts
7690    /// - Edge attributes include amount, date, business process, anomaly flags
7691    fn export_graphs(
7692        &mut self,
7693        entries: &[JournalEntry],
7694        _coa: &Arc<ChartOfAccounts>,
7695        stats: &mut EnhancedGenerationStatistics,
7696    ) -> SynthResult<GraphExportSnapshot> {
7697        let pb = self.create_progress_bar(100, "Exporting Graphs");
7698
7699        let mut snapshot = GraphExportSnapshot::default();
7700
7701        // Get output directory
7702        let output_dir = self
7703            .output_path
7704            .clone()
7705            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7706        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7707
7708        // Process each graph type configuration
7709        for graph_type in &self.config.graph_export.graph_types {
7710            if let Some(pb) = &pb {
7711                pb.inc(10);
7712            }
7713
7714            // Build transaction graph
7715            let graph_config = TransactionGraphConfig {
7716                include_vendors: false,
7717                include_customers: false,
7718                create_debit_credit_edges: true,
7719                include_document_nodes: graph_type.include_document_nodes,
7720                min_edge_weight: graph_type.min_edge_weight,
7721                aggregate_parallel_edges: graph_type.aggregate_edges,
7722                framework: None,
7723            };
7724
7725            let mut builder = TransactionGraphBuilder::new(graph_config);
7726            builder.add_journal_entries(entries);
7727            let graph = builder.build();
7728
7729            // Update stats
7730            stats.graph_node_count += graph.node_count();
7731            stats.graph_edge_count += graph.edge_count();
7732
7733            if let Some(pb) = &pb {
7734                pb.inc(40);
7735            }
7736
7737            // Export to each configured format
7738            for format in &self.config.graph_export.formats {
7739                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
7740
7741                // Create output directory
7742                if let Err(e) = std::fs::create_dir_all(&format_dir) {
7743                    warn!("Failed to create graph output directory: {}", e);
7744                    continue;
7745                }
7746
7747                match format {
7748                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
7749                        let pyg_config = PyGExportConfig {
7750                            common: datasynth_graph::CommonExportConfig {
7751                                export_node_features: true,
7752                                export_edge_features: true,
7753                                export_node_labels: true,
7754                                export_edge_labels: true,
7755                                export_masks: true,
7756                                train_ratio: self.config.graph_export.train_ratio,
7757                                val_ratio: self.config.graph_export.validation_ratio,
7758                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7759                            },
7760                            one_hot_categoricals: false,
7761                        };
7762
7763                        let exporter = PyGExporter::new(pyg_config);
7764                        match exporter.export(&graph, &format_dir) {
7765                            Ok(metadata) => {
7766                                snapshot.exports.insert(
7767                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
7768                                    GraphExportInfo {
7769                                        name: graph_type.name.clone(),
7770                                        format: "pytorch_geometric".to_string(),
7771                                        output_path: format_dir.clone(),
7772                                        node_count: metadata.num_nodes,
7773                                        edge_count: metadata.num_edges,
7774                                    },
7775                                );
7776                                snapshot.graph_count += 1;
7777                            }
7778                            Err(e) => {
7779                                warn!("Failed to export PyTorch Geometric graph: {}", e);
7780                            }
7781                        }
7782                    }
7783                    datasynth_config::schema::GraphExportFormat::Neo4j => {
7784                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
7785
7786                        let neo4j_config = Neo4jExportConfig {
7787                            export_node_properties: true,
7788                            export_edge_properties: true,
7789                            export_features: true,
7790                            generate_cypher: true,
7791                            generate_admin_import: true,
7792                            database_name: "synth".to_string(),
7793                            cypher_batch_size: 1000,
7794                        };
7795
7796                        let exporter = Neo4jExporter::new(neo4j_config);
7797                        match exporter.export(&graph, &format_dir) {
7798                            Ok(metadata) => {
7799                                snapshot.exports.insert(
7800                                    format!("{}_{}", graph_type.name, "neo4j"),
7801                                    GraphExportInfo {
7802                                        name: graph_type.name.clone(),
7803                                        format: "neo4j".to_string(),
7804                                        output_path: format_dir.clone(),
7805                                        node_count: metadata.num_nodes,
7806                                        edge_count: metadata.num_edges,
7807                                    },
7808                                );
7809                                snapshot.graph_count += 1;
7810                            }
7811                            Err(e) => {
7812                                warn!("Failed to export Neo4j graph: {}", e);
7813                            }
7814                        }
7815                    }
7816                    datasynth_config::schema::GraphExportFormat::Dgl => {
7817                        use datasynth_graph::{DGLExportConfig, DGLExporter};
7818
7819                        let dgl_config = DGLExportConfig {
7820                            common: datasynth_graph::CommonExportConfig {
7821                                export_node_features: true,
7822                                export_edge_features: true,
7823                                export_node_labels: true,
7824                                export_edge_labels: true,
7825                                export_masks: true,
7826                                train_ratio: self.config.graph_export.train_ratio,
7827                                val_ratio: self.config.graph_export.validation_ratio,
7828                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
7829                            },
7830                            heterogeneous: false,
7831                            include_pickle_script: true, // DGL ecosystem standard helper
7832                        };
7833
7834                        let exporter = DGLExporter::new(dgl_config);
7835                        match exporter.export(&graph, &format_dir) {
7836                            Ok(metadata) => {
7837                                snapshot.exports.insert(
7838                                    format!("{}_{}", graph_type.name, "dgl"),
7839                                    GraphExportInfo {
7840                                        name: graph_type.name.clone(),
7841                                        format: "dgl".to_string(),
7842                                        output_path: format_dir.clone(),
7843                                        node_count: metadata.common.num_nodes,
7844                                        edge_count: metadata.common.num_edges,
7845                                    },
7846                                );
7847                                snapshot.graph_count += 1;
7848                            }
7849                            Err(e) => {
7850                                warn!("Failed to export DGL graph: {}", e);
7851                            }
7852                        }
7853                    }
7854                    datasynth_config::schema::GraphExportFormat::RustGraph => {
7855                        use datasynth_graph::{
7856                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
7857                        };
7858
7859                        let rustgraph_config = RustGraphExportConfig {
7860                            include_features: true,
7861                            include_temporal: true,
7862                            include_labels: true,
7863                            source_name: "datasynth".to_string(),
7864                            batch_id: None,
7865                            output_format: RustGraphOutputFormat::JsonLines,
7866                            export_node_properties: true,
7867                            export_edge_properties: true,
7868                            pretty_print: false,
7869                        };
7870
7871                        let exporter = RustGraphExporter::new(rustgraph_config);
7872                        match exporter.export(&graph, &format_dir) {
7873                            Ok(metadata) => {
7874                                snapshot.exports.insert(
7875                                    format!("{}_{}", graph_type.name, "rustgraph"),
7876                                    GraphExportInfo {
7877                                        name: graph_type.name.clone(),
7878                                        format: "rustgraph".to_string(),
7879                                        output_path: format_dir.clone(),
7880                                        node_count: metadata.num_nodes,
7881                                        edge_count: metadata.num_edges,
7882                                    },
7883                                );
7884                                snapshot.graph_count += 1;
7885                            }
7886                            Err(e) => {
7887                                warn!("Failed to export RustGraph: {}", e);
7888                            }
7889                        }
7890                    }
7891                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
7892                        // Hypergraph export is handled separately in Phase 10b
7893                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
7894                    }
7895                }
7896            }
7897
7898            if let Some(pb) = &pb {
7899                pb.inc(40);
7900            }
7901        }
7902
7903        stats.graph_export_count = snapshot.graph_count;
7904        snapshot.exported = snapshot.graph_count > 0;
7905
7906        if let Some(pb) = pb {
7907            pb.finish_with_message(format!(
7908                "Graphs exported: {} graphs ({} nodes, {} edges)",
7909                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
7910            ));
7911        }
7912
7913        Ok(snapshot)
7914    }
7915
7916    /// Build additional graph types (banking, approval, entity) when relevant data
7917    /// is available. These run as a late phase because the data they need (banking
7918    /// snapshot, intercompany snapshot) is only generated after the main graph
7919    /// export phase.
7920    fn build_additional_graphs(
7921        &self,
7922        banking: &BankingSnapshot,
7923        intercompany: &IntercompanySnapshot,
7924        entries: &[JournalEntry],
7925        stats: &mut EnhancedGenerationStatistics,
7926    ) {
7927        let output_dir = self
7928            .output_path
7929            .clone()
7930            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
7931        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
7932
7933        // Banking graph: build when banking customers and transactions exist
7934        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
7935            info!("Phase 10c: Building banking network graph");
7936            let config = BankingGraphConfig::default();
7937            let mut builder = BankingGraphBuilder::new(config);
7938            builder.add_customers(&banking.customers);
7939            builder.add_accounts(&banking.accounts, &banking.customers);
7940            builder.add_transactions(&banking.transactions);
7941            let graph = builder.build();
7942
7943            let node_count = graph.node_count();
7944            let edge_count = graph.edge_count();
7945            stats.graph_node_count += node_count;
7946            stats.graph_edge_count += edge_count;
7947
7948            // Export as PyG if configured
7949            for format in &self.config.graph_export.formats {
7950                if matches!(
7951                    format,
7952                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
7953                ) {
7954                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
7955                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
7956                        warn!("Failed to create banking graph output dir: {}", e);
7957                        continue;
7958                    }
7959                    let pyg_config = PyGExportConfig::default();
7960                    let exporter = PyGExporter::new(pyg_config);
7961                    if let Err(e) = exporter.export(&graph, &format_dir) {
7962                        warn!("Failed to export banking graph as PyG: {}", e);
7963                    } else {
7964                        info!(
7965                            "Banking network graph exported: {} nodes, {} edges",
7966                            node_count, edge_count
7967                        );
7968                    }
7969                }
7970            }
7971        }
7972
7973        // Approval graph: build from journal entry approval workflows
7974        let approval_entries: Vec<_> = entries
7975            .iter()
7976            .filter(|je| je.header.approval_workflow.is_some())
7977            .collect();
7978
7979        if !approval_entries.is_empty() {
7980            info!(
7981                "Phase 10c: Building approval network graph ({} entries with approvals)",
7982                approval_entries.len()
7983            );
7984            let config = ApprovalGraphConfig::default();
7985            let mut builder = ApprovalGraphBuilder::new(config);
7986
7987            for je in &approval_entries {
7988                if let Some(ref wf) = je.header.approval_workflow {
7989                    for action in &wf.actions {
7990                        let record = datasynth_core::models::ApprovalRecord {
7991                            approval_id: format!(
7992                                "APR-{}-{}",
7993                                je.header.document_id, action.approval_level
7994                            ),
7995                            document_number: je.header.document_id.to_string(),
7996                            document_type: "JE".to_string(),
7997                            company_code: je.company_code().to_string(),
7998                            requester_id: wf.preparer_id.clone(),
7999                            requester_name: Some(wf.preparer_name.clone()),
8000                            approver_id: action.actor_id.clone(),
8001                            approver_name: action.actor_name.clone(),
8002                            approval_date: je.posting_date(),
8003                            action: format!("{:?}", action.action),
8004                            amount: wf.amount,
8005                            approval_limit: None,
8006                            comments: action.comments.clone(),
8007                            delegation_from: None,
8008                            is_auto_approved: false,
8009                        };
8010                        builder.add_approval(&record);
8011                    }
8012                }
8013            }
8014
8015            let graph = builder.build();
8016            let node_count = graph.node_count();
8017            let edge_count = graph.edge_count();
8018            stats.graph_node_count += node_count;
8019            stats.graph_edge_count += edge_count;
8020
8021            // Export as PyG if configured
8022            for format in &self.config.graph_export.formats {
8023                if matches!(
8024                    format,
8025                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
8026                ) {
8027                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
8028                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
8029                        warn!("Failed to create approval graph output dir: {}", e);
8030                        continue;
8031                    }
8032                    let pyg_config = PyGExportConfig::default();
8033                    let exporter = PyGExporter::new(pyg_config);
8034                    if let Err(e) = exporter.export(&graph, &format_dir) {
8035                        warn!("Failed to export approval graph as PyG: {}", e);
8036                    } else {
8037                        info!(
8038                            "Approval network graph exported: {} nodes, {} edges",
8039                            node_count, edge_count
8040                        );
8041                    }
8042                }
8043            }
8044        }
8045
8046        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
8047        if self.config.companies.len() >= 2 {
8048            info!(
8049                "Phase 10c: Building entity relationship graph ({} companies)",
8050                self.config.companies.len()
8051            );
8052
8053            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8054                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
8055
8056            // Map CompanyConfig → Company objects
8057            let parent_code = &self.config.companies[0].code;
8058            let mut companies: Vec<datasynth_core::models::Company> =
8059                Vec::with_capacity(self.config.companies.len());
8060
8061            // First company is the parent
8062            let first = &self.config.companies[0];
8063            companies.push(datasynth_core::models::Company::parent(
8064                &first.code,
8065                &first.name,
8066                &first.country,
8067                &first.currency,
8068            ));
8069
8070            // Remaining companies are subsidiaries (100% owned by parent)
8071            for cc in self.config.companies.iter().skip(1) {
8072                companies.push(datasynth_core::models::Company::subsidiary(
8073                    &cc.code,
8074                    &cc.name,
8075                    &cc.country,
8076                    &cc.currency,
8077                    parent_code,
8078                    rust_decimal::Decimal::from(100),
8079                ));
8080            }
8081
8082            // Build IntercompanyRelationship records (same logic as phase_intercompany)
8083            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
8084                self.config
8085                    .companies
8086                    .iter()
8087                    .skip(1)
8088                    .enumerate()
8089                    .map(|(i, cc)| {
8090                        let mut rel =
8091                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
8092                                format!("REL{:03}", i + 1),
8093                                parent_code.clone(),
8094                                cc.code.clone(),
8095                                rust_decimal::Decimal::from(100),
8096                                start_date,
8097                            );
8098                        rel.functional_currency = cc.currency.clone();
8099                        rel
8100                    })
8101                    .collect();
8102
8103            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
8104            builder.add_companies(&companies);
8105            builder.add_ownership_relationships(&relationships);
8106
8107            // Thread IC matched-pair transaction edges into the entity graph
8108            for pair in &intercompany.matched_pairs {
8109                builder.add_intercompany_edge(
8110                    &pair.seller_company,
8111                    &pair.buyer_company,
8112                    pair.amount,
8113                    &format!("{:?}", pair.transaction_type),
8114                );
8115            }
8116
8117            let graph = builder.build();
8118            let node_count = graph.node_count();
8119            let edge_count = graph.edge_count();
8120            stats.graph_node_count += node_count;
8121            stats.graph_edge_count += edge_count;
8122
8123            // Export as PyG if configured
8124            for format in &self.config.graph_export.formats {
8125                if matches!(
8126                    format,
8127                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
8128                ) {
8129                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
8130                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
8131                        warn!("Failed to create entity graph output dir: {}", e);
8132                        continue;
8133                    }
8134                    let pyg_config = PyGExportConfig::default();
8135                    let exporter = PyGExporter::new(pyg_config);
8136                    if let Err(e) = exporter.export(&graph, &format_dir) {
8137                        warn!("Failed to export entity graph as PyG: {}", e);
8138                    } else {
8139                        info!(
8140                            "Entity relationship graph exported: {} nodes, {} edges",
8141                            node_count, edge_count
8142                        );
8143                    }
8144                }
8145            }
8146        } else {
8147            debug!(
8148                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
8149                self.config.companies.len()
8150            );
8151        }
8152    }
8153
8154    /// Export a multi-layer hypergraph for RustGraph integration.
8155    ///
8156    /// Builds a 3-layer hypergraph:
8157    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
8158    /// - Layer 2: Process Events (all process family document flows + OCPM events)
8159    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
8160    #[allow(clippy::too_many_arguments)]
8161    fn export_hypergraph(
8162        &self,
8163        coa: &Arc<ChartOfAccounts>,
8164        entries: &[JournalEntry],
8165        document_flows: &DocumentFlowSnapshot,
8166        sourcing: &SourcingSnapshot,
8167        hr: &HrSnapshot,
8168        manufacturing: &ManufacturingSnapshot,
8169        banking: &BankingSnapshot,
8170        audit: &AuditSnapshot,
8171        financial_reporting: &FinancialReportingSnapshot,
8172        ocpm: &OcpmSnapshot,
8173        stats: &mut EnhancedGenerationStatistics,
8174    ) -> SynthResult<HypergraphExportInfo> {
8175        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
8176        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
8177        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
8178        use datasynth_graph::models::hypergraph::AggregationStrategy;
8179
8180        let hg_settings = &self.config.graph_export.hypergraph;
8181
8182        // Parse aggregation strategy from config string
8183        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
8184            "truncate" => AggregationStrategy::Truncate,
8185            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
8186            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
8187            "importance_sample" => AggregationStrategy::ImportanceSample,
8188            _ => AggregationStrategy::PoolByCounterparty,
8189        };
8190
8191        let builder_config = HypergraphConfig {
8192            max_nodes: hg_settings.max_nodes,
8193            aggregation_strategy,
8194            include_coso: hg_settings.governance_layer.include_coso,
8195            include_controls: hg_settings.governance_layer.include_controls,
8196            include_sox: hg_settings.governance_layer.include_sox,
8197            include_vendors: hg_settings.governance_layer.include_vendors,
8198            include_customers: hg_settings.governance_layer.include_customers,
8199            include_employees: hg_settings.governance_layer.include_employees,
8200            include_p2p: hg_settings.process_layer.include_p2p,
8201            include_o2c: hg_settings.process_layer.include_o2c,
8202            include_s2c: hg_settings.process_layer.include_s2c,
8203            include_h2r: hg_settings.process_layer.include_h2r,
8204            include_mfg: hg_settings.process_layer.include_mfg,
8205            include_bank: hg_settings.process_layer.include_bank,
8206            include_audit: hg_settings.process_layer.include_audit,
8207            include_r2r: hg_settings.process_layer.include_r2r,
8208            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
8209            docs_per_counterparty_threshold: hg_settings
8210                .process_layer
8211                .docs_per_counterparty_threshold,
8212            include_accounts: hg_settings.accounting_layer.include_accounts,
8213            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
8214            include_cross_layer_edges: hg_settings.cross_layer.enabled,
8215        };
8216
8217        let mut builder = HypergraphBuilder::new(builder_config);
8218
8219        // Layer 1: Governance & Controls
8220        builder.add_coso_framework();
8221
8222        // Add controls if available (generated during JE generation)
8223        // Controls are generated per-company; we use the standard set
8224        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
8225            let controls = InternalControl::standard_controls();
8226            builder.add_controls(&controls);
8227        }
8228
8229        // Add master data
8230        builder.add_vendors(&self.master_data.vendors);
8231        builder.add_customers(&self.master_data.customers);
8232        builder.add_employees(&self.master_data.employees);
8233
8234        // Layer 2: Process Events (all process families)
8235        builder.add_p2p_documents(
8236            &document_flows.purchase_orders,
8237            &document_flows.goods_receipts,
8238            &document_flows.vendor_invoices,
8239            &document_flows.payments,
8240        );
8241        builder.add_o2c_documents(
8242            &document_flows.sales_orders,
8243            &document_flows.deliveries,
8244            &document_flows.customer_invoices,
8245        );
8246        builder.add_s2c_documents(
8247            &sourcing.sourcing_projects,
8248            &sourcing.qualifications,
8249            &sourcing.rfx_events,
8250            &sourcing.bids,
8251            &sourcing.bid_evaluations,
8252            &sourcing.contracts,
8253        );
8254        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
8255        builder.add_mfg_documents(
8256            &manufacturing.production_orders,
8257            &manufacturing.quality_inspections,
8258            &manufacturing.cycle_counts,
8259        );
8260        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
8261        builder.add_audit_documents(
8262            &audit.engagements,
8263            &audit.workpapers,
8264            &audit.findings,
8265            &audit.evidence,
8266            &audit.risk_assessments,
8267            &audit.judgments,
8268        );
8269        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
8270
8271        // OCPM events as hyperedges
8272        if let Some(ref event_log) = ocpm.event_log {
8273            builder.add_ocpm_events(event_log);
8274        }
8275
8276        // Layer 3: Accounting Network
8277        builder.add_accounts(coa);
8278        builder.add_journal_entries_as_hyperedges(entries);
8279
8280        // Build the hypergraph
8281        let hypergraph = builder.build();
8282
8283        // Export
8284        let output_dir = self
8285            .output_path
8286            .clone()
8287            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
8288        let hg_dir = output_dir
8289            .join(&self.config.graph_export.output_subdirectory)
8290            .join(&hg_settings.output_subdirectory);
8291
8292        // Branch on output format
8293        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
8294            "unified" => {
8295                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8296                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8297                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
8298                })?;
8299                (
8300                    metadata.num_nodes,
8301                    metadata.num_edges,
8302                    metadata.num_hyperedges,
8303                )
8304            }
8305            _ => {
8306                // "native" or any unrecognized format → use existing exporter
8307                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
8308                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
8309                    SynthError::generation(format!("Hypergraph export failed: {e}"))
8310                })?;
8311                (
8312                    metadata.num_nodes,
8313                    metadata.num_edges,
8314                    metadata.num_hyperedges,
8315                )
8316            }
8317        };
8318
8319        // Stream to RustGraph ingest endpoint if configured
8320        #[cfg(feature = "streaming")]
8321        if let Some(ref target_url) = hg_settings.stream_target {
8322            use crate::stream_client::{StreamClient, StreamConfig};
8323            use std::io::Write as _;
8324
8325            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
8326            let stream_config = StreamConfig {
8327                target_url: target_url.clone(),
8328                batch_size: hg_settings.stream_batch_size,
8329                api_key,
8330                ..StreamConfig::default()
8331            };
8332
8333            match StreamClient::new(stream_config) {
8334                Ok(mut client) => {
8335                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
8336                    match exporter.export_to_writer(&hypergraph, &mut client) {
8337                        Ok(_) => {
8338                            if let Err(e) = client.flush() {
8339                                warn!("Failed to flush stream client: {}", e);
8340                            } else {
8341                                info!("Streamed {} records to {}", client.total_sent(), target_url);
8342                            }
8343                        }
8344                        Err(e) => {
8345                            warn!("Streaming export failed: {}", e);
8346                        }
8347                    }
8348                }
8349                Err(e) => {
8350                    warn!("Failed to create stream client: {}", e);
8351                }
8352            }
8353        }
8354
8355        // Update stats
8356        stats.graph_node_count += num_nodes;
8357        stats.graph_edge_count += num_edges;
8358        stats.graph_export_count += 1;
8359
8360        Ok(HypergraphExportInfo {
8361            node_count: num_nodes,
8362            edge_count: num_edges,
8363            hyperedge_count: num_hyperedges,
8364            output_path: hg_dir,
8365        })
8366    }
8367
8368    /// Generate banking KYC/AML data.
8369    ///
8370    /// Creates banking customers, accounts, and transactions with AML typology injection.
8371    /// Uses the BankingOrchestrator from synth-banking crate.
8372    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
8373        let pb = self.create_progress_bar(100, "Generating Banking Data");
8374
8375        // Build the banking orchestrator from config
8376        let orchestrator = BankingOrchestratorBuilder::new()
8377            .config(self.config.banking.clone())
8378            .seed(self.seed + 9000)
8379            .country_pack(self.primary_pack().clone())
8380            .build();
8381
8382        if let Some(pb) = &pb {
8383            pb.inc(10);
8384        }
8385
8386        // Generate the banking data
8387        let result = orchestrator.generate();
8388
8389        if let Some(pb) = &pb {
8390            pb.inc(90);
8391            pb.finish_with_message(format!(
8392                "Banking: {} customers, {} transactions",
8393                result.customers.len(),
8394                result.transactions.len()
8395            ));
8396        }
8397
8398        // Cross-reference banking customers with core master data so that
8399        // banking customer names align with the enterprise customer list.
8400        // We rotate through core customers, overlaying their name and country
8401        // onto the generated banking customers where possible.
8402        let mut banking_customers = result.customers;
8403        let core_customers = &self.master_data.customers;
8404        if !core_customers.is_empty() {
8405            for (i, bc) in banking_customers.iter_mut().enumerate() {
8406                let core = &core_customers[i % core_customers.len()];
8407                bc.name = CustomerName::business(&core.name);
8408                bc.residence_country = core.country.clone();
8409                bc.enterprise_customer_id = Some(core.customer_id.clone());
8410            }
8411            debug!(
8412                "Cross-referenced {} banking customers with {} core customers",
8413                banking_customers.len(),
8414                core_customers.len()
8415            );
8416        }
8417
8418        Ok(BankingSnapshot {
8419            customers: banking_customers,
8420            accounts: result.accounts,
8421            transactions: result.transactions,
8422            transaction_labels: result.transaction_labels,
8423            customer_labels: result.customer_labels,
8424            account_labels: result.account_labels,
8425            relationship_labels: result.relationship_labels,
8426            narratives: result.narratives,
8427            suspicious_count: result.stats.suspicious_count,
8428            scenario_count: result.scenarios.len(),
8429        })
8430    }
8431
8432    /// Calculate total transactions to generate.
8433    fn calculate_total_transactions(&self) -> u64 {
8434        let months = self.config.global.period_months as f64;
8435        self.config
8436            .companies
8437            .iter()
8438            .map(|c| {
8439                let annual = c.annual_transaction_volume.count() as f64;
8440                let weighted = annual * c.volume_weight;
8441                (weighted * months / 12.0) as u64
8442            })
8443            .sum()
8444    }
8445
8446    /// Create a progress bar if progress display is enabled.
8447    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
8448        if !self.phase_config.show_progress {
8449            return None;
8450        }
8451
8452        let pb = if let Some(mp) = &self.multi_progress {
8453            mp.add(ProgressBar::new(total))
8454        } else {
8455            ProgressBar::new(total)
8456        };
8457
8458        pb.set_style(
8459            ProgressStyle::default_bar()
8460                .template(&format!(
8461                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
8462                ))
8463                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
8464                .progress_chars("#>-"),
8465        );
8466
8467        Some(pb)
8468    }
8469
8470    /// Get the generated chart of accounts.
8471    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
8472        self.coa.clone()
8473    }
8474
8475    /// Get the generated master data.
8476    pub fn get_master_data(&self) -> &MasterDataSnapshot {
8477        &self.master_data
8478    }
8479
8480    /// Build a lineage graph describing config → phase → output relationships.
8481    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
8482        use super::lineage::LineageGraphBuilder;
8483
8484        let mut builder = LineageGraphBuilder::new();
8485
8486        // Config sections
8487        builder.add_config_section("config:global", "Global Config");
8488        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
8489        builder.add_config_section("config:transactions", "Transaction Config");
8490
8491        // Generator phases
8492        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
8493        builder.add_generator_phase("phase:je", "Journal Entry Generation");
8494
8495        // Config → phase edges
8496        builder.configured_by("phase:coa", "config:chart_of_accounts");
8497        builder.configured_by("phase:je", "config:transactions");
8498
8499        // Output files
8500        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
8501        builder.produced_by("output:je", "phase:je");
8502
8503        // Optional phases based on config
8504        if self.phase_config.generate_master_data {
8505            builder.add_config_section("config:master_data", "Master Data Config");
8506            builder.add_generator_phase("phase:master_data", "Master Data Generation");
8507            builder.configured_by("phase:master_data", "config:master_data");
8508            builder.input_to("phase:master_data", "phase:je");
8509        }
8510
8511        if self.phase_config.generate_document_flows {
8512            builder.add_config_section("config:document_flows", "Document Flow Config");
8513            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
8514            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
8515            builder.configured_by("phase:p2p", "config:document_flows");
8516            builder.configured_by("phase:o2c", "config:document_flows");
8517
8518            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
8519            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
8520            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
8521            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
8522            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
8523
8524            builder.produced_by("output:po", "phase:p2p");
8525            builder.produced_by("output:gr", "phase:p2p");
8526            builder.produced_by("output:vi", "phase:p2p");
8527            builder.produced_by("output:so", "phase:o2c");
8528            builder.produced_by("output:ci", "phase:o2c");
8529        }
8530
8531        if self.phase_config.inject_anomalies {
8532            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
8533            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
8534            builder.configured_by("phase:anomaly", "config:fraud");
8535            builder.add_output_file(
8536                "output:labels",
8537                "Anomaly Labels",
8538                "labels/anomaly_labels.csv",
8539            );
8540            builder.produced_by("output:labels", "phase:anomaly");
8541        }
8542
8543        if self.phase_config.generate_audit {
8544            builder.add_config_section("config:audit", "Audit Config");
8545            builder.add_generator_phase("phase:audit", "Audit Data Generation");
8546            builder.configured_by("phase:audit", "config:audit");
8547        }
8548
8549        if self.phase_config.generate_banking {
8550            builder.add_config_section("config:banking", "Banking Config");
8551            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
8552            builder.configured_by("phase:banking", "config:banking");
8553        }
8554
8555        if self.config.llm.enabled {
8556            builder.add_config_section("config:llm", "LLM Enrichment Config");
8557            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
8558            builder.configured_by("phase:llm_enrichment", "config:llm");
8559        }
8560
8561        if self.config.diffusion.enabled {
8562            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
8563            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
8564            builder.configured_by("phase:diffusion", "config:diffusion");
8565        }
8566
8567        if self.config.causal.enabled {
8568            builder.add_config_section("config:causal", "Causal Generation Config");
8569            builder.add_generator_phase("phase:causal", "Causal Overlay");
8570            builder.configured_by("phase:causal", "config:causal");
8571        }
8572
8573        builder.build()
8574    }
8575}
8576
8577/// Get the directory name for a graph export format.
8578fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
8579    match format {
8580        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
8581        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
8582        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
8583        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
8584        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
8585    }
8586}
8587
8588#[cfg(test)]
8589#[allow(clippy::unwrap_used)]
8590mod tests {
8591    use super::*;
8592    use datasynth_config::schema::*;
8593
8594    fn create_test_config() -> GeneratorConfig {
8595        GeneratorConfig {
8596            global: GlobalConfig {
8597                industry: IndustrySector::Manufacturing,
8598                start_date: "2024-01-01".to_string(),
8599                period_months: 1,
8600                seed: Some(42),
8601                parallel: false,
8602                group_currency: "USD".to_string(),
8603                worker_threads: 0,
8604                memory_limit_mb: 0,
8605                fiscal_year_months: None,
8606            },
8607            companies: vec![CompanyConfig {
8608                code: "1000".to_string(),
8609                name: "Test Company".to_string(),
8610                currency: "USD".to_string(),
8611                country: "US".to_string(),
8612                annual_transaction_volume: TransactionVolume::TenK,
8613                volume_weight: 1.0,
8614                fiscal_year_variant: "K4".to_string(),
8615            }],
8616            chart_of_accounts: ChartOfAccountsConfig {
8617                complexity: CoAComplexity::Small,
8618                industry_specific: true,
8619                custom_accounts: None,
8620                min_hierarchy_depth: 2,
8621                max_hierarchy_depth: 4,
8622            },
8623            transactions: TransactionConfig::default(),
8624            output: OutputConfig::default(),
8625            fraud: FraudConfig::default(),
8626            internal_controls: InternalControlsConfig::default(),
8627            business_processes: BusinessProcessConfig::default(),
8628            user_personas: UserPersonaConfig::default(),
8629            templates: TemplateConfig::default(),
8630            approval: ApprovalConfig::default(),
8631            departments: DepartmentConfig::default(),
8632            master_data: MasterDataConfig::default(),
8633            document_flows: DocumentFlowConfig::default(),
8634            intercompany: IntercompanyConfig::default(),
8635            balance: BalanceConfig::default(),
8636            ocpm: OcpmConfig::default(),
8637            audit: AuditGenerationConfig::default(),
8638            banking: datasynth_banking::BankingConfig::default(),
8639            data_quality: DataQualitySchemaConfig::default(),
8640            scenario: ScenarioConfig::default(),
8641            temporal: TemporalDriftConfig::default(),
8642            graph_export: GraphExportConfig::default(),
8643            streaming: StreamingSchemaConfig::default(),
8644            rate_limit: RateLimitSchemaConfig::default(),
8645            temporal_attributes: TemporalAttributeSchemaConfig::default(),
8646            relationships: RelationshipSchemaConfig::default(),
8647            accounting_standards: AccountingStandardsConfig::default(),
8648            audit_standards: AuditStandardsConfig::default(),
8649            distributions: Default::default(),
8650            temporal_patterns: Default::default(),
8651            vendor_network: VendorNetworkSchemaConfig::default(),
8652            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
8653            relationship_strength: RelationshipStrengthSchemaConfig::default(),
8654            cross_process_links: CrossProcessLinksSchemaConfig::default(),
8655            organizational_events: OrganizationalEventsSchemaConfig::default(),
8656            behavioral_drift: BehavioralDriftSchemaConfig::default(),
8657            market_drift: MarketDriftSchemaConfig::default(),
8658            drift_labeling: DriftLabelingSchemaConfig::default(),
8659            anomaly_injection: Default::default(),
8660            industry_specific: Default::default(),
8661            fingerprint_privacy: Default::default(),
8662            quality_gates: Default::default(),
8663            compliance: Default::default(),
8664            webhooks: Default::default(),
8665            llm: Default::default(),
8666            diffusion: Default::default(),
8667            causal: Default::default(),
8668            source_to_pay: Default::default(),
8669            financial_reporting: Default::default(),
8670            hr: Default::default(),
8671            manufacturing: Default::default(),
8672            sales_quotes: Default::default(),
8673            tax: Default::default(),
8674            treasury: Default::default(),
8675            project_accounting: Default::default(),
8676            esg: Default::default(),
8677            country_packs: None,
8678            scenarios: Default::default(),
8679            session: Default::default(),
8680        }
8681    }
8682
8683    #[test]
8684    fn test_enhanced_orchestrator_creation() {
8685        let config = create_test_config();
8686        let orchestrator = EnhancedOrchestrator::with_defaults(config);
8687        assert!(orchestrator.is_ok());
8688    }
8689
8690    #[test]
8691    fn test_minimal_generation() {
8692        let config = create_test_config();
8693        let phase_config = PhaseConfig {
8694            generate_master_data: false,
8695            generate_document_flows: false,
8696            generate_journal_entries: true,
8697            inject_anomalies: false,
8698            show_progress: false,
8699            ..Default::default()
8700        };
8701
8702        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8703        let result = orchestrator.generate();
8704
8705        assert!(result.is_ok());
8706        let result = result.unwrap();
8707        assert!(!result.journal_entries.is_empty());
8708    }
8709
8710    #[test]
8711    fn test_master_data_generation() {
8712        let config = create_test_config();
8713        let phase_config = PhaseConfig {
8714            generate_master_data: true,
8715            generate_document_flows: false,
8716            generate_journal_entries: false,
8717            inject_anomalies: false,
8718            show_progress: false,
8719            vendors_per_company: 5,
8720            customers_per_company: 5,
8721            materials_per_company: 10,
8722            assets_per_company: 5,
8723            employees_per_company: 10,
8724            ..Default::default()
8725        };
8726
8727        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8728        let result = orchestrator.generate().unwrap();
8729
8730        assert!(!result.master_data.vendors.is_empty());
8731        assert!(!result.master_data.customers.is_empty());
8732        assert!(!result.master_data.materials.is_empty());
8733    }
8734
8735    #[test]
8736    fn test_document_flow_generation() {
8737        let config = create_test_config();
8738        let phase_config = PhaseConfig {
8739            generate_master_data: true,
8740            generate_document_flows: true,
8741            generate_journal_entries: false,
8742            inject_anomalies: false,
8743            inject_data_quality: false,
8744            validate_balances: false,
8745            generate_ocpm_events: false,
8746            show_progress: false,
8747            vendors_per_company: 5,
8748            customers_per_company: 5,
8749            materials_per_company: 10,
8750            assets_per_company: 5,
8751            employees_per_company: 10,
8752            p2p_chains: 5,
8753            o2c_chains: 5,
8754            ..Default::default()
8755        };
8756
8757        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8758        let result = orchestrator.generate().unwrap();
8759
8760        // Should have generated P2P and O2C chains
8761        assert!(!result.document_flows.p2p_chains.is_empty());
8762        assert!(!result.document_flows.o2c_chains.is_empty());
8763
8764        // Flattened documents should be populated
8765        assert!(!result.document_flows.purchase_orders.is_empty());
8766        assert!(!result.document_flows.sales_orders.is_empty());
8767    }
8768
8769    #[test]
8770    fn test_anomaly_injection() {
8771        let config = create_test_config();
8772        let phase_config = PhaseConfig {
8773            generate_master_data: false,
8774            generate_document_flows: false,
8775            generate_journal_entries: true,
8776            inject_anomalies: true,
8777            show_progress: false,
8778            ..Default::default()
8779        };
8780
8781        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8782        let result = orchestrator.generate().unwrap();
8783
8784        // Should have journal entries
8785        assert!(!result.journal_entries.is_empty());
8786
8787        // With ~833 entries and 2% rate, expect some anomalies
8788        // Note: This is probabilistic, so we just verify the structure exists
8789        assert!(result.anomaly_labels.summary.is_some());
8790    }
8791
8792    #[test]
8793    fn test_full_generation_pipeline() {
8794        let config = create_test_config();
8795        let phase_config = PhaseConfig {
8796            generate_master_data: true,
8797            generate_document_flows: true,
8798            generate_journal_entries: true,
8799            inject_anomalies: false,
8800            inject_data_quality: false,
8801            validate_balances: true,
8802            generate_ocpm_events: false,
8803            show_progress: false,
8804            vendors_per_company: 3,
8805            customers_per_company: 3,
8806            materials_per_company: 5,
8807            assets_per_company: 3,
8808            employees_per_company: 5,
8809            p2p_chains: 3,
8810            o2c_chains: 3,
8811            ..Default::default()
8812        };
8813
8814        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8815        let result = orchestrator.generate().unwrap();
8816
8817        // All phases should have results
8818        assert!(!result.master_data.vendors.is_empty());
8819        assert!(!result.master_data.customers.is_empty());
8820        assert!(!result.document_flows.p2p_chains.is_empty());
8821        assert!(!result.document_flows.o2c_chains.is_empty());
8822        assert!(!result.journal_entries.is_empty());
8823        assert!(result.statistics.accounts_count > 0);
8824
8825        // Subledger linking should have run
8826        assert!(!result.subledger.ap_invoices.is_empty());
8827        assert!(!result.subledger.ar_invoices.is_empty());
8828
8829        // Balance validation should have run
8830        assert!(result.balance_validation.validated);
8831        assert!(result.balance_validation.entries_processed > 0);
8832    }
8833
8834    #[test]
8835    fn test_subledger_linking() {
8836        let config = create_test_config();
8837        let phase_config = PhaseConfig {
8838            generate_master_data: true,
8839            generate_document_flows: true,
8840            generate_journal_entries: false,
8841            inject_anomalies: false,
8842            inject_data_quality: false,
8843            validate_balances: false,
8844            generate_ocpm_events: false,
8845            show_progress: false,
8846            vendors_per_company: 5,
8847            customers_per_company: 5,
8848            materials_per_company: 10,
8849            assets_per_company: 3,
8850            employees_per_company: 5,
8851            p2p_chains: 5,
8852            o2c_chains: 5,
8853            ..Default::default()
8854        };
8855
8856        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8857        let result = orchestrator.generate().unwrap();
8858
8859        // Should have document flows
8860        assert!(!result.document_flows.vendor_invoices.is_empty());
8861        assert!(!result.document_flows.customer_invoices.is_empty());
8862
8863        // Subledger should be linked from document flows
8864        assert!(!result.subledger.ap_invoices.is_empty());
8865        assert!(!result.subledger.ar_invoices.is_empty());
8866
8867        // AP invoices count should match vendor invoices count
8868        assert_eq!(
8869            result.subledger.ap_invoices.len(),
8870            result.document_flows.vendor_invoices.len()
8871        );
8872
8873        // AR invoices count should match customer invoices count
8874        assert_eq!(
8875            result.subledger.ar_invoices.len(),
8876            result.document_flows.customer_invoices.len()
8877        );
8878
8879        // Statistics should reflect subledger counts
8880        assert_eq!(
8881            result.statistics.ap_invoice_count,
8882            result.subledger.ap_invoices.len()
8883        );
8884        assert_eq!(
8885            result.statistics.ar_invoice_count,
8886            result.subledger.ar_invoices.len()
8887        );
8888    }
8889
8890    #[test]
8891    fn test_balance_validation() {
8892        let config = create_test_config();
8893        let phase_config = PhaseConfig {
8894            generate_master_data: false,
8895            generate_document_flows: false,
8896            generate_journal_entries: true,
8897            inject_anomalies: false,
8898            validate_balances: true,
8899            show_progress: false,
8900            ..Default::default()
8901        };
8902
8903        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8904        let result = orchestrator.generate().unwrap();
8905
8906        // Balance validation should run
8907        assert!(result.balance_validation.validated);
8908        assert!(result.balance_validation.entries_processed > 0);
8909
8910        // Generated JEs should be balanced (no unbalanced entries)
8911        assert!(!result.balance_validation.has_unbalanced_entries);
8912
8913        // Total debits should equal total credits
8914        assert_eq!(
8915            result.balance_validation.total_debits,
8916            result.balance_validation.total_credits
8917        );
8918    }
8919
8920    #[test]
8921    fn test_statistics_accuracy() {
8922        let config = create_test_config();
8923        let phase_config = PhaseConfig {
8924            generate_master_data: true,
8925            generate_document_flows: false,
8926            generate_journal_entries: true,
8927            inject_anomalies: false,
8928            show_progress: false,
8929            vendors_per_company: 10,
8930            customers_per_company: 20,
8931            materials_per_company: 15,
8932            assets_per_company: 5,
8933            employees_per_company: 8,
8934            ..Default::default()
8935        };
8936
8937        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8938        let result = orchestrator.generate().unwrap();
8939
8940        // Statistics should match actual data
8941        assert_eq!(
8942            result.statistics.vendor_count,
8943            result.master_data.vendors.len()
8944        );
8945        assert_eq!(
8946            result.statistics.customer_count,
8947            result.master_data.customers.len()
8948        );
8949        assert_eq!(
8950            result.statistics.material_count,
8951            result.master_data.materials.len()
8952        );
8953        assert_eq!(
8954            result.statistics.total_entries as usize,
8955            result.journal_entries.len()
8956        );
8957    }
8958
8959    #[test]
8960    fn test_phase_config_defaults() {
8961        let config = PhaseConfig::default();
8962        assert!(config.generate_master_data);
8963        assert!(config.generate_document_flows);
8964        assert!(config.generate_journal_entries);
8965        assert!(!config.inject_anomalies);
8966        assert!(config.validate_balances);
8967        assert!(config.show_progress);
8968        assert!(config.vendors_per_company > 0);
8969        assert!(config.customers_per_company > 0);
8970    }
8971
8972    #[test]
8973    fn test_get_coa_before_generation() {
8974        let config = create_test_config();
8975        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
8976
8977        // Before generation, CoA should be None
8978        assert!(orchestrator.get_coa().is_none());
8979    }
8980
8981    #[test]
8982    fn test_get_coa_after_generation() {
8983        let config = create_test_config();
8984        let phase_config = PhaseConfig {
8985            generate_master_data: false,
8986            generate_document_flows: false,
8987            generate_journal_entries: true,
8988            inject_anomalies: false,
8989            show_progress: false,
8990            ..Default::default()
8991        };
8992
8993        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
8994        let _ = orchestrator.generate().unwrap();
8995
8996        // After generation, CoA should be available
8997        assert!(orchestrator.get_coa().is_some());
8998    }
8999
9000    #[test]
9001    fn test_get_master_data() {
9002        let config = create_test_config();
9003        let phase_config = PhaseConfig {
9004            generate_master_data: true,
9005            generate_document_flows: false,
9006            generate_journal_entries: false,
9007            inject_anomalies: false,
9008            show_progress: false,
9009            vendors_per_company: 5,
9010            customers_per_company: 5,
9011            materials_per_company: 5,
9012            assets_per_company: 5,
9013            employees_per_company: 5,
9014            ..Default::default()
9015        };
9016
9017        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9018        let result = orchestrator.generate().unwrap();
9019
9020        // After generate(), master_data is moved into the result
9021        assert!(!result.master_data.vendors.is_empty());
9022    }
9023
9024    #[test]
9025    fn test_with_progress_builder() {
9026        let config = create_test_config();
9027        let orchestrator = EnhancedOrchestrator::with_defaults(config)
9028            .unwrap()
9029            .with_progress(false);
9030
9031        // Should still work without progress
9032        assert!(!orchestrator.phase_config.show_progress);
9033    }
9034
9035    #[test]
9036    fn test_multi_company_generation() {
9037        let mut config = create_test_config();
9038        config.companies.push(CompanyConfig {
9039            code: "2000".to_string(),
9040            name: "Subsidiary".to_string(),
9041            currency: "EUR".to_string(),
9042            country: "DE".to_string(),
9043            annual_transaction_volume: TransactionVolume::TenK,
9044            volume_weight: 0.5,
9045            fiscal_year_variant: "K4".to_string(),
9046        });
9047
9048        let phase_config = PhaseConfig {
9049            generate_master_data: true,
9050            generate_document_flows: false,
9051            generate_journal_entries: true,
9052            inject_anomalies: false,
9053            show_progress: false,
9054            vendors_per_company: 5,
9055            customers_per_company: 5,
9056            materials_per_company: 5,
9057            assets_per_company: 5,
9058            employees_per_company: 5,
9059            ..Default::default()
9060        };
9061
9062        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9063        let result = orchestrator.generate().unwrap();
9064
9065        // Should have master data for both companies
9066        assert!(result.statistics.vendor_count >= 10); // 5 per company
9067        assert!(result.statistics.customer_count >= 10);
9068        assert!(result.statistics.companies_count == 2);
9069    }
9070
9071    #[test]
9072    fn test_empty_master_data_skips_document_flows() {
9073        let config = create_test_config();
9074        let phase_config = PhaseConfig {
9075            generate_master_data: false,   // Skip master data
9076            generate_document_flows: true, // Try to generate flows
9077            generate_journal_entries: false,
9078            inject_anomalies: false,
9079            show_progress: false,
9080            ..Default::default()
9081        };
9082
9083        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9084        let result = orchestrator.generate().unwrap();
9085
9086        // Without master data, document flows should be empty
9087        assert!(result.document_flows.p2p_chains.is_empty());
9088        assert!(result.document_flows.o2c_chains.is_empty());
9089    }
9090
9091    #[test]
9092    fn test_journal_entry_line_item_count() {
9093        let config = create_test_config();
9094        let phase_config = PhaseConfig {
9095            generate_master_data: false,
9096            generate_document_flows: false,
9097            generate_journal_entries: true,
9098            inject_anomalies: false,
9099            show_progress: false,
9100            ..Default::default()
9101        };
9102
9103        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9104        let result = orchestrator.generate().unwrap();
9105
9106        // Total line items should match sum of all entry line counts
9107        let calculated_line_items: u64 = result
9108            .journal_entries
9109            .iter()
9110            .map(|e| e.line_count() as u64)
9111            .sum();
9112        assert_eq!(result.statistics.total_line_items, calculated_line_items);
9113    }
9114
9115    #[test]
9116    fn test_audit_generation() {
9117        let config = create_test_config();
9118        let phase_config = PhaseConfig {
9119            generate_master_data: false,
9120            generate_document_flows: false,
9121            generate_journal_entries: true,
9122            inject_anomalies: false,
9123            show_progress: false,
9124            generate_audit: true,
9125            audit_engagements: 2,
9126            workpapers_per_engagement: 5,
9127            evidence_per_workpaper: 2,
9128            risks_per_engagement: 3,
9129            findings_per_engagement: 2,
9130            judgments_per_engagement: 2,
9131            ..Default::default()
9132        };
9133
9134        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9135        let result = orchestrator.generate().unwrap();
9136
9137        // Should have generated audit data
9138        assert_eq!(result.audit.engagements.len(), 2);
9139        assert!(!result.audit.workpapers.is_empty());
9140        assert!(!result.audit.evidence.is_empty());
9141        assert!(!result.audit.risk_assessments.is_empty());
9142        assert!(!result.audit.findings.is_empty());
9143        assert!(!result.audit.judgments.is_empty());
9144
9145        // Statistics should match
9146        assert_eq!(
9147            result.statistics.audit_engagement_count,
9148            result.audit.engagements.len()
9149        );
9150        assert_eq!(
9151            result.statistics.audit_workpaper_count,
9152            result.audit.workpapers.len()
9153        );
9154        assert_eq!(
9155            result.statistics.audit_evidence_count,
9156            result.audit.evidence.len()
9157        );
9158        assert_eq!(
9159            result.statistics.audit_risk_count,
9160            result.audit.risk_assessments.len()
9161        );
9162        assert_eq!(
9163            result.statistics.audit_finding_count,
9164            result.audit.findings.len()
9165        );
9166        assert_eq!(
9167            result.statistics.audit_judgment_count,
9168            result.audit.judgments.len()
9169        );
9170    }
9171
9172    #[test]
9173    fn test_new_phases_disabled_by_default() {
9174        let config = create_test_config();
9175        // Verify new config fields default to disabled
9176        assert!(!config.llm.enabled);
9177        assert!(!config.diffusion.enabled);
9178        assert!(!config.causal.enabled);
9179
9180        let phase_config = PhaseConfig {
9181            generate_master_data: false,
9182            generate_document_flows: false,
9183            generate_journal_entries: true,
9184            inject_anomalies: false,
9185            show_progress: false,
9186            ..Default::default()
9187        };
9188
9189        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9190        let result = orchestrator.generate().unwrap();
9191
9192        // All new phase statistics should be zero when disabled
9193        assert_eq!(result.statistics.llm_enrichment_ms, 0);
9194        assert_eq!(result.statistics.llm_vendors_enriched, 0);
9195        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
9196        assert_eq!(result.statistics.diffusion_samples_generated, 0);
9197        assert_eq!(result.statistics.causal_generation_ms, 0);
9198        assert_eq!(result.statistics.causal_samples_generated, 0);
9199        assert!(result.statistics.causal_validation_passed.is_none());
9200        assert_eq!(result.statistics.counterfactual_pair_count, 0);
9201        assert!(result.counterfactual_pairs.is_empty());
9202    }
9203
9204    #[test]
9205    fn test_counterfactual_generation_enabled() {
9206        let config = create_test_config();
9207        let phase_config = PhaseConfig {
9208            generate_master_data: false,
9209            generate_document_flows: false,
9210            generate_journal_entries: true,
9211            inject_anomalies: false,
9212            show_progress: false,
9213            generate_counterfactuals: true,
9214            ..Default::default()
9215        };
9216
9217        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9218        let result = orchestrator.generate().unwrap();
9219
9220        // With JE generation enabled, counterfactual pairs should be generated
9221        if !result.journal_entries.is_empty() {
9222            assert_eq!(
9223                result.counterfactual_pairs.len(),
9224                result.journal_entries.len()
9225            );
9226            assert_eq!(
9227                result.statistics.counterfactual_pair_count,
9228                result.journal_entries.len()
9229            );
9230            // Each pair should have a distinct pair_id
9231            let ids: std::collections::HashSet<_> = result
9232                .counterfactual_pairs
9233                .iter()
9234                .map(|p| p.pair_id.clone())
9235                .collect();
9236            assert_eq!(ids.len(), result.counterfactual_pairs.len());
9237        }
9238    }
9239
9240    #[test]
9241    fn test_llm_enrichment_enabled() {
9242        let mut config = create_test_config();
9243        config.llm.enabled = true;
9244        config.llm.max_vendor_enrichments = 3;
9245
9246        let phase_config = PhaseConfig {
9247            generate_master_data: true,
9248            generate_document_flows: false,
9249            generate_journal_entries: false,
9250            inject_anomalies: false,
9251            show_progress: false,
9252            vendors_per_company: 5,
9253            customers_per_company: 3,
9254            materials_per_company: 3,
9255            assets_per_company: 3,
9256            employees_per_company: 3,
9257            ..Default::default()
9258        };
9259
9260        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9261        let result = orchestrator.generate().unwrap();
9262
9263        // LLM enrichment should have run
9264        assert!(result.statistics.llm_vendors_enriched > 0);
9265        assert!(result.statistics.llm_vendors_enriched <= 3);
9266    }
9267
9268    #[test]
9269    fn test_diffusion_enhancement_enabled() {
9270        let mut config = create_test_config();
9271        config.diffusion.enabled = true;
9272        config.diffusion.n_steps = 50;
9273        config.diffusion.sample_size = 20;
9274
9275        let phase_config = PhaseConfig {
9276            generate_master_data: false,
9277            generate_document_flows: false,
9278            generate_journal_entries: true,
9279            inject_anomalies: false,
9280            show_progress: false,
9281            ..Default::default()
9282        };
9283
9284        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9285        let result = orchestrator.generate().unwrap();
9286
9287        // Diffusion phase should have generated samples
9288        assert_eq!(result.statistics.diffusion_samples_generated, 20);
9289    }
9290
9291    #[test]
9292    fn test_causal_overlay_enabled() {
9293        let mut config = create_test_config();
9294        config.causal.enabled = true;
9295        config.causal.template = "fraud_detection".to_string();
9296        config.causal.sample_size = 100;
9297        config.causal.validate = true;
9298
9299        let phase_config = PhaseConfig {
9300            generate_master_data: false,
9301            generate_document_flows: false,
9302            generate_journal_entries: true,
9303            inject_anomalies: false,
9304            show_progress: false,
9305            ..Default::default()
9306        };
9307
9308        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9309        let result = orchestrator.generate().unwrap();
9310
9311        // Causal phase should have generated samples
9312        assert_eq!(result.statistics.causal_samples_generated, 100);
9313        // Validation should have run
9314        assert!(result.statistics.causal_validation_passed.is_some());
9315    }
9316
9317    #[test]
9318    fn test_causal_overlay_revenue_cycle_template() {
9319        let mut config = create_test_config();
9320        config.causal.enabled = true;
9321        config.causal.template = "revenue_cycle".to_string();
9322        config.causal.sample_size = 50;
9323        config.causal.validate = false;
9324
9325        let phase_config = PhaseConfig {
9326            generate_master_data: false,
9327            generate_document_flows: false,
9328            generate_journal_entries: true,
9329            inject_anomalies: false,
9330            show_progress: false,
9331            ..Default::default()
9332        };
9333
9334        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
9335        let result = orchestrator.generate().unwrap();
9336
9337        // Causal phase should have generated samples
9338        assert_eq!(result.statistics.causal_samples_generated, 50);
9339        // Validation was disabled
9340        assert!(result.statistics.causal_validation_passed.is_none());
9341    }
9342
/// Enables LLM enrichment, diffusion enhancement and the causal overlay in a
/// single run and checks that each of them reports output in the statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let config = {
        let mut cfg = create_test_config();

        // LLM enrichment settings.
        cfg.llm.enabled = true;
        cfg.llm.max_vendor_enrichments = 2;

        // Diffusion enhancement settings.
        cfg.diffusion.enabled = true;
        cfg.diffusion.n_steps = 20;
        cfg.diffusion.sample_size = 10;

        // Causal overlay settings, with validation turned on.
        cfg.causal.enabled = true;
        cfg.causal.sample_size = 50;
        cfg.causal.validate = true;

        cfg
    };

    // Master data is generated with small per-company counts, and journal
    // entries are requested; document flows, anomaly injection and progress
    // output are disabled.
    let phases = PhaseConfig {
        show_progress: false,
        inject_anomalies: false,
        generate_journal_entries: true,
        generate_document_flows: false,
        generate_master_data: true,
        employees_per_company: 3,
        assets_per_company: 3,
        materials_per_company: 3,
        customers_per_company: 3,
        vendors_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phases).unwrap();
    let outcome = orchestrator.generate().unwrap();
    let stats = &outcome.statistics;

    // LLM enrichment: at least one vendor was enriched.
    assert!(stats.llm_vendors_enriched > 0);
    // Diffusion: sample count equals the configured sample size.
    assert_eq!(stats.diffusion_samples_generated, 10);
    // Causal overlay: sample count equals the configured sample size, and a
    // validation verdict was recorded because validation was enabled.
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
9378
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that
/// every field populated by this test comes back unchanged.
///
/// Both the base counters (`total_entries`, `total_line_items`) and the LLM,
/// diffusion and causal fields are verified, so a field that silently stops
/// being serialized or deserialized makes the test fail.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let stats = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let json = serde_json::to_string(&stats).expect("statistics should serialize to JSON");
    let deserialized: EnhancedGenerationStatistics =
        serde_json::from_str(&json).expect("serialized statistics should deserialize");

    // Base counters set above must survive the round trip as well.
    assert_eq!(deserialized.total_entries, 100);
    assert_eq!(deserialized.total_line_items, 500);

    // LLM enrichment fields.
    assert_eq!(deserialized.llm_enrichment_ms, 42);
    assert_eq!(deserialized.llm_vendors_enriched, 10);

    // Diffusion enhancement fields.
    assert_eq!(deserialized.diffusion_enhancement_ms, 100);
    assert_eq!(deserialized.diffusion_samples_generated, 50);

    // Causal overlay fields.
    assert_eq!(deserialized.causal_generation_ms, 200);
    assert_eq!(deserialized.causal_samples_generated, 100);
    assert_eq!(deserialized.causal_validation_passed, Some(true));
}
9405
/// Checks that statistics JSON written before the LLM, diffusion and causal
/// fields existed still deserializes.
///
/// Two things are verified: the legacy keys present in the JSON are actually
/// read (not silently ignored), and the fields missing from the JSON take
/// their default values (`0` / `None`).
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Old JSON without the new fields should still deserialize
    let old_json = r#"{
        "total_entries": 100,
        "total_line_items": 500,
        "accounts_count": 50,
        "companies_count": 1,
        "period_months": 12,
        "vendor_count": 10,
        "customer_count": 20,
        "material_count": 15,
        "asset_count": 5,
        "employee_count": 8,
        "p2p_chain_count": 5,
        "o2c_chain_count": 5,
        "ap_invoice_count": 5,
        "ar_invoice_count": 5,
        "ocpm_event_count": 0,
        "ocpm_object_count": 0,
        "ocpm_case_count": 0,
        "audit_engagement_count": 0,
        "audit_workpaper_count": 0,
        "audit_evidence_count": 0,
        "audit_risk_count": 0,
        "audit_finding_count": 0,
        "audit_judgment_count": 0,
        "anomalies_injected": 0,
        "data_quality_issues": 0,
        "banking_customer_count": 0,
        "banking_account_count": 0,
        "banking_transaction_count": 0,
        "banking_suspicious_count": 0,
        "graph_export_count": 0,
        "graph_node_count": 0,
        "graph_edge_count": 0
    }"#;

    let stats: EnhancedGenerationStatistics =
        serde_json::from_str(old_json).expect("legacy statistics JSON should deserialize");

    // Legacy keys must be honoured; otherwise the default checks below could
    // pass even if the whole document had been ignored.
    assert_eq!(stats.total_entries, 100);
    assert_eq!(stats.total_line_items, 500);

    // New fields should default to 0 / None
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
}
9455}