Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Switches and volume counts that decide which generation phases run.
#[derive(Clone, Debug)]
pub struct PhaseConfig {
    /// Produce master data: vendors, customers, materials, assets, employees.
    pub generate_master_data: bool,
    /// Produce P2P and O2C document flows.
    pub generate_document_flows: bool,
    /// Produce OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation is done.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors to generate per company.
    pub vendors_per_company: usize,
    /// Customers to generate per company.
    pub customers_per_company: usize,
    /// Materials to generate per company.
    pub materials_per_company: usize,
    /// Assets to generate per company.
    pub assets_per_company: usize,
    /// Employees to generate per company.
    pub employees_per_company: usize,
    /// P2P chains to generate.
    pub p2p_chains: usize,
    /// O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data: engagements, workpapers, evidence, risks, findings, judgments.
    pub generate_audit: bool,
    /// Audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data: customers, accounts, transactions, typologies.
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data: spend analysis, RFx, bids, contracts, catalogs, scorecards.
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data: production orders, quality inspections, cycle counts.
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data: emissions, energy, water, waste, social, governance.
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance regulations data: standards registry, procedures, findings, filings.
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Produce HR data: payroll, time entries, expenses, pensions, stock comp.
    pub generate_hr: bool,
    /// Produce treasury data: cash management, hedging, debt, pooling.
    pub generate_treasury: bool,
    /// Produce project accounting data: projects, costs, revenue, EVM, milestones.
    pub generate_project_accounting: bool,
}
336
337impl Default for PhaseConfig {
338    fn default() -> Self {
339        Self {
340            generate_master_data: true,
341            generate_document_flows: true,
342            generate_ocpm_events: false, // Off by default
343            generate_journal_entries: true,
344            inject_anomalies: false,
345            inject_data_quality: false, // Off by default (to preserve clean test data)
346            validate_balances: true,
347            show_progress: true,
348            vendors_per_company: 50,
349            customers_per_company: 100,
350            materials_per_company: 200,
351            assets_per_company: 50,
352            employees_per_company: 100,
353            p2p_chains: 100,
354            o2c_chains: 100,
355            generate_audit: false, // Off by default
356            audit_engagements: 5,
357            workpapers_per_engagement: 20,
358            evidence_per_workpaper: 5,
359            risks_per_engagement: 15,
360            findings_per_engagement: 8,
361            judgments_per_engagement: 10,
362            generate_banking: false,                // Off by default
363            generate_graph_export: false,           // Off by default
364            generate_sourcing: false,               // Off by default
365            generate_bank_reconciliation: false,    // Off by default
366            generate_financial_statements: false,   // Off by default
367            generate_accounting_standards: false,   // Off by default
368            generate_manufacturing: false,          // Off by default
369            generate_sales_kpi_budgets: false,      // Off by default
370            generate_tax: false,                    // Off by default
371            generate_esg: false,                    // Off by default
372            generate_intercompany: false,           // Off by default
373            generate_evolution_events: true,        // On by default
374            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
375            generate_compliance_regulations: false, // Off by default
376            generate_period_close: true,            // On by default
377            generate_hr: false,                     // Off by default
378            generate_treasury: false,               // Off by default
379            generate_project_accounting: false,     // Off by default
380        }
381    }
382}
383
384impl PhaseConfig {
385    /// Derive phase flags from [`GeneratorConfig`].
386    ///
387    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
388    /// CLI flags can override individual fields after calling this method.
389    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390        Self {
391            // Always-on phases
392            generate_master_data: true,
393            generate_document_flows: true,
394            generate_journal_entries: true,
395            validate_balances: true,
396            generate_period_close: true,
397            generate_evolution_events: true,
398            show_progress: true,
399
400            // Feature-gated phases — derived from config sections
401            generate_audit: cfg.audit.enabled,
402            generate_banking: cfg.banking.enabled,
403            generate_graph_export: cfg.graph_export.enabled,
404            generate_sourcing: cfg.source_to_pay.enabled,
405            generate_intercompany: cfg.intercompany.enabled,
406            generate_financial_statements: cfg.financial_reporting.enabled,
407            generate_bank_reconciliation: cfg.financial_reporting.enabled,
408            generate_accounting_standards: cfg.accounting_standards.enabled,
409            generate_manufacturing: cfg.manufacturing.enabled,
410            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411            generate_tax: cfg.tax.enabled,
412            generate_esg: cfg.esg.enabled,
413            generate_ocpm_events: cfg.ocpm.enabled,
414            generate_compliance_regulations: cfg.compliance_regulations.enabled,
415            generate_hr: cfg.hr.enabled,
416            generate_treasury: cfg.treasury.enabled,
417            generate_project_accounting: cfg.project_accounting.enabled,
418
419            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
420            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423            inject_data_quality: cfg.data_quality.enabled,
424
425            // Count defaults (CLI can override after calling this method)
426            vendors_per_company: 50,
427            customers_per_company: 100,
428            materials_per_company: 200,
429            assets_per_company: 50,
430            employees_per_company: 100,
431            p2p_chains: 100,
432            o2c_chains: 100,
433            audit_engagements: 5,
434            workpapers_per_engagement: 20,
435            evidence_per_workpaper: 5,
436            risks_per_engagement: 15,
437            findings_per_engagement: 8,
438            judgments_per_engagement: 10,
439        }
440    }
441}
442
443/// Master data snapshot containing all generated entities.
444#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446    /// Generated vendors.
447    pub vendors: Vec<Vendor>,
448    /// Generated customers.
449    pub customers: Vec<Customer>,
450    /// Generated materials.
451    pub materials: Vec<Material>,
452    /// Generated fixed assets.
453    pub assets: Vec<FixedAsset>,
454    /// Generated employees.
455    pub employees: Vec<Employee>,
456    /// Generated cost center hierarchy (two-level: departments + sub-departments).
457    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
459    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Summary of a finished hypergraph export.
#[derive(Clone, Debug)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
474
475/// Document flow snapshot containing all generated document chains.
476#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478    /// P2P document chains.
479    pub p2p_chains: Vec<P2PDocumentChain>,
480    /// O2C document chains.
481    pub o2c_chains: Vec<O2CDocumentChain>,
482    /// All purchase orders (flattened).
483    pub purchase_orders: Vec<documents::PurchaseOrder>,
484    /// All goods receipts (flattened).
485    pub goods_receipts: Vec<documents::GoodsReceipt>,
486    /// All vendor invoices (flattened).
487    pub vendor_invoices: Vec<documents::VendorInvoice>,
488    /// All sales orders (flattened).
489    pub sales_orders: Vec<documents::SalesOrder>,
490    /// All deliveries (flattened).
491    pub deliveries: Vec<documents::Delivery>,
492    /// All customer invoices (flattened).
493    pub customer_invoices: Vec<documents::CustomerInvoice>,
494    /// All payments (flattened).
495    pub payments: Vec<documents::Payment>,
496    /// Cross-document references collected from all document headers
497    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
498    pub document_references: Vec<documents::DocumentReference>,
499}
500
501/// Subledger snapshot containing generated subledger records.
502#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504    /// AP invoices linked from document flow vendor invoices.
505    pub ap_invoices: Vec<APInvoice>,
506    /// AR invoices linked from document flow customer invoices.
507    pub ar_invoices: Vec<ARInvoice>,
508    /// FA subledger records (asset acquisitions from FA generator).
509    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510    /// Inventory positions from inventory generator.
511    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512    /// Inventory movements from inventory generator.
513    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514    /// AR aging reports, one per company, computed after payment settlement.
515    pub ar_aging_reports: Vec<ARAgingReport>,
516    /// AP aging reports, one per company, computed after payment settlement.
517    pub ap_aging_reports: Vec<APAgingReport>,
518    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
519    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
521    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522    /// Dunning runs executed after AR aging (one per company per dunning cycle).
523    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524    /// Dunning letters generated across all dunning runs.
525    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528/// OCPM snapshot containing generated OCPM event log data.
529#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531    /// OCPM event log (if generated)
532    pub event_log: Option<OcpmEventLog>,
533    /// Number of events generated
534    pub event_count: usize,
535    /// Number of objects generated
536    pub object_count: usize,
537    /// Number of cases generated
538    pub case_count: usize,
539}
540
541/// Audit data snapshot containing all generated audit-related entities.
542#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544    /// Audit engagements per ISA 210/220.
545    pub engagements: Vec<AuditEngagement>,
546    /// Workpapers per ISA 230.
547    pub workpapers: Vec<Workpaper>,
548    /// Audit evidence per ISA 500.
549    pub evidence: Vec<AuditEvidence>,
550    /// Risk assessments per ISA 315/330.
551    pub risk_assessments: Vec<RiskAssessment>,
552    /// Audit findings per ISA 265.
553    pub findings: Vec<AuditFinding>,
554    /// Professional judgments per ISA 200.
555    pub judgments: Vec<ProfessionalJudgment>,
556    /// External confirmations per ISA 505.
557    pub confirmations: Vec<ExternalConfirmation>,
558    /// Confirmation responses per ISA 505.
559    pub confirmation_responses: Vec<ConfirmationResponse>,
560    /// Audit procedure steps per ISA 330/530.
561    pub procedure_steps: Vec<AuditProcedureStep>,
562    /// Audit samples per ISA 530.
563    pub samples: Vec<AuditSample>,
564    /// Analytical procedure results per ISA 520.
565    pub analytical_results: Vec<AnalyticalProcedureResult>,
566    /// Internal audit functions per ISA 610.
567    pub ia_functions: Vec<InternalAuditFunction>,
568    /// Internal audit reports per ISA 610.
569    pub ia_reports: Vec<InternalAuditReport>,
570    /// Related parties per ISA 550.
571    pub related_parties: Vec<RelatedParty>,
572    /// Related party transactions per ISA 550.
573    pub related_party_transactions: Vec<RelatedPartyTransaction>,
574    // ---- ISA 600: Group Audits ----
575    /// Component auditors assigned by jurisdiction (ISA 600).
576    pub component_auditors: Vec<ComponentAuditor>,
577    /// Group audit plan with materiality allocations (ISA 600).
578    pub group_audit_plan: Option<GroupAuditPlan>,
579    /// Component instructions issued to component auditors (ISA 600).
580    pub component_instructions: Vec<ComponentInstruction>,
581    /// Reports received from component auditors (ISA 600).
582    pub component_reports: Vec<ComponentAuditorReport>,
583    // ---- ISA 210: Engagement Letters ----
584    /// Engagement letters per ISA 210.
585    pub engagement_letters: Vec<EngagementLetter>,
586    // ---- ISA 560 / IAS 10: Subsequent Events ----
587    /// Subsequent events per ISA 560 / IAS 10.
588    pub subsequent_events: Vec<SubsequentEvent>,
589    // ---- ISA 402: Service Organization Controls ----
590    /// Service organizations identified per ISA 402.
591    pub service_organizations: Vec<ServiceOrganization>,
592    /// SOC reports obtained per ISA 402.
593    pub soc_reports: Vec<SocReport>,
594    /// User entity controls documented per ISA 402.
595    pub user_entity_controls: Vec<UserEntityControl>,
596    // ---- ISA 570: Going Concern ----
597    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
598    pub going_concern_assessments:
599        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600    // ---- ISA 540: Accounting Estimates ----
601    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
602    pub accounting_estimates:
603        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604    // ---- ISA 700/701/705/706: Audit Opinions ----
605    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
606    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607    /// Key Audit Matters per ISA 701 (flattened across all opinions).
608    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609    // ---- SOX 302 / 404 ----
610    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
611    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612    /// SOX Section 404 ICFR assessments (one per entity per year).
613    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614    // ---- ISA 320: Materiality ----
615    /// Materiality calculations per entity per period (ISA 320).
616    pub materiality_calculations:
617        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618    // ---- ISA 315: Combined Risk Assessments ----
619    /// Combined Risk Assessments per account area / assertion (ISA 315).
620    pub combined_risk_assessments:
621        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622    // ---- ISA 530: Sampling Plans ----
623    /// Sampling plans per CRA at Moderate or higher (ISA 530).
624    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625    /// Individual sampled items (key items + representative items) per ISA 530.
626    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
628    /// Significant classes of transactions per ISA 315 (one set per entity).
629    pub significant_transaction_classes:
630        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631    // ---- ISA 520: Unusual Item Markers ----
632    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
633    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634    // ---- ISA 520: Analytical Relationships ----
635    /// Analytical relationships (ratios, trends, correlations) per entity.
636    pub analytical_relationships:
637        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638    // ---- PCAOB-ISA Cross-Reference ----
639    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
640    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641    // ---- ISA Standard Reference ----
642    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
643    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644    // ---- ISA 220 / ISA 300: Audit Scopes ----
645    /// Audit scope records (one per engagement) describing the audit boundary.
646    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647    // ---- FSM Event Trail ----
648    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
649    /// Contains the ordered sequence of state-transition and procedure-step events
650    /// generated by the audit FSM engine.
651    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654/// Banking KYC/AML data snapshot containing all generated banking entities.
655#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657    /// Banking customers (retail, business, trust).
658    pub customers: Vec<BankingCustomer>,
659    /// Bank accounts.
660    pub accounts: Vec<BankAccount>,
661    /// Bank transactions with AML labels.
662    pub transactions: Vec<BankTransaction>,
663    /// Transaction-level AML labels with features.
664    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665    /// Customer-level AML labels.
666    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667    /// Account-level AML labels.
668    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669    /// Relationship-level AML labels.
670    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671    /// Case narratives for AML scenarios.
672    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673    /// Number of suspicious transactions.
674    pub suspicious_count: usize,
675    /// Number of AML scenarios generated.
676    pub scenario_count: usize,
677}
678
679/// Graph export snapshot containing exported graph metadata.
680#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682    /// Whether graph export was performed.
683    pub exported: bool,
684    /// Number of graphs exported.
685    pub graph_count: usize,
686    /// Exported graph metadata (by format name).
687    pub exports: HashMap<String, GraphExportInfo>,
688}
689
690/// Information about an exported graph.
691#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693    /// Graph name.
694    pub name: String,
695    /// Export format (pytorch_geometric, neo4j, dgl).
696    pub format: String,
697    /// Output directory path.
698    pub output_path: PathBuf,
699    /// Number of nodes.
700    pub node_count: usize,
701    /// Number of edges.
702    pub edge_count: usize,
703}
704
705/// S2C sourcing data snapshot.
706#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708    /// Spend analyses.
709    pub spend_analyses: Vec<SpendAnalysis>,
710    /// Sourcing projects.
711    pub sourcing_projects: Vec<SourcingProject>,
712    /// Supplier qualifications.
713    pub qualifications: Vec<SupplierQualification>,
714    /// RFx events (RFI, RFP, RFQ).
715    pub rfx_events: Vec<RfxEvent>,
716    /// Supplier bids.
717    pub bids: Vec<SupplierBid>,
718    /// Bid evaluations.
719    pub bid_evaluations: Vec<BidEvaluation>,
720    /// Procurement contracts.
721    pub contracts: Vec<ProcurementContract>,
722    /// Catalog items.
723    pub catalog_items: Vec<CatalogItem>,
724    /// Supplier scorecards.
725    pub scorecards: Vec<SupplierScorecard>,
726}
727
728/// A single period's trial balance with metadata.
729#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731    /// Fiscal year.
732    pub fiscal_year: u16,
733    /// Fiscal period (1-12).
734    pub fiscal_period: u8,
735    /// Period start date.
736    pub period_start: NaiveDate,
737    /// Period end date.
738    pub period_end: NaiveDate,
739    /// Trial balance entries for this period.
740    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743/// Financial reporting snapshot (financial statements + bank reconciliations).
744#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746    /// Financial statements (balance sheet, income statement, cash flow).
747    /// For multi-entity configs this includes all standalone statements.
748    pub financial_statements: Vec<FinancialStatement>,
749    /// Standalone financial statements keyed by entity code.
750    /// Each entity has its own slice of statements.
751    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
753    pub consolidated_statements: Vec<FinancialStatement>,
754    /// Consolidation schedules (one per period) showing pre/post elimination detail.
755    pub consolidation_schedules: Vec<ConsolidationSchedule>,
756    /// Bank reconciliations.
757    pub bank_reconciliations: Vec<BankReconciliation>,
758    /// Period-close trial balances (one per period).
759    pub trial_balances: Vec<PeriodTrialBalance>,
760    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
761    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
763    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
765    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
769#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771    /// Payroll runs (actual data).
772    pub payroll_runs: Vec<PayrollRun>,
773    /// Payroll line items (actual data).
774    pub payroll_line_items: Vec<PayrollLineItem>,
775    /// Time entries (actual data).
776    pub time_entries: Vec<TimeEntry>,
777    /// Expense reports (actual data).
778    pub expense_reports: Vec<ExpenseReport>,
779    /// Benefit enrollments (actual data).
780    pub benefit_enrollments: Vec<BenefitEnrollment>,
781    /// Defined benefit pension plans (IAS 19 / ASC 715).
782    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783    /// Pension obligation (DBO) roll-forwards.
784    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785    /// Plan asset roll-forwards.
786    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787    /// Pension disclosures.
788    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789    /// Journal entries generated from pension expense and OCI remeasurements.
790    pub pension_journal_entries: Vec<JournalEntry>,
791    /// Stock grants (ASC 718 / IFRS 2).
792    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793    /// Stock-based compensation period expense records.
794    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795    /// Journal entries generated from stock-based compensation expense.
796    pub stock_comp_journal_entries: Vec<JournalEntry>,
797    /// Payroll runs.
798    pub payroll_run_count: usize,
799    /// Payroll line item count.
800    pub payroll_line_item_count: usize,
801    /// Time entry count.
802    pub time_entry_count: usize,
803    /// Expense report count.
804    pub expense_report_count: usize,
805    /// Benefit enrollment count.
806    pub benefit_enrollment_count: usize,
807    /// Pension plan count.
808    pub pension_plan_count: usize,
809    /// Stock grant count.
810    pub stock_grant_count: usize,
811}
812
813/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
814#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816    /// Revenue recognition contracts (actual data).
817    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818    /// Impairment tests (actual data).
819    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820    /// Business combinations (IFRS 3 / ASC 805).
821    pub business_combinations:
822        Vec<datasynth_core::models::business_combination::BusinessCombination>,
823    /// Journal entries generated from business combinations (Day 1 + amortization).
824    pub business_combination_journal_entries: Vec<JournalEntry>,
825    /// ECL models (IFRS 9 / ASC 326).
826    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827    /// ECL provision movements.
828    pub ecl_provision_movements:
829        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830    /// Journal entries from ECL provision.
831    pub ecl_journal_entries: Vec<JournalEntry>,
832    /// Provisions (IAS 37 / ASC 450).
833    pub provisions: Vec<datasynth_core::models::provision::Provision>,
834    /// Provision movement roll-forwards (IAS 37 / ASC 450).
835    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836    /// Contingent liabilities (IAS 37 / ASC 450).
837    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838    /// Journal entries from provisions.
839    pub provision_journal_entries: Vec<JournalEntry>,
840    /// IAS 21 functional currency translation results (one per entity per period).
841    pub currency_translation_results:
842        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843    /// Revenue recognition contract count.
844    pub revenue_contract_count: usize,
845    /// Impairment test count.
846    pub impairment_test_count: usize,
847    /// Business combination count.
848    pub business_combination_count: usize,
849    /// ECL model count.
850    pub ecl_model_count: usize,
851    /// Provision count.
852    pub provision_count: usize,
853    /// Currency translation result count (IAS 21).
854    pub currency_translation_count: usize,
855}
856
857/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
858#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860    /// Flattened standard records for output.
861    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862    /// Cross-reference records.
863    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864    /// Jurisdiction profile records.
865    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866    /// Generated audit procedures.
867    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868    /// Generated compliance findings.
869    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870    /// Generated regulatory filings.
871    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872    /// Compliance graph (if graph integration enabled).
873    pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
877#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879    /// Production orders (actual data).
880    pub production_orders: Vec<ProductionOrder>,
881    /// Quality inspections (actual data).
882    pub quality_inspections: Vec<QualityInspection>,
883    /// Cycle counts (actual data).
884    pub cycle_counts: Vec<CycleCount>,
885    /// BOM components (actual data).
886    pub bom_components: Vec<BomComponent>,
887    /// Inventory movements (actual data).
888    pub inventory_movements: Vec<InventoryMovement>,
889    /// Production order count.
890    pub production_order_count: usize,
891    /// Quality inspection count.
892    pub quality_inspection_count: usize,
893    /// Cycle count count.
894    pub cycle_count_count: usize,
895    /// BOM component count.
896    pub bom_component_count: usize,
897    /// Inventory movement count.
898    pub inventory_movement_count: usize,
899}
900
901/// Sales, KPI, and budget data snapshot.
902#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904    /// Sales quotes (actual data).
905    pub sales_quotes: Vec<SalesQuote>,
906    /// Management KPIs (actual data).
907    pub kpis: Vec<ManagementKpi>,
908    /// Budgets (actual data).
909    pub budgets: Vec<Budget>,
910    /// Sales quote count.
911    pub sales_quote_count: usize,
912    /// Management KPI count.
913    pub kpi_count: usize,
914    /// Budget line count.
915    pub budget_line_count: usize,
916}
917
918/// Anomaly labels generated during injection.
919#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921    /// All anomaly labels.
922    pub labels: Vec<LabeledAnomaly>,
923    /// Summary statistics.
924    pub summary: Option<AnomalySummary>,
925    /// Count by anomaly type.
926    pub by_type: HashMap<String, usize>,
927}
928
929/// Balance validation results from running balance tracker.
930#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932    /// Whether validation was performed.
933    pub validated: bool,
934    /// Whether balance sheet equation is satisfied.
935    pub is_balanced: bool,
936    /// Number of entries processed.
937    pub entries_processed: u64,
938    /// Total debits across all entries.
939    pub total_debits: rust_decimal::Decimal,
940    /// Total credits across all entries.
941    pub total_credits: rust_decimal::Decimal,
942    /// Number of accounts tracked.
943    pub accounts_tracked: usize,
944    /// Number of companies tracked.
945    pub companies_tracked: usize,
946    /// Validation errors encountered.
947    pub validation_errors: Vec<ValidationError>,
948    /// Whether any unbalanced entries were found.
949    pub has_unbalanced_entries: bool,
950}
951
952/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
953#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955    /// Tax jurisdictions.
956    pub jurisdictions: Vec<TaxJurisdiction>,
957    /// Tax codes.
958    pub codes: Vec<TaxCode>,
959    /// Tax lines computed on documents.
960    pub tax_lines: Vec<TaxLine>,
961    /// Tax returns filed per period.
962    pub tax_returns: Vec<TaxReturn>,
963    /// Tax provisions.
964    pub tax_provisions: Vec<TaxProvision>,
965    /// Withholding tax records.
966    pub withholding_records: Vec<WithholdingTaxRecord>,
967    /// Tax anomaly labels.
968    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969    /// Jurisdiction count.
970    pub jurisdiction_count: usize,
971    /// Code count.
972    pub code_count: usize,
973    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
974    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975    /// Journal entries posting tax payable/receivable from computed tax lines.
976    pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
980#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982    /// Group ownership structure (parent/subsidiary/associate relationships).
983    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984    /// IC matched pairs (transaction pairs between related entities).
985    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986    /// IC journal entries generated from matched pairs (seller side).
987    pub seller_journal_entries: Vec<JournalEntry>,
988    /// IC journal entries generated from matched pairs (buyer side).
989    pub buyer_journal_entries: Vec<JournalEntry>,
990    /// Elimination entries for consolidation.
991    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992    /// NCI measurements derived from group structure ownership percentages.
993    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
995    #[serde(skip)]
996    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997    /// IC matched pair count.
998    pub matched_pair_count: usize,
999    /// IC elimination entry count.
1000    pub elimination_entry_count: usize,
1001    /// IC matching rate (0.0 to 1.0).
1002    pub match_rate: f64,
1003}
1004
1005/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1006#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008    /// Emission records (scope 1, 2, 3).
1009    pub emissions: Vec<EmissionRecord>,
1010    /// Energy consumption records.
1011    pub energy: Vec<EnergyConsumption>,
1012    /// Water usage records.
1013    pub water: Vec<WaterUsage>,
1014    /// Waste records.
1015    pub waste: Vec<WasteRecord>,
1016    /// Workforce diversity metrics.
1017    pub diversity: Vec<WorkforceDiversityMetric>,
1018    /// Pay equity metrics.
1019    pub pay_equity: Vec<PayEquityMetric>,
1020    /// Safety incidents.
1021    pub safety_incidents: Vec<SafetyIncident>,
1022    /// Safety metrics.
1023    pub safety_metrics: Vec<SafetyMetric>,
1024    /// Governance metrics.
1025    pub governance: Vec<GovernanceMetric>,
1026    /// Supplier ESG assessments.
1027    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028    /// Materiality assessments.
1029    pub materiality: Vec<MaterialityAssessment>,
1030    /// ESG disclosures.
1031    pub disclosures: Vec<EsgDisclosure>,
1032    /// Climate scenarios.
1033    pub climate_scenarios: Vec<ClimateScenario>,
1034    /// ESG anomaly labels.
1035    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036    /// Total emission record count.
1037    pub emission_count: usize,
1038    /// Total disclosure count.
1039    pub disclosure_count: usize,
1040}
1041
1042/// Treasury data snapshot (cash management, hedging, debt, pooling).
1043#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045    /// Cash positions (daily balances per account).
1046    pub cash_positions: Vec<CashPosition>,
1047    /// Cash forecasts.
1048    pub cash_forecasts: Vec<CashForecast>,
1049    /// Cash pools.
1050    pub cash_pools: Vec<CashPool>,
1051    /// Cash pool sweep transactions.
1052    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053    /// Hedging instruments.
1054    pub hedging_instruments: Vec<HedgingInstrument>,
1055    /// Hedge relationships (ASC 815/IFRS 9 designations).
1056    pub hedge_relationships: Vec<HedgeRelationship>,
1057    /// Debt instruments.
1058    pub debt_instruments: Vec<DebtInstrument>,
1059    /// Bank guarantees and letters of credit.
1060    pub bank_guarantees: Vec<BankGuarantee>,
1061    /// Intercompany netting runs.
1062    pub netting_runs: Vec<NettingRun>,
1063    /// Treasury anomaly labels.
1064    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065    /// Journal entries generated from treasury instruments (debt interest accruals,
1066    /// hedge MTM, cash pool sweeps).
1067    pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1071#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073    /// Projects with WBS hierarchies.
1074    pub projects: Vec<Project>,
1075    /// Project cost lines (linked from source documents).
1076    pub cost_lines: Vec<ProjectCostLine>,
1077    /// Revenue recognition records.
1078    pub revenue_records: Vec<ProjectRevenue>,
1079    /// Earned value metrics.
1080    pub earned_value_metrics: Vec<EarnedValueMetric>,
1081    /// Change orders.
1082    pub change_orders: Vec<ChangeOrder>,
1083    /// Project milestones.
1084    pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087/// Complete result of enhanced generation run.
1088#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090    /// Generated chart of accounts.
1091    pub chart_of_accounts: ChartOfAccounts,
1092    /// Master data snapshot.
1093    pub master_data: MasterDataSnapshot,
1094    /// Document flow snapshot.
1095    pub document_flows: DocumentFlowSnapshot,
1096    /// Subledger snapshot (linked from document flows).
1097    pub subledger: SubledgerSnapshot,
1098    /// OCPM event log snapshot (if OCPM generation enabled).
1099    pub ocpm: OcpmSnapshot,
1100    /// Audit data snapshot (if audit generation enabled).
1101    pub audit: AuditSnapshot,
1102    /// Banking KYC/AML data snapshot (if banking generation enabled).
1103    pub banking: BankingSnapshot,
1104    /// Graph export snapshot (if graph export enabled).
1105    pub graph_export: GraphExportSnapshot,
1106    /// S2C sourcing data snapshot (if sourcing generation enabled).
1107    pub sourcing: SourcingSnapshot,
1108    /// Financial reporting snapshot (financial statements + bank reconciliations).
1109    pub financial_reporting: FinancialReportingSnapshot,
1110    /// HR data snapshot (payroll, time entries, expenses).
1111    pub hr: HrSnapshot,
1112    /// Accounting standards snapshot (revenue recognition, impairment).
1113    pub accounting_standards: AccountingStandardsSnapshot,
1114    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1115    pub manufacturing: ManufacturingSnapshot,
1116    /// Sales, KPI, and budget snapshot.
1117    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1119    pub tax: TaxSnapshot,
1120    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1121    pub esg: EsgSnapshot,
1122    /// Treasury data snapshot (cash management, hedging, debt).
1123    pub treasury: TreasurySnapshot,
1124    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1125    pub project_accounting: ProjectAccountingSnapshot,
1126    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1127    pub process_evolution: Vec<ProcessEvolutionEvent>,
1128    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1129    pub organizational_events: Vec<OrganizationalEvent>,
1130    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1131    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1133    pub intercompany: IntercompanySnapshot,
1134    /// Generated journal entries.
1135    pub journal_entries: Vec<JournalEntry>,
1136    /// Anomaly labels (if injection enabled).
1137    pub anomaly_labels: AnomalyLabels,
1138    /// Balance validation results (if validation enabled).
1139    pub balance_validation: BalanceValidationResult,
1140    /// Data quality statistics (if injection enabled).
1141    pub data_quality_stats: DataQualityStats,
1142    /// Data quality issue records (if injection enabled).
1143    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144    /// Generation statistics.
1145    pub statistics: EnhancedGenerationStatistics,
1146    /// Data lineage graph (if tracking enabled).
1147    pub lineage: Option<super::lineage::LineageGraph>,
1148    /// Quality gate evaluation result.
1149    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150    /// Internal controls (if controls generation enabled).
1151    pub internal_controls: Vec<InternalControl>,
1152    /// SoD (Segregation of Duties) violations identified during control application.
1153    ///
1154    /// Each record corresponds to a journal entry where `sod_violation == true`.
1155    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156    /// Opening balances (if opening balance generation enabled).
1157    pub opening_balances: Vec<GeneratedOpeningBalance>,
1158    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1159    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160    /// Counterfactual (original, mutated) JE pairs for ML training.
1161    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162    /// Fraud red-flag indicators on P2P/O2C documents.
1163    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164    /// Collusion rings (coordinated fraud networks).
1165    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166    /// Bi-temporal version chains for vendor entities.
1167    pub temporal_vendor_chains:
1168        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169    /// Entity relationship graph (nodes + edges with strength scores).
1170    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171    /// Cross-process links (P2P ↔ O2C via inventory movements).
1172    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173    /// Industry-specific GL accounts and metadata.
1174    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1176    pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179/// Enhanced statistics about a generation run.
1180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182    /// Total journal entries generated.
1183    pub total_entries: u64,
1184    /// Total line items generated.
1185    pub total_line_items: u64,
1186    /// Number of accounts in CoA.
1187    pub accounts_count: usize,
1188    /// Number of companies.
1189    pub companies_count: usize,
1190    /// Period in months.
1191    pub period_months: u32,
1192    /// Master data counts.
1193    pub vendor_count: usize,
1194    pub customer_count: usize,
1195    pub material_count: usize,
1196    pub asset_count: usize,
1197    pub employee_count: usize,
1198    /// Document flow counts.
1199    pub p2p_chain_count: usize,
1200    pub o2c_chain_count: usize,
1201    /// Subledger counts.
1202    pub ap_invoice_count: usize,
1203    pub ar_invoice_count: usize,
1204    /// OCPM counts.
1205    pub ocpm_event_count: usize,
1206    pub ocpm_object_count: usize,
1207    pub ocpm_case_count: usize,
1208    /// Audit counts.
1209    pub audit_engagement_count: usize,
1210    pub audit_workpaper_count: usize,
1211    pub audit_evidence_count: usize,
1212    pub audit_risk_count: usize,
1213    pub audit_finding_count: usize,
1214    pub audit_judgment_count: usize,
1215    /// ISA 505 confirmation counts.
1216    #[serde(default)]
1217    pub audit_confirmation_count: usize,
1218    #[serde(default)]
1219    pub audit_confirmation_response_count: usize,
1220    /// ISA 330/530 procedure step and sample counts.
1221    #[serde(default)]
1222    pub audit_procedure_step_count: usize,
1223    #[serde(default)]
1224    pub audit_sample_count: usize,
1225    /// ISA 520 analytical procedure counts.
1226    #[serde(default)]
1227    pub audit_analytical_result_count: usize,
1228    /// ISA 610 internal audit counts.
1229    #[serde(default)]
1230    pub audit_ia_function_count: usize,
1231    #[serde(default)]
1232    pub audit_ia_report_count: usize,
1233    /// ISA 550 related party counts.
1234    #[serde(default)]
1235    pub audit_related_party_count: usize,
1236    #[serde(default)]
1237    pub audit_related_party_transaction_count: usize,
1238    /// Anomaly counts.
1239    pub anomalies_injected: usize,
1240    /// Data quality issue counts.
1241    pub data_quality_issues: usize,
1242    /// Banking counts.
1243    pub banking_customer_count: usize,
1244    pub banking_account_count: usize,
1245    pub banking_transaction_count: usize,
1246    pub banking_suspicious_count: usize,
1247    /// Graph export counts.
1248    pub graph_export_count: usize,
1249    pub graph_node_count: usize,
1250    pub graph_edge_count: usize,
1251    /// LLM enrichment timing (milliseconds).
1252    #[serde(default)]
1253    pub llm_enrichment_ms: u64,
1254    /// Number of vendor names enriched by LLM.
1255    #[serde(default)]
1256    pub llm_vendors_enriched: usize,
1257    /// Diffusion enhancement timing (milliseconds).
1258    #[serde(default)]
1259    pub diffusion_enhancement_ms: u64,
1260    /// Number of diffusion samples generated.
1261    #[serde(default)]
1262    pub diffusion_samples_generated: usize,
1263    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
1264    /// `None` when the neural/hybrid backend is not active.
1265    #[serde(default, skip_serializing_if = "Option::is_none")]
1266    pub neural_hybrid_weight: Option<f64>,
1267    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
1268    #[serde(default, skip_serializing_if = "Option::is_none")]
1269    pub neural_hybrid_strategy: Option<String>,
1270    /// How many columns were routed through the neural backend.
1271    #[serde(default, skip_serializing_if = "Option::is_none")]
1272    pub neural_routed_column_count: Option<usize>,
1273    /// Causal generation timing (milliseconds).
1274    #[serde(default)]
1275    pub causal_generation_ms: u64,
1276    /// Number of causal samples generated.
1277    #[serde(default)]
1278    pub causal_samples_generated: usize,
1279    /// Whether causal validation passed.
1280    #[serde(default)]
1281    pub causal_validation_passed: Option<bool>,
1282    /// S2C sourcing counts.
1283    #[serde(default)]
1284    pub sourcing_project_count: usize,
1285    #[serde(default)]
1286    pub rfx_event_count: usize,
1287    #[serde(default)]
1288    pub bid_count: usize,
1289    #[serde(default)]
1290    pub contract_count: usize,
1291    #[serde(default)]
1292    pub catalog_item_count: usize,
1293    #[serde(default)]
1294    pub scorecard_count: usize,
1295    /// Financial reporting counts.
1296    #[serde(default)]
1297    pub financial_statement_count: usize,
1298    #[serde(default)]
1299    pub bank_reconciliation_count: usize,
1300    /// HR counts.
1301    #[serde(default)]
1302    pub payroll_run_count: usize,
1303    #[serde(default)]
1304    pub time_entry_count: usize,
1305    #[serde(default)]
1306    pub expense_report_count: usize,
1307    #[serde(default)]
1308    pub benefit_enrollment_count: usize,
1309    #[serde(default)]
1310    pub pension_plan_count: usize,
1311    #[serde(default)]
1312    pub stock_grant_count: usize,
1313    /// Accounting standards counts.
1314    #[serde(default)]
1315    pub revenue_contract_count: usize,
1316    #[serde(default)]
1317    pub impairment_test_count: usize,
1318    #[serde(default)]
1319    pub business_combination_count: usize,
1320    #[serde(default)]
1321    pub ecl_model_count: usize,
1322    #[serde(default)]
1323    pub provision_count: usize,
1324    /// Manufacturing counts.
1325    #[serde(default)]
1326    pub production_order_count: usize,
1327    #[serde(default)]
1328    pub quality_inspection_count: usize,
1329    #[serde(default)]
1330    pub cycle_count_count: usize,
1331    #[serde(default)]
1332    pub bom_component_count: usize,
1333    #[serde(default)]
1334    pub inventory_movement_count: usize,
1335    /// Sales & reporting counts.
1336    #[serde(default)]
1337    pub sales_quote_count: usize,
1338    #[serde(default)]
1339    pub kpi_count: usize,
1340    #[serde(default)]
1341    pub budget_line_count: usize,
1342    /// Tax counts.
1343    #[serde(default)]
1344    pub tax_jurisdiction_count: usize,
1345    #[serde(default)]
1346    pub tax_code_count: usize,
1347    /// ESG counts.
1348    #[serde(default)]
1349    pub esg_emission_count: usize,
1350    #[serde(default)]
1351    pub esg_disclosure_count: usize,
1352    /// Intercompany counts.
1353    #[serde(default)]
1354    pub ic_matched_pair_count: usize,
1355    #[serde(default)]
1356    pub ic_elimination_count: usize,
1357    /// Number of intercompany journal entries (seller + buyer side).
1358    #[serde(default)]
1359    pub ic_transaction_count: usize,
1360    /// Number of fixed asset subledger records.
1361    #[serde(default)]
1362    pub fa_subledger_count: usize,
1363    /// Number of inventory subledger records.
1364    #[serde(default)]
1365    pub inventory_subledger_count: usize,
1366    /// Treasury debt instrument count.
1367    #[serde(default)]
1368    pub treasury_debt_instrument_count: usize,
1369    /// Treasury hedging instrument count.
1370    #[serde(default)]
1371    pub treasury_hedging_instrument_count: usize,
1372    /// Project accounting project count.
1373    #[serde(default)]
1374    pub project_count: usize,
1375    /// Project accounting change order count.
1376    #[serde(default)]
1377    pub project_change_order_count: usize,
1378    /// Tax provision count.
1379    #[serde(default)]
1380    pub tax_provision_count: usize,
1381    /// Opening balance count.
1382    #[serde(default)]
1383    pub opening_balance_count: usize,
1384    /// Subledger reconciliation count.
1385    #[serde(default)]
1386    pub subledger_reconciliation_count: usize,
1387    /// Tax line count.
1388    #[serde(default)]
1389    pub tax_line_count: usize,
1390    /// Project cost line count.
1391    #[serde(default)]
1392    pub project_cost_line_count: usize,
1393    /// Cash position count.
1394    #[serde(default)]
1395    pub cash_position_count: usize,
1396    /// Cash forecast count.
1397    #[serde(default)]
1398    pub cash_forecast_count: usize,
1399    /// Cash pool count.
1400    #[serde(default)]
1401    pub cash_pool_count: usize,
1402    /// Process evolution event count.
1403    #[serde(default)]
1404    pub process_evolution_event_count: usize,
1405    /// Organizational event count.
1406    #[serde(default)]
1407    pub organizational_event_count: usize,
1408    /// Counterfactual pair count.
1409    #[serde(default)]
1410    pub counterfactual_pair_count: usize,
1411    /// Number of fraud red-flag indicators generated.
1412    #[serde(default)]
1413    pub red_flag_count: usize,
1414    /// Number of collusion rings generated.
1415    #[serde(default)]
1416    pub collusion_ring_count: usize,
1417    /// Number of bi-temporal vendor version chains generated.
1418    #[serde(default)]
1419    pub temporal_version_chain_count: usize,
1420    /// Number of nodes in the entity relationship graph.
1421    #[serde(default)]
1422    pub entity_relationship_node_count: usize,
1423    /// Number of edges in the entity relationship graph.
1424    #[serde(default)]
1425    pub entity_relationship_edge_count: usize,
1426    /// Number of cross-process links generated.
1427    #[serde(default)]
1428    pub cross_process_link_count: usize,
1429    /// Number of disruption events generated.
1430    #[serde(default)]
1431    pub disruption_event_count: usize,
1432    /// Number of industry-specific GL accounts generated.
1433    #[serde(default)]
1434    pub industry_gl_account_count: usize,
1435    /// Number of period-close journal entries generated (tax provision + closing entries).
1436    #[serde(default)]
1437    pub period_close_je_count: usize,
1438}
1439
1440/// Enhanced orchestrator with full feature integration.
1441pub struct EnhancedOrchestrator {
1442    config: GeneratorConfig,
1443    phase_config: PhaseConfig,
1444    coa: Option<Arc<ChartOfAccounts>>,
1445    master_data: MasterDataSnapshot,
1446    seed: u64,
1447    multi_progress: Option<MultiProgress>,
1448    /// Resource guard for memory, disk, and CPU monitoring
1449    resource_guard: ResourceGuard,
1450    /// Output path for disk space monitoring
1451    output_path: Option<PathBuf>,
1452    /// Copula generators for preserving correlations (from fingerprint)
1453    copula_generators: Vec<CopulaGeneratorSpec>,
1454    /// Country pack registry for localized data generation
1455    country_pack_registry: datasynth_core::CountryPackRegistry,
1456    /// Optional streaming sink for phase-by-phase output
1457    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1458}
1459
1460impl EnhancedOrchestrator {
1461    /// Create a new enhanced orchestrator.
1462    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1463        datasynth_config::validate_config(&config)?;
1464
1465        let seed = config.global.seed.unwrap_or_else(rand::random);
1466
1467        // Build resource guard from config
1468        let resource_guard = Self::build_resource_guard(&config, None);
1469
1470        // Build country pack registry from config
1471        let country_pack_registry = match &config.country_packs {
1472            Some(cp) => {
1473                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1474                    .map_err(|e| SynthError::config(e.to_string()))?
1475            }
1476            None => datasynth_core::CountryPackRegistry::builtin_only()
1477                .map_err(|e| SynthError::config(e.to_string()))?,
1478        };
1479
1480        Ok(Self {
1481            config,
1482            phase_config,
1483            coa: None,
1484            master_data: MasterDataSnapshot::default(),
1485            seed,
1486            multi_progress: None,
1487            resource_guard,
1488            output_path: None,
1489            copula_generators: Vec::new(),
1490            country_pack_registry,
1491            phase_sink: None,
1492        })
1493    }
1494
1495    /// Create with default phase config.
1496    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1497        Self::new(config, PhaseConfig::default())
1498    }
1499
1500    /// Set a streaming phase sink for real-time output (builder pattern).
1501    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1502        self.phase_sink = Some(sink);
1503        self
1504    }
1505
1506    /// Set a streaming phase sink on an existing orchestrator.
1507    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1508        self.phase_sink = Some(sink);
1509    }
1510
1511    /// Emit a batch of items to the phase sink (if configured).
1512    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1513        if let Some(ref sink) = self.phase_sink {
1514            for item in items {
1515                if let Ok(value) = serde_json::to_value(item) {
1516                    if let Err(e) = sink.emit(phase, type_name, &value) {
1517                        warn!(
1518                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1519                        );
1520                    }
1521                }
1522            }
1523            if let Err(e) = sink.phase_complete(phase) {
1524                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1525            }
1526        }
1527    }
1528
1529    /// Enable/disable progress bars.
1530    pub fn with_progress(mut self, show: bool) -> Self {
1531        self.phase_config.show_progress = show;
1532        if show {
1533            self.multi_progress = Some(MultiProgress::new());
1534        }
1535        self
1536    }
1537
1538    /// Set the output path for disk space monitoring.
1539    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1540        let path = path.into();
1541        self.output_path = Some(path.clone());
1542        // Rebuild resource guard with the output path
1543        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1544        self
1545    }
1546
1547    /// Access the country pack registry.
1548    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1549        &self.country_pack_registry
1550    }
1551
1552    /// Look up a country pack by country code string.
1553    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1554        self.country_pack_registry.get_by_str(country)
1555    }
1556
1557    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1558    /// company, defaulting to `"US"` if no companies are configured.
1559    fn primary_country_code(&self) -> &str {
1560        self.config
1561            .companies
1562            .first()
1563            .map(|c| c.country.as_str())
1564            .unwrap_or("US")
1565    }
1566
1567    /// Resolve the country pack for the primary (first) company.
1568    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1569        self.country_pack_for(self.primary_country_code())
1570    }
1571
1572    /// Resolve the CoA framework from config/country-pack.
1573    fn resolve_coa_framework(&self) -> CoAFramework {
1574        if self.config.accounting_standards.enabled {
1575            match self.config.accounting_standards.framework {
1576                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1577                    return CoAFramework::FrenchPcg;
1578                }
1579                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1580                    return CoAFramework::GermanSkr04;
1581                }
1582                _ => {}
1583            }
1584        }
1585        // Fallback: derive from country pack
1586        let pack = self.primary_pack();
1587        match pack.accounting.framework.as_str() {
1588            "french_gaap" => CoAFramework::FrenchPcg,
1589            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1590            _ => CoAFramework::UsGaap,
1591        }
1592    }
1593
1594    /// Check if copula generators are available.
1595    ///
1596    /// Returns true if the orchestrator has copula generators for preserving
1597    /// correlations (typically from fingerprint-based generation).
1598    pub fn has_copulas(&self) -> bool {
1599        !self.copula_generators.is_empty()
1600    }
1601
1602    /// Get the copula generators.
1603    ///
1604    /// Returns a reference to the copula generators for use during generation.
1605    /// These can be used to generate correlated samples that preserve the
1606    /// statistical relationships from the source data.
1607    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1608        &self.copula_generators
1609    }
1610
1611    /// Get a mutable reference to the copula generators.
1612    ///
1613    /// Allows generators to sample from copulas during data generation.
1614    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1615        &mut self.copula_generators
1616    }
1617
1618    /// Sample correlated values from a named copula.
1619    ///
1620    /// Returns None if the copula doesn't exist.
1621    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1622        self.copula_generators
1623            .iter_mut()
1624            .find(|c| c.name == copula_name)
1625            .map(|c| c.generator.sample())
1626    }
1627
1628    /// Create an orchestrator from a fingerprint file.
1629    ///
1630    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1631    /// and creates an orchestrator configured to generate data matching
1632    /// the statistical properties of the original data.
1633    ///
1634    /// # Arguments
1635    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1636    /// * `phase_config` - Phase configuration for generation
1637    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1638    ///
1639    /// # Example
1640    /// ```no_run
1641    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1642    /// use std::path::Path;
1643    ///
1644    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1645    ///     Path::new("fingerprint.dsf"),
1646    ///     PhaseConfig::default(),
1647    ///     1.0,
1648    /// ).unwrap();
1649    /// ```
1650    pub fn from_fingerprint(
1651        fingerprint_path: &std::path::Path,
1652        phase_config: PhaseConfig,
1653        scale: f64,
1654    ) -> SynthResult<Self> {
1655        info!("Loading fingerprint from: {}", fingerprint_path.display());
1656
1657        // Read the fingerprint
1658        let reader = FingerprintReader::new();
1659        let fingerprint = reader
1660            .read_from_file(fingerprint_path)
1661            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1662
1663        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1664    }
1665
1666    /// Create an orchestrator from a loaded fingerprint.
1667    ///
1668    /// # Arguments
1669    /// * `fingerprint` - The loaded fingerprint
1670    /// * `phase_config` - Phase configuration for generation
1671    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1672    pub fn from_fingerprint_data(
1673        fingerprint: Fingerprint,
1674        phase_config: PhaseConfig,
1675        scale: f64,
1676    ) -> SynthResult<Self> {
1677        info!(
1678            "Synthesizing config from fingerprint (version: {}, tables: {})",
1679            fingerprint.manifest.version,
1680            fingerprint.schema.tables.len()
1681        );
1682
1683        // Generate a seed for the synthesis
1684        let seed: u64 = rand::random();
1685        info!("Fingerprint synthesis seed: {}", seed);
1686
1687        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1688        let options = SynthesisOptions {
1689            scale,
1690            seed: Some(seed),
1691            preserve_correlations: true,
1692            inject_anomalies: true,
1693        };
1694        let synthesizer = ConfigSynthesizer::with_options(options);
1695
1696        // Synthesize full result including copula generators
1697        let synthesis_result = synthesizer
1698            .synthesize_full(&fingerprint, seed)
1699            .map_err(|e| {
1700                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1701            })?;
1702
1703        // Start with a base config from the fingerprint's industry if available
1704        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1705            Self::base_config_for_industry(industry)
1706        } else {
1707            Self::base_config_for_industry("manufacturing")
1708        };
1709
1710        // Apply the synthesized patches
1711        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1712
1713        // Log synthesis results
1714        info!(
1715            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1716            fingerprint.schema.tables.len(),
1717            scale,
1718            synthesis_result.copula_generators.len()
1719        );
1720
1721        if !synthesis_result.copula_generators.is_empty() {
1722            for spec in &synthesis_result.copula_generators {
1723                info!(
1724                    "  Copula '{}' for table '{}': {} columns",
1725                    spec.name,
1726                    spec.table,
1727                    spec.columns.len()
1728                );
1729            }
1730        }
1731
1732        // Create the orchestrator with the synthesized config
1733        let mut orchestrator = Self::new(config, phase_config)?;
1734
1735        // Store copula generators for use during generation
1736        orchestrator.copula_generators = synthesis_result.copula_generators;
1737
1738        Ok(orchestrator)
1739    }
1740
1741    /// Create a base config for a given industry.
1742    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1743        use datasynth_config::presets::create_preset;
1744        use datasynth_config::TransactionVolume;
1745        use datasynth_core::models::{CoAComplexity, IndustrySector};
1746
1747        let sector = match industry.to_lowercase().as_str() {
1748            "manufacturing" => IndustrySector::Manufacturing,
1749            "retail" => IndustrySector::Retail,
1750            "financial" | "financial_services" => IndustrySector::FinancialServices,
1751            "healthcare" => IndustrySector::Healthcare,
1752            "technology" | "tech" => IndustrySector::Technology,
1753            _ => IndustrySector::Manufacturing,
1754        };
1755
1756        // Create a preset with reasonable defaults
1757        create_preset(
1758            sector,
1759            1,  // company count
1760            12, // period months
1761            CoAComplexity::Medium,
1762            TransactionVolume::TenK,
1763        )
1764    }
1765
1766    /// Apply a config patch to a GeneratorConfig.
1767    fn apply_config_patch(
1768        mut config: GeneratorConfig,
1769        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1770    ) -> GeneratorConfig {
1771        use datasynth_fingerprint::synthesis::ConfigValue;
1772
1773        for (key, value) in patch.values() {
1774            match (key.as_str(), value) {
1775                // Transaction count is handled via TransactionVolume enum on companies
1776                // Log it but cannot directly set it (would need to modify company volumes)
1777                ("transactions.count", ConfigValue::Integer(n)) => {
1778                    info!(
1779                        "Fingerprint suggests {} transactions (apply via company volumes)",
1780                        n
1781                    );
1782                }
1783                ("global.period_months", ConfigValue::Integer(n)) => {
1784                    config.global.period_months = (*n).clamp(1, 120) as u32;
1785                }
1786                ("global.start_date", ConfigValue::String(s)) => {
1787                    config.global.start_date = s.clone();
1788                }
1789                ("global.seed", ConfigValue::Integer(n)) => {
1790                    config.global.seed = Some(*n as u64);
1791                }
1792                ("fraud.enabled", ConfigValue::Bool(b)) => {
1793                    config.fraud.enabled = *b;
1794                }
1795                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1796                    config.fraud.fraud_rate = *f;
1797                }
1798                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1799                    config.data_quality.enabled = *b;
1800                }
1801                // Handle anomaly injection paths (mapped to fraud config)
1802                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1803                    config.fraud.enabled = *b;
1804                }
1805                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1806                    config.fraud.fraud_rate = *f;
1807                }
1808                _ => {
1809                    debug!("Ignoring unknown config patch key: {}", key);
1810                }
1811            }
1812        }
1813
1814        config
1815    }
1816
1817    /// Build a resource guard from the configuration.
1818    fn build_resource_guard(
1819        config: &GeneratorConfig,
1820        output_path: Option<PathBuf>,
1821    ) -> ResourceGuard {
1822        let mut builder = ResourceGuardBuilder::new();
1823
1824        // Configure memory limit if set
1825        if config.global.memory_limit_mb > 0 {
1826            builder = builder.memory_limit(config.global.memory_limit_mb);
1827        }
1828
1829        // Configure disk monitoring for output path
1830        if let Some(path) = output_path {
1831            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1832        }
1833
1834        // Use conservative degradation settings for production safety
1835        builder = builder.conservative();
1836
1837        builder.build()
1838    }
1839
1840    /// Check resources (memory, disk, CPU) and return degradation level.
1841    ///
1842    /// Returns an error if hard limits are exceeded.
1843    /// Returns Ok(DegradationLevel) indicating current resource state.
1844    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1845        self.resource_guard.check()
1846    }
1847
1848    /// Check resources with logging.
1849    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1850        let level = self.resource_guard.check()?;
1851
1852        if level != DegradationLevel::Normal {
1853            warn!(
1854                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1855                phase,
1856                level,
1857                self.resource_guard.current_memory_mb(),
1858                self.resource_guard.available_disk_mb()
1859            );
1860        }
1861
1862        Ok(level)
1863    }
1864
1865    /// Get current degradation actions based on resource state.
1866    fn get_degradation_actions(&self) -> DegradationActions {
1867        self.resource_guard.get_actions()
1868    }
1869
1870    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1871    fn check_memory_limit(&self) -> SynthResult<()> {
1872        self.check_resources()?;
1873        Ok(())
1874    }
1875
1876    /// Run the complete generation workflow.
1877    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1878        info!("Starting enhanced generation workflow");
1879        info!(
1880            "Config: industry={:?}, period_months={}, companies={}",
1881            self.config.global.industry,
1882            self.config.global.period_months,
1883            self.config.companies.len()
1884        );
1885
1886        // Set decimal serialization mode (thread-local, affects JSON output).
1887        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
1888        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1889        datasynth_core::serde_decimal::set_numeric_native(is_native);
1890        struct NumericModeGuard;
1891        impl Drop for NumericModeGuard {
1892            fn drop(&mut self) {
1893                datasynth_core::serde_decimal::set_numeric_native(false);
1894            }
1895        }
1896        let _numeric_guard = if is_native {
1897            Some(NumericModeGuard)
1898        } else {
1899            None
1900        };
1901
1902        // Initial resource check before starting
1903        let initial_level = self.check_resources_with_log("initial")?;
1904        if initial_level == DegradationLevel::Emergency {
1905            return Err(SynthError::resource(
1906                "Insufficient resources to start generation",
1907            ));
1908        }
1909
1910        let mut stats = EnhancedGenerationStatistics {
1911            companies_count: self.config.companies.len(),
1912            period_months: self.config.global.period_months,
1913            ..Default::default()
1914        };
1915
1916        // Phase 1: Chart of Accounts
1917        let coa = self.phase_chart_of_accounts(&mut stats)?;
1918
1919        // Phase 2: Master Data
1920        self.phase_master_data(&mut stats)?;
1921
1922        // Emit master data to stream sink
1923        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1924        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1925        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1926
1927        // Phase 3: Document Flows + Subledger Linking
1928        let (mut document_flows, mut subledger, fa_journal_entries) =
1929            self.phase_document_flows(&mut stats)?;
1930
1931        // Emit document flows to stream sink
1932        self.emit_phase_items(
1933            "document_flows",
1934            "PurchaseOrder",
1935            &document_flows.purchase_orders,
1936        );
1937        self.emit_phase_items(
1938            "document_flows",
1939            "GoodsReceipt",
1940            &document_flows.goods_receipts,
1941        );
1942        self.emit_phase_items(
1943            "document_flows",
1944            "VendorInvoice",
1945            &document_flows.vendor_invoices,
1946        );
1947        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1948        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1949
1950        // Phase 3b: Opening Balances (before JE generation)
1951        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1952
1953        // Phase 3c: Convert opening balances to journal entries and prepend them.
1954        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1955        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1956        // balance map type.
1957        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1958            .iter()
1959            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1960            .collect();
1961        if !opening_balance_jes.is_empty() {
1962            debug!(
1963                "Prepending {} opening balance JEs to entries",
1964                opening_balance_jes.len()
1965            );
1966        }
1967
1968        // Phase 4: Journal Entries
1969        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1970
1971        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1972        // starts from the correct initial state.
1973        if !opening_balance_jes.is_empty() {
1974            let mut combined = opening_balance_jes;
1975            combined.extend(entries);
1976            entries = combined;
1977        }
1978
1979        // Phase 4c: Append FA acquisition journal entries to main entries
1980        if !fa_journal_entries.is_empty() {
1981            debug!(
1982                "Appending {} FA acquisition JEs to main entries",
1983                fa_journal_entries.len()
1984            );
1985            entries.extend(fa_journal_entries);
1986        }
1987
1988        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1989        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1990
1991        // Get current degradation actions for optional phases
1992        let actions = self.get_degradation_actions();
1993
1994        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1995        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1996
1997        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1998        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1999        if !sourcing.contracts.is_empty() {
2000            let mut linked_count = 0usize;
2001            // Collect (vendor_id, po_id) pairs from P2P chains
2002            let po_vendor_pairs: Vec<(String, String)> = document_flows
2003                .p2p_chains
2004                .iter()
2005                .map(|chain| {
2006                    (
2007                        chain.purchase_order.vendor_id.clone(),
2008                        chain.purchase_order.header.document_id.clone(),
2009                    )
2010                })
2011                .collect();
2012
2013            for chain in &mut document_flows.p2p_chains {
2014                if chain.purchase_order.contract_id.is_none() {
2015                    if let Some(contract) = sourcing
2016                        .contracts
2017                        .iter()
2018                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2019                    {
2020                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2021                        linked_count += 1;
2022                    }
2023                }
2024            }
2025
2026            // Populate reverse FK: purchase_order_ids on each contract
2027            for contract in &mut sourcing.contracts {
2028                let po_ids: Vec<String> = po_vendor_pairs
2029                    .iter()
2030                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2031                    .map(|(_, po_id)| po_id.clone())
2032                    .collect();
2033                if !po_ids.is_empty() {
2034                    contract.purchase_order_ids = po_ids;
2035                }
2036            }
2037
2038            if linked_count > 0 {
2039                debug!(
2040                    "Linked {} purchase orders to S2C contracts by vendor match",
2041                    linked_count
2042                );
2043            }
2044        }
2045
2046        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2047        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2048
2049        // Phase 5c: Append IC journal entries to main entries
2050        if !intercompany.seller_journal_entries.is_empty()
2051            || !intercompany.buyer_journal_entries.is_empty()
2052        {
2053            let ic_je_count = intercompany.seller_journal_entries.len()
2054                + intercompany.buyer_journal_entries.len();
2055            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2056            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2057            debug!(
2058                "Appended {} IC journal entries to main entries",
2059                ic_je_count
2060            );
2061        }
2062
2063        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2064        if !intercompany.elimination_entries.is_empty() {
2065            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2066                &intercompany.elimination_entries,
2067            );
2068            if !elim_jes.is_empty() {
2069                debug!(
2070                    "Appended {} elimination journal entries to main entries",
2071                    elim_jes.len()
2072                );
2073                // IC elimination net-zero assertion (v2.5 hardening)
2074                let elim_debit: rust_decimal::Decimal =
2075                    elim_jes.iter().map(|je| je.total_debit()).sum();
2076                let elim_credit: rust_decimal::Decimal =
2077                    elim_jes.iter().map(|je| je.total_credit()).sum();
2078                let elim_diff = (elim_debit - elim_credit).abs();
2079                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2080                if elim_diff > tolerance {
2081                    return Err(datasynth_core::error::SynthError::generation(format!(
2082                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2083                        elim_debit, elim_credit, elim_diff, tolerance
2084                    )));
2085                }
2086                debug!(
2087                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2088                    elim_debit, elim_credit, elim_diff
2089                );
2090                entries.extend(elim_jes);
2091            }
2092        }
2093
2094        // Phase 5e: Wire IC source documents into document flow snapshot
2095        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2096            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2097                document_flows
2098                    .customer_invoices
2099                    .extend(ic_docs.seller_invoices.iter().cloned());
2100                document_flows
2101                    .purchase_orders
2102                    .extend(ic_docs.buyer_orders.iter().cloned());
2103                document_flows
2104                    .goods_receipts
2105                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2106                document_flows
2107                    .vendor_invoices
2108                    .extend(ic_docs.buyer_invoices.iter().cloned());
2109                debug!(
2110                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2111                    ic_docs.seller_invoices.len(),
2112                    ic_docs.buyer_orders.len(),
2113                    ic_docs.buyer_goods_receipts.len(),
2114                    ic_docs.buyer_invoices.len(),
2115                );
2116            }
2117        }
2118
2119        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2120        let hr = self.phase_hr_data(&mut stats)?;
2121
2122        // Phase 6b: Generate JEs from payroll runs
2123        if !hr.payroll_runs.is_empty() {
2124            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2125            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2126            entries.extend(payroll_jes);
2127        }
2128
2129        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2130        if !hr.pension_journal_entries.is_empty() {
2131            debug!(
2132                "Generated {} JEs from pension plans",
2133                hr.pension_journal_entries.len()
2134            );
2135            entries.extend(hr.pension_journal_entries.iter().cloned());
2136        }
2137
2138        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2139        if !hr.stock_comp_journal_entries.is_empty() {
2140            debug!(
2141                "Generated {} JEs from stock-based compensation",
2142                hr.stock_comp_journal_entries.len()
2143            );
2144            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2145        }
2146
2147        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2148        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2149
2150        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2151        if !manufacturing_snap.production_orders.is_empty() {
2152            let currency = self
2153                .config
2154                .companies
2155                .first()
2156                .map(|c| c.currency.as_str())
2157                .unwrap_or("USD");
2158            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2159                &manufacturing_snap.production_orders,
2160                &manufacturing_snap.quality_inspections,
2161                currency,
2162            );
2163            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2164            entries.extend(mfg_jes);
2165        }
2166
2167        // Phase 7a-warranty: Generate warranty provisions per company
2168        if !manufacturing_snap.quality_inspections.is_empty() {
2169            let framework = match self.config.accounting_standards.framework {
2170                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2171                _ => "US_GAAP",
2172            };
2173            for company in &self.config.companies {
2174                let company_orders: Vec<_> = manufacturing_snap
2175                    .production_orders
2176                    .iter()
2177                    .filter(|o| o.company_code == company.code)
2178                    .cloned()
2179                    .collect();
2180                let company_inspections: Vec<_> = manufacturing_snap
2181                    .quality_inspections
2182                    .iter()
2183                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2184                    .cloned()
2185                    .collect();
2186                if company_inspections.is_empty() {
2187                    continue;
2188                }
2189                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2190                let warranty_result = warranty_gen.generate(
2191                    &company.code,
2192                    &company_orders,
2193                    &company_inspections,
2194                    &company.currency,
2195                    framework,
2196                );
2197                if !warranty_result.journal_entries.is_empty() {
2198                    debug!(
2199                        "Generated {} warranty provision JEs for {}",
2200                        warranty_result.journal_entries.len(),
2201                        company.code
2202                    );
2203                    entries.extend(warranty_result.journal_entries);
2204                }
2205            }
2206        }
2207
2208        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2209        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2210        {
2211            let cogs_currency = self
2212                .config
2213                .companies
2214                .first()
2215                .map(|c| c.currency.as_str())
2216                .unwrap_or("USD");
2217            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2218                &document_flows.deliveries,
2219                &manufacturing_snap.production_orders,
2220                cogs_currency,
2221            );
2222            if !cogs_jes.is_empty() {
2223                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2224                entries.extend(cogs_jes);
2225            }
2226        }
2227
2228        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2229        //
2230        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2231        // subledger inventory positions.  Here we reconcile them so that position balances
2232        // reflect the actual stock movements within the generation period.
2233        if !manufacturing_snap.inventory_movements.is_empty()
2234            && !subledger.inventory_positions.is_empty()
2235        {
2236            use datasynth_core::models::MovementType as MfgMovementType;
2237            let mut receipt_count = 0usize;
2238            let mut issue_count = 0usize;
2239            for movement in &manufacturing_snap.inventory_movements {
2240                // Find a matching position by material code and company
2241                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2242                    p.material_id == movement.material_code
2243                        && p.company_code == movement.entity_code
2244                }) {
2245                    match movement.movement_type {
2246                        MfgMovementType::GoodsReceipt => {
2247                            // Increase stock and update weighted-average cost
2248                            pos.add_quantity(
2249                                movement.quantity,
2250                                movement.value,
2251                                movement.movement_date,
2252                            );
2253                            receipt_count += 1;
2254                        }
2255                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2256                            // Decrease stock (best-effort: the removal result is discarded, and the movement is counted either way)
2257                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2258                            issue_count += 1;
2259                        }
2260                        _ => {}
2261                    }
2262                }
2263            }
2264            debug!(
2265                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2266                manufacturing_snap.inventory_movements.len(),
2267                receipt_count,
2268                issue_count,
2269            );
2270        }
2271
2272        // Update final entry/line-item stats after all JE-generating phases
2273        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2274        if !entries.is_empty() {
2275            stats.total_entries = entries.len() as u64;
2276            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2277            debug!(
2278                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2279                stats.total_entries, stats.total_line_items
2280            );
2281        }
2282
2283        // Phase 7b: Apply internal controls to journal entries
2284        if self.config.internal_controls.enabled && !entries.is_empty() {
2285            info!("Phase 7b: Applying internal controls to journal entries");
2286            let control_config = ControlGeneratorConfig {
2287                exception_rate: self.config.internal_controls.exception_rate,
2288                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2289                enable_sox_marking: true,
2290                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2291                    self.config.internal_controls.sox_materiality_threshold,
2292                )
2293                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2294                ..Default::default()
2295            };
2296            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2297            for entry in &mut entries {
2298                control_gen.apply_controls(entry, &coa);
2299            }
2300            let with_controls = entries
2301                .iter()
2302                .filter(|e| !e.header.control_ids.is_empty())
2303                .count();
2304            info!(
2305                "Applied controls to {} entries ({} with control IDs assigned)",
2306                entries.len(),
2307                with_controls
2308            );
2309        }
2310
2311        // Phase 7c: Extract SoD violations from annotated journal entries.
2312        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2313        // Here we materialise those flags into standalone SodViolation records.
2314        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2315            .iter()
2316            .filter(|e| e.header.sod_violation)
2317            .filter_map(|e| {
2318                e.header.sod_conflict_type.map(|ct| {
2319                    use datasynth_core::models::{RiskLevel, SodViolation};
2320                    let severity = match ct {
2321                        datasynth_core::models::SodConflictType::PaymentReleaser
2322                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2323                            RiskLevel::Critical
2324                        }
2325                        datasynth_core::models::SodConflictType::PreparerApprover
2326                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2327                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2328                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2329                            RiskLevel::High
2330                        }
2331                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2332                            RiskLevel::Medium
2333                        }
2334                    };
2335                    let action = format!(
2336                        "SoD conflict {:?} on entry {} ({})",
2337                        ct, e.header.document_id, e.header.company_code
2338                    );
2339                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2340                })
2341            })
2342            .collect();
2343        if !sod_violations.is_empty() {
2344            info!(
2345                "Phase 7c: Extracted {} SoD violations from {} entries",
2346                sod_violations.len(),
2347                entries.len()
2348            );
2349        }
2350
2351        // Emit journal entries to stream sink (after all JE-generating phases)
2352        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2353
2354        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2355        //
2356        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2357        // document-level fraud are exempt from subsequent line-level flag
2358        // overwrites, and so downstream consumers see a coherent picture.
2359        //
2360        // Gated by `fraud.enabled` and `fraud.document_fraud_rate` — disabled, `None`, or `0.0` is a no-op.
2361        {
2362            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2363            if self.config.fraud.enabled && doc_rate > 0.0 {
2364                use datasynth_core::fraud_propagation::{
2365                    inject_document_fraud, propagate_documents_to_entries,
2366                };
2367                use datasynth_core::utils::weighted_select;
2368                use datasynth_core::FraudType;
2369                use rand_chacha::rand_core::SeedableRng;
2370
2371                let dist = &self.config.fraud.fraud_type_distribution;
2372                let fraud_type_weights: [(FraudType, f64); 8] = [
2373                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2374                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2375                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2376                    (
2377                        FraudType::ImproperCapitalization,
2378                        dist.expense_capitalization,
2379                    ),
2380                    (FraudType::SplitTransaction, dist.split_transaction),
2381                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2382                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2383                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2384                ];
2385                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2386                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2387                    if weights_sum <= 0.0 {
2388                        FraudType::FictitiousEntry
2389                    } else {
2390                        *weighted_select(rng, &fraud_type_weights)
2391                    }
2392                };
2393
2394                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2395                let mut doc_tagged = 0usize;
2396                macro_rules! inject_into {
2397                    ($collection:expr) => {{
2398                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2399                            $collection.iter_mut().map(|d| &mut d.header).collect();
2400                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2401                    }};
2402                }
2403                inject_into!(document_flows.purchase_orders);
2404                inject_into!(document_flows.goods_receipts);
2405                inject_into!(document_flows.vendor_invoices);
2406                inject_into!(document_flows.payments);
2407                inject_into!(document_flows.sales_orders);
2408                inject_into!(document_flows.deliveries);
2409                inject_into!(document_flows.customer_invoices);
2410                if doc_tagged > 0 {
2411                    info!(
2412                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2413                    );
2414                }
2415
2416                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2417                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2418                        Vec::new();
2419                    headers.extend(
2420                        document_flows
2421                            .purchase_orders
2422                            .iter()
2423                            .map(|d| d.header.clone()),
2424                    );
2425                    headers.extend(
2426                        document_flows
2427                            .goods_receipts
2428                            .iter()
2429                            .map(|d| d.header.clone()),
2430                    );
2431                    headers.extend(
2432                        document_flows
2433                            .vendor_invoices
2434                            .iter()
2435                            .map(|d| d.header.clone()),
2436                    );
2437                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2438                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2439                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2440                    headers.extend(
2441                        document_flows
2442                            .customer_invoices
2443                            .iter()
2444                            .map(|d| d.header.clone()),
2445                    );
2446                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2447                    if propagated > 0 {
2448                        info!(
2449                            "Propagated document-level fraud to {propagated} derived journal entries"
2450                        );
2451                    }
2452                }
2453            }
2454        }
2455
2456        // Phase 8: Anomaly Injection (after all JE-generating phases)
2457        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2458
2459        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2460        // through the anomaly injector.
2461        //
2462        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2463        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2464        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2465        //   - Any external mutation that sets is_fraud after the fact
2466        //
2467        // The anomaly injector already applies the same bias inline when it
2468        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2469        // so gating this sweep on `!is_anomaly` avoids double-application.
2470        //
2471        // Without this sweep, fraud entries from these paths show 0 lift on
2472        // the canonical forensic signals (is_round_1000, is_off_hours,
2473        // is_weekend, is_post_close), which is exactly what the SDK-side
2474        // evaluator caught in v3.1 — fraud features had worse lift than
2475        // baseline. See DS-3.1 post-deploy feedback.
2476        {
2477            use datasynth_core::fraud_bias::{
2478                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2479            };
2480            use rand_chacha::rand_core::SeedableRng;
2481            let cfg = FraudBehavioralBiasConfig::default();
2482            if cfg.enabled {
2483                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2484                let mut swept = 0usize;
2485                for entry in entries.iter_mut() {
2486                    if entry.header.is_fraud && !entry.header.is_anomaly {
2487                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2488                        swept += 1;
2489                    }
2490                }
2491                if swept > 0 {
2492                    info!(
2493                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2494                         (doc-propagated + je_generator intrinsic fraud)"
2495                    );
2496                }
2497            }
2498        }
2499
2500        // Emit anomaly labels to stream sink
2501        self.emit_phase_items(
2502            "anomaly_injection",
2503            "LabeledAnomaly",
2504            &anomaly_labels.labels,
2505        );
2506
2507        // Propagate fraud labels from journal entries to source documents.
2508        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2509        // instead of tracing through document_references.json.
2510        //
2511        // Gated by `fraud.propagate_to_document` (default true) — disable when
2512        // downstream consumers want document fraud flags to reflect only
2513        // document-level injection, not line-level.
2514        if self.config.fraud.propagate_to_document {
2515            use std::collections::HashMap;
2516            // Build a map from document reference / document_id -> fraud_type from fraudulent JEs.
2517            //
2518            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2519            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2520            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2521            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2522            // we register BOTH the prefixed form (raw reference) AND the bare form
2523            // (post-colon portion) in the map. Also register the JE's document_id
2524            // UUID so documents that set `journal_entry_id` match via that path.
2525            //
2526            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2527            // looked up "foo", silently producing 0 propagations.
2528            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2529            for je in &entries {
2530                if je.header.is_fraud {
2531                    if let Some(ref fraud_type) = je.header.fraud_type {
2532                        if let Some(ref reference) = je.header.reference {
2533                            // Register the full reference ("GR:PO-2024-000001")
2534                            fraud_map.insert(reference.clone(), *fraud_type);
2535                            // Also register the bare document ID ("PO-2024-000001")
2536                            // by stripping the "PREFIX:" if present.
2537                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2538                                if !bare.is_empty() {
2539                                    fraud_map.insert(bare.to_string(), *fraud_type);
2540                                }
2541                            }
2542                        }
2543                        // Also tag via journal_entry_id on document headers
2544                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2545                    }
2546                }
2547            }
2548            if !fraud_map.is_empty() {
2549                let mut propagated = 0usize;
2550                // Use DocumentHeader::propagate_fraud method for each doc type
2551                macro_rules! propagate_to {
2552                    ($collection:expr) => {
2553                        for doc in &mut $collection {
2554                            if doc.header.propagate_fraud(&fraud_map) {
2555                                propagated += 1;
2556                            }
2557                        }
2558                    };
2559                }
2560                propagate_to!(document_flows.purchase_orders);
2561                propagate_to!(document_flows.goods_receipts);
2562                propagate_to!(document_flows.vendor_invoices);
2563                propagate_to!(document_flows.payments);
2564                propagate_to!(document_flows.sales_orders);
2565                propagate_to!(document_flows.deliveries);
2566                propagate_to!(document_flows.customer_invoices);
2567                if propagated > 0 {
2568                    info!(
2569                        "Propagated fraud labels to {} document flow records",
2570                        propagated
2571                    );
2572                }
2573            }
2574        }
2575
2576        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2577        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2578
2579        // Emit red flags to stream sink
2580        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2581
2582        // Phase 26b: Collusion Ring Generation (after red flags)
2583        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2584
2585        // Emit collusion rings to stream sink
2586        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2587
2588        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2589        let balance_validation = self.phase_balance_validation(&entries)?;
2590
2591        // Phase 9b: GL-to-Subledger Reconciliation
2592        let subledger_reconciliation =
2593            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2594
2595        // Phase 10: Data Quality Injection
2596        let (data_quality_stats, quality_issues) =
2597            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2598
2599        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2600        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2601
2602        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2603        {
2604            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2605
2606            // Assert 1: Every JE not flagged as fraud or anomaly must individually balance (debits = credits).
2607            // Flagged JEs are skipped because injection may leave them intentionally unbalanced.
2608            let mut unbalanced_clean = 0usize;
2609            for je in &entries {
2610                if je.header.is_fraud || je.header.is_anomaly {
2611                    continue;
2612                }
2613                let diff = (je.total_debit() - je.total_credit()).abs();
2614                if diff > tolerance {
2615                    unbalanced_clean += 1;
2616                    if unbalanced_clean <= 3 {
2617                        warn!(
2618                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2619                            je.header.document_id,
2620                            je.total_debit(),
2621                            je.total_credit(),
2622                            diff
2623                        );
2624                    }
2625                }
2626            }
2627            if unbalanced_clean > 0 {
2628                return Err(datasynth_core::error::SynthError::generation(format!(
2629                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2630                     First few logged above. Tolerance={}",
2631                    unbalanced_clean, tolerance
2632                )));
2633            }
2634            debug!(
2635                "Phase 10c: All {} non-anomaly JEs individually balanced",
2636                entries
2637                    .iter()
2638                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2639                    .count()
2640            );
2641
2642            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2643            let company_codes: Vec<String> = self
2644                .config
2645                .companies
2646                .iter()
2647                .map(|c| c.code.clone())
2648                .collect();
2649            for company_code in &company_codes {
2650                let mut assets = rust_decimal::Decimal::ZERO;
2651                let mut liab_equity = rust_decimal::Decimal::ZERO;
2652
2653                for entry in &entries {
2654                    if entry.header.company_code != *company_code {
2655                        continue;
2656                    }
2657                    for line in &entry.lines {
2658                        let acct = &line.gl_account;
2659                        let net = line.debit_amount - line.credit_amount;
2660                        // Asset accounts (1xxx): normal debit balance
2661                        if acct.starts_with('1') {
2662                            assets += net;
2663                        }
2664                        // Liability (2xxx) + Equity (3xxx): normal credit balance
2665                        else if acct.starts_with('2') || acct.starts_with('3') {
2666                            liab_equity -= net; // credit-normal, so negate debit-net
2667                        }
2668                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
2669                        // so they net to zero after closing entries
2670                    }
2671                }
2672
2673                let bs_diff = (assets - liab_equity).abs();
2674                if bs_diff > tolerance {
2675                    warn!(
2676                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
2677                         revenue/expense closing entries may not fully offset",
2678                        company_code, assets, liab_equity, bs_diff
2679                    );
2680                    // Warn rather than error: multi-period datasets may have timing
2681                    // differences from accruals/deferrals that resolve in later periods.
2682                    // The per-JE balance check (Assert 1) is the hard gate.
2683                } else {
2684                    debug!(
2685                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
2686                        company_code, assets, liab_equity, bs_diff
2687                    );
2688                }
2689            }
2690
2691            info!("Phase 10c: All generation-time accounting assertions passed");
2692        }
2693
2694        // Phase 11: Audit Data
2695        let audit = self.phase_audit_data(&entries, &mut stats)?;
2696
2697        // Phase 12: Banking KYC/AML Data
2698        let mut banking = self.phase_banking_data(&mut stats)?;
2699
2700        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2701        // Creates coherence between the accounting layer (payments, JEs) and the
2702        // banking layer (bank transactions). A vendor invoice payment now appears
2703        // on both sides with cross-references and fraud labels propagated.
2704        if self.phase_config.generate_banking
2705            && !document_flows.payments.is_empty()
2706            && !banking.accounts.is_empty()
2707        {
2708            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2709            if bridge_rate > 0.0 {
2710                let mut bridge =
2711                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2712                        self.seed,
2713                    );
2714                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2715                    &document_flows.payments,
2716                    &banking.customers,
2717                    &banking.accounts,
2718                    bridge_rate,
2719                );
2720                info!(
2721                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2722                    bridge_stats.bridged_count,
2723                    bridge_stats.transactions_emitted,
2724                    bridge_stats.fraud_propagated,
2725                );
2726                let bridged_count = bridged_txns.len();
2727                banking.transactions.extend(bridged_txns);
2728
2729                // Re-run velocity computation so bridged txns also get features
2730                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2731                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2732                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2733                        &mut banking.transactions,
2734                    );
2735                }
2736
2737                // Recompute suspicious count after bridging
2738                banking.suspicious_count = banking
2739                    .transactions
2740                    .iter()
2741                    .filter(|t| t.is_suspicious)
2742                    .count();
2743                stats.banking_transaction_count = banking.transactions.len();
2744                stats.banking_suspicious_count = banking.suspicious_count;
2745            }
2746        }
2747
2748        // Phase 13: Graph Export
2749        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2750
2751        // Phase 14: LLM Enrichment
2752        self.phase_llm_enrichment(&mut stats);
2753
2754        // Phase 15: Diffusion Enhancement
2755        self.phase_diffusion_enhancement(&mut stats);
2756
2757        // Phase 16: Causal Overlay
2758        self.phase_causal_overlay(&mut stats);
2759
2760        // Phase 17: Bank Reconciliation + Financial Statements
2761        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2762        // provision data (from accounting_standards / tax snapshots) can be wired in.
2763        let mut financial_reporting = self.phase_financial_reporting(
2764            &document_flows,
2765            &entries,
2766            &coa,
2767            &hr,
2768            &audit,
2769            &mut stats,
2770        )?;
2771
2772        // BS coherence check: assets = liabilities + equity
2773        {
2774            use datasynth_core::models::StatementType;
2775            for stmt in &financial_reporting.consolidated_statements {
2776                if stmt.statement_type == StatementType::BalanceSheet {
2777                    let total_assets: rust_decimal::Decimal = stmt
2778                        .line_items
2779                        .iter()
2780                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2781                        .map(|li| li.amount)
2782                        .sum();
2783                    let total_le: rust_decimal::Decimal = stmt
2784                        .line_items
2785                        .iter()
2786                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2787                        .map(|li| li.amount)
2788                        .sum();
2789                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2790                        warn!(
2791                            "BS equation imbalance: assets={}, L+E={}",
2792                            total_assets, total_le
2793                        );
2794                    }
2795                }
2796            }
2797        }
2798
2799        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2800        let accounting_standards =
2801            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2802
2803        // Phase 18a: Merge ECL journal entries into main GL
2804        if !accounting_standards.ecl_journal_entries.is_empty() {
2805            debug!(
2806                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2807                accounting_standards.ecl_journal_entries.len()
2808            );
2809            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2810        }
2811
2812        // Phase 18a: Merge provision journal entries into main GL
2813        if !accounting_standards.provision_journal_entries.is_empty() {
2814            debug!(
2815                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2816                accounting_standards.provision_journal_entries.len()
2817            );
2818            entries.extend(
2819                accounting_standards
2820                    .provision_journal_entries
2821                    .iter()
2822                    .cloned(),
2823            );
2824        }
2825
2826        // Phase 18b: OCPM Events (after all process data is available)
2827        let mut ocpm = self.phase_ocpm_events(
2828            &document_flows,
2829            &sourcing,
2830            &hr,
2831            &manufacturing_snap,
2832            &banking,
2833            &audit,
2834            &financial_reporting,
2835            &mut stats,
2836        )?;
2837
2838        // Emit OCPM events to stream sink
2839        if let Some(ref event_log) = ocpm.event_log {
2840            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2841        }
2842
2843        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
2844        if let Some(ref event_log) = ocpm.event_log {
2845            // Build reverse index: document_ref → (event_id, case_id, object_ids)
2846            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2847                std::collections::HashMap::new();
2848            for (idx, event) in event_log.events.iter().enumerate() {
2849                if let Some(ref doc_ref) = event.document_ref {
2850                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2851                }
2852            }
2853
2854            if !doc_index.is_empty() {
2855                let mut annotated = 0usize;
2856                for entry in &mut entries {
2857                    let doc_id_str = entry.header.document_id.to_string();
2858                    // Collect matching event indices from document_id and reference
2859                    let mut matched_indices: Vec<usize> = Vec::new();
2860                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2861                        matched_indices.extend(indices);
2862                    }
2863                    if let Some(ref reference) = entry.header.reference {
2864                        let bare_ref = reference
2865                            .find(':')
2866                            .map(|i| &reference[i + 1..])
2867                            .unwrap_or(reference.as_str());
2868                        if let Some(indices) = doc_index.get(bare_ref) {
2869                            for &idx in indices {
2870                                if !matched_indices.contains(&idx) {
2871                                    matched_indices.push(idx);
2872                                }
2873                            }
2874                        }
2875                    }
2876                    // Apply matches to JE header
2877                    if !matched_indices.is_empty() {
2878                        for &idx in &matched_indices {
2879                            let event = &event_log.events[idx];
2880                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2881                                entry.header.ocpm_event_ids.push(event.event_id);
2882                            }
2883                            for obj_ref in &event.object_refs {
2884                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2885                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
2886                                }
2887                            }
2888                            if entry.header.ocpm_case_id.is_none() {
2889                                entry.header.ocpm_case_id = event.case_id;
2890                            }
2891                        }
2892                        annotated += 1;
2893                    }
2894                }
2895                debug!(
2896                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2897                    annotated
2898                );
2899            }
2900        }
2901
2902        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
2903        // IC eliminations, opening balances, standards-driven entries) so
2904        // every JournalEntry carries at least one `ocpm_event_ids` link.
2905        if let Some(ref mut event_log) = ocpm.event_log {
2906            let synthesized =
2907                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
2908            if synthesized > 0 {
2909                info!(
2910                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
2911                );
2912            }
2913
2914            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
2915            // events and their owning CaseTrace. Without this, every exported
2916            // OCEL event has `is_anomaly = false` even when the underlying JE
2917            // was flagged.
2918            let anomaly_events =
2919                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
2920            if anomaly_events > 0 {
2921                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
2922            }
2923
2924            // Phase 18f: Inject process-variant imperfections (rework, skipped
2925            // steps, out-of-order events) so conformance checkers see
2926            // realistic variant counts and fitness < 1.0. Uses the P2P
2927            // process rates as the single source of truth.
2928            let p2p_cfg = &self.config.ocpm.p2p_process;
2929            let any_imperfection = p2p_cfg.rework_probability > 0.0
2930                || p2p_cfg.skip_step_probability > 0.0
2931                || p2p_cfg.out_of_order_probability > 0.0;
2932            if any_imperfection {
2933                use rand_chacha::rand_core::SeedableRng;
2934                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
2935                    rework_rate: p2p_cfg.rework_probability,
2936                    skip_rate: p2p_cfg.skip_step_probability,
2937                    out_of_order_rate: p2p_cfg.out_of_order_probability,
2938                };
2939                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
2940                let stats =
2941                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
2942                if stats.rework + stats.skipped + stats.out_of_order > 0 {
2943                    info!(
2944                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
2945                        stats.rework, stats.skipped, stats.out_of_order
2946                    );
2947                }
2948            }
2949        }
2950
2951        // Phase 19: Sales Quotes, Management KPIs, Budgets
2952        let sales_kpi_budgets =
2953            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2954
2955        // Phase 22: Treasury Data Generation
2956        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
2957        // are included in the pre-tax income used by phase_tax_generation.
2958        let treasury =
2959            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2960
2961        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
2962        if !treasury.journal_entries.is_empty() {
2963            debug!(
2964                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2965                treasury.journal_entries.len()
2966            );
2967            entries.extend(treasury.journal_entries.iter().cloned());
2968        }
2969
2970        // Phase 20: Tax Generation
2971        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2972
2973        // Phase 20 JEs: Merge tax posting journal entries into main GL
2974        if !tax.tax_posting_journal_entries.is_empty() {
2975            debug!(
2976                "Merging {} tax posting JEs into GL",
2977                tax.tax_posting_journal_entries.len()
2978            );
2979            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2980        }
2981
2982        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
2983        // Build supplementary cash flow items from upstream JE data (depreciation,
2984        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
2985        {
2986            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2987
2988            let framework_str = {
2989                use datasynth_config::schema::AccountingFrameworkConfig;
2990                match self
2991                    .config
2992                    .accounting_standards
2993                    .framework
2994                    .unwrap_or_default()
2995                {
2996                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2997                        "IFRS"
2998                    }
2999                    _ => "US_GAAP",
3000                }
3001            };
3002
3003            // Sum depreciation debits (account 6000) from close JEs
3004            let depreciation_total: rust_decimal::Decimal = entries
3005                .iter()
3006                .filter(|je| je.header.document_type == "CL")
3007                .flat_map(|je| je.lines.iter())
3008                .filter(|l| l.gl_account.starts_with("6000"))
3009                .map(|l| l.debit_amount)
3010                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3011
3012            // Sum interest expense debits (account 7100)
3013            let interest_paid: rust_decimal::Decimal = entries
3014                .iter()
3015                .flat_map(|je| je.lines.iter())
3016                .filter(|l| l.gl_account.starts_with("7100"))
3017                .map(|l| l.debit_amount)
3018                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3019
3020            // Sum tax expense debits (account 8000)
3021            let tax_paid: rust_decimal::Decimal = entries
3022                .iter()
3023                .flat_map(|je| je.lines.iter())
3024                .filter(|l| l.gl_account.starts_with("8000"))
3025                .map(|l| l.debit_amount)
3026                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3027
3028            // Sum capex debits on fixed assets (account 1500)
3029            let capex: rust_decimal::Decimal = entries
3030                .iter()
3031                .flat_map(|je| je.lines.iter())
3032                .filter(|l| l.gl_account.starts_with("1500"))
3033                .map(|l| l.debit_amount)
3034                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3035
3036            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
3037            let dividends_paid: rust_decimal::Decimal = entries
3038                .iter()
3039                .flat_map(|je| je.lines.iter())
3040                .filter(|l| l.gl_account == "2170")
3041                .map(|l| l.debit_amount)
3042                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3043
3044            let cf_data = CashFlowSourceData {
3045                depreciation_total,
3046                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3047                delta_ar: rust_decimal::Decimal::ZERO,
3048                delta_ap: rust_decimal::Decimal::ZERO,
3049                delta_inventory: rust_decimal::Decimal::ZERO,
3050                capex,
3051                debt_issuance: rust_decimal::Decimal::ZERO,
3052                debt_repayment: rust_decimal::Decimal::ZERO,
3053                interest_paid,
3054                tax_paid,
3055                dividends_paid,
3056                framework: framework_str.to_string(),
3057            };
3058
3059            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3060            if !enhanced_cf_items.is_empty() {
3061                // Merge into ALL cash flow statements (standalone + consolidated)
3062                use datasynth_core::models::StatementType;
3063                let merge_count = enhanced_cf_items.len();
3064                for stmt in financial_reporting
3065                    .financial_statements
3066                    .iter_mut()
3067                    .chain(financial_reporting.consolidated_statements.iter_mut())
3068                    .chain(
3069                        financial_reporting
3070                            .standalone_statements
3071                            .values_mut()
3072                            .flat_map(|v| v.iter_mut()),
3073                    )
3074                {
3075                    if stmt.statement_type == StatementType::CashFlowStatement {
3076                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3077                    }
3078                }
3079                info!(
3080                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3081                    merge_count
3082                );
3083            }
3084        }
3085
3086        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3087        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3088        self.generate_notes_to_financial_statements(
3089            &mut financial_reporting,
3090            &accounting_standards,
3091            &tax,
3092            &hr,
3093            &audit,
3094            &treasury,
3095        );
3096
3097        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3098        // When we have 2+ companies, derive segment data from actual journal entries
3099        // to complement or replace the FS-generator-based segments.
3100        if self.config.companies.len() >= 2 && !entries.is_empty() {
3101            let companies: Vec<(String, String)> = self
3102                .config
3103                .companies
3104                .iter()
3105                .map(|c| (c.code.clone(), c.name.clone()))
3106                .collect();
3107            let ic_elim: rust_decimal::Decimal =
3108                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3109            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3110                .unwrap_or(NaiveDate::MIN);
3111            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3112            let period_label = format!(
3113                "{}-{:02}",
3114                end_date.year(),
3115                (end_date - chrono::Days::new(1)).month()
3116            );
3117
3118            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3119            let (je_segments, je_recon) =
3120                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3121            if !je_segments.is_empty() {
3122                info!(
3123                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3124                    je_segments.len(),
3125                    ic_elim,
3126                );
3127                // Replace if existing segment_reports were empty; otherwise supplement
3128                if financial_reporting.segment_reports.is_empty() {
3129                    financial_reporting.segment_reports = je_segments;
3130                    financial_reporting.segment_reconciliations = vec![je_recon];
3131                } else {
3132                    financial_reporting.segment_reports.extend(je_segments);
3133                    financial_reporting.segment_reconciliations.push(je_recon);
3134                }
3135            }
3136        }
3137
3138        // Phase 21: ESG Data Generation
3139        let esg_snap =
3140            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3141
3142        // Phase 23: Project Accounting Data Generation
3143        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3144
3145        // Phase 24: Process Evolution + Organizational Events
3146        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3147
3148        // Phase 24b: Disruption Events
3149        let disruption_events = self.phase_disruption_events(&mut stats)?;
3150
3151        // Phase 27: Bi-Temporal Vendor Version Chains
3152        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3153
3154        // Phase 28: Entity Relationship Graph + Cross-Process Links
3155        let (entity_relationship_graph, cross_process_links) =
3156            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3157
3158        // Phase 29: Industry-specific GL accounts
3159        let industry_output = self.phase_industry_data(&mut stats);
3160
3161        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3162        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3163
3164        // Phase: Neural enhancement (optional — requires neural feature + config)
3165        if self.config.diffusion.enabled
3166            && (self.config.diffusion.backend == "neural"
3167                || self.config.diffusion.backend == "hybrid")
3168        {
3169            let neural = &self.config.diffusion.neural;
3170            // Validate hybrid_strategy early so an unknown string doesn't
3171            // silently fall through to weighted_average semantics.
3172            const VALID_STRATEGIES: &[&str] = &["weighted_average", "column_select", "threshold"];
3173            if !VALID_STRATEGIES.contains(&neural.hybrid_strategy.as_str()) {
3174                warn!(
3175                    "Unknown diffusion.neural.hybrid_strategy='{}' — expected one of {:?}; \
3176                     falling back to 'weighted_average'.",
3177                    neural.hybrid_strategy, VALID_STRATEGIES
3178                );
3179            }
3180            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3181            if (weight - neural.hybrid_weight).abs() > f64::EPSILON {
3182                warn!(
3183                    "diffusion.neural.hybrid_weight={} clamped to [0,1] → {}",
3184                    neural.hybrid_weight, weight
3185                );
3186            }
3187            info!(
3188                "Phase neural enhancement: backend={} strategy={} weight={:.2} columns={} \
3189                 (neural_columns: {:?})",
3190                self.config.diffusion.backend,
3191                neural.hybrid_strategy,
3192                weight,
3193                neural.neural_columns.len(),
3194                neural.neural_columns,
3195            );
3196            stats.neural_hybrid_weight = Some(weight);
3197            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3198            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3199            // Neural enhancement integrates via the DiffusionBackend trait:
3200            // 1. NeuralDiffusionTrainer::train() on generated amounts
3201            // 2. HybridGenerator blends rule-based + neural at configured weight
3202            // 3. TabularTransformer for conditional column prediction
3203            // 4. GnnGraphTrainer for entity relationship structure
3204            // Actual training requires the `neural` cargo feature on datasynth-core.
3205            // The orchestrator delegates to the diffusion module which is feature-gated.
3206        }
3207
3208        // Phase 19b: Hypergraph Export (after all data is available)
3209        self.phase_hypergraph_export(
3210            &coa,
3211            &entries,
3212            &document_flows,
3213            &sourcing,
3214            &hr,
3215            &manufacturing_snap,
3216            &banking,
3217            &audit,
3218            &financial_reporting,
3219            &ocpm,
3220            &compliance_regulations,
3221            &mut stats,
3222        )?;
3223
3224        // Phase 10c: Additional graph builders (approval, entity, banking)
3225        // These run after all data is available since they need banking/IC data.
3226        if self.phase_config.generate_graph_export {
3227            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3228        }
3229
3230        // Log informational messages for config sections not yet fully wired
3231        if self.config.streaming.enabled {
3232            info!("Note: streaming config is enabled but batch mode does not use it");
3233        }
3234        if self.config.vendor_network.enabled {
3235            debug!("Vendor network config available; relationship graph generation is partial");
3236        }
3237        if self.config.customer_segmentation.enabled {
3238            debug!("Customer segmentation config available; segment-aware generation is partial");
3239        }
3240
3241        // Log final resource statistics
3242        let resource_stats = self.resource_guard.stats();
3243        info!(
3244            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3245            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3246            resource_stats.disk.estimated_bytes_written,
3247            resource_stats.degradation_level
3248        );
3249
3250        // Flush any remaining stream sink data
3251        if let Some(ref sink) = self.phase_sink {
3252            if let Err(e) = sink.flush() {
3253                warn!("Stream sink flush failed: {e}");
3254            }
3255        }
3256
3257        // Build data lineage graph
3258        let lineage = self.build_lineage_graph();
3259
3260        // Evaluate quality gates if enabled in config
3261        let gate_result = if self.config.quality_gates.enabled {
3262            let profile_name = &self.config.quality_gates.profile;
3263            match datasynth_eval::gates::get_profile(profile_name) {
3264                Some(profile) => {
3265                    // Build an evaluation populated with actual generation metrics.
3266                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3267
3268                    // Populate balance sheet evaluation from balance validation results
3269                    if balance_validation.validated {
3270                        eval.coherence.balance =
3271                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3272                                equation_balanced: balance_validation.is_balanced,
3273                                max_imbalance: (balance_validation.total_debits
3274                                    - balance_validation.total_credits)
3275                                    .abs(),
3276                                periods_evaluated: 1,
3277                                periods_imbalanced: if balance_validation.is_balanced {
3278                                    0
3279                                } else {
3280                                    1
3281                                },
3282                                period_results: Vec::new(),
3283                                companies_evaluated: self.config.companies.len(),
3284                            });
3285                    }
3286
3287                    // Set coherence passes based on balance validation
3288                    eval.coherence.passes = balance_validation.is_balanced;
3289                    if !balance_validation.is_balanced {
3290                        eval.coherence
3291                            .failures
3292                            .push("Balance sheet equation not satisfied".to_string());
3293                    }
3294
3295                    // Set statistical score based on entry count (basic sanity)
3296                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3297                    eval.statistical.passes = !entries.is_empty();
3298
3299                    // Set quality score from data quality stats
3300                    eval.quality.overall_score = 0.9; // Default high for generated data
3301                    eval.quality.passes = true;
3302
3303                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3304                    info!(
3305                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3306                        profile_name, result.gates_passed, result.gates_total, result.summary
3307                    );
3308                    Some(result)
3309                }
3310                None => {
3311                    warn!(
3312                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3313                        profile_name
3314                    );
3315                    None
3316                }
3317            }
3318        } else {
3319            None
3320        };
3321
3322        // Generate internal controls if enabled
3323        let internal_controls = if self.config.internal_controls.enabled {
3324            InternalControl::standard_controls()
3325        } else {
3326            Vec::new()
3327        };
3328
3329        Ok(EnhancedGenerationResult {
3330            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3331            master_data: std::mem::take(&mut self.master_data),
3332            document_flows,
3333            subledger,
3334            ocpm,
3335            audit,
3336            banking,
3337            graph_export,
3338            sourcing,
3339            financial_reporting,
3340            hr,
3341            accounting_standards,
3342            manufacturing: manufacturing_snap,
3343            sales_kpi_budgets,
3344            tax,
3345            esg: esg_snap,
3346            treasury,
3347            project_accounting,
3348            process_evolution,
3349            organizational_events,
3350            disruption_events,
3351            intercompany,
3352            journal_entries: entries,
3353            anomaly_labels,
3354            balance_validation,
3355            data_quality_stats,
3356            quality_issues,
3357            statistics: stats,
3358            lineage: Some(lineage),
3359            gate_result,
3360            internal_controls,
3361            sod_violations,
3362            opening_balances,
3363            subledger_reconciliation,
3364            counterfactual_pairs,
3365            red_flags,
3366            collusion_rings,
3367            temporal_vendor_chains,
3368            entity_relationship_graph,
3369            cross_process_links,
3370            industry_output,
3371            compliance_regulations,
3372        })
3373    }
3374
3375    // ========================================================================
3376    // Generation Phase Methods
3377    // ========================================================================
3378
3379    /// Phase 1: Generate Chart of Accounts and update statistics.
3380    fn phase_chart_of_accounts(
3381        &mut self,
3382        stats: &mut EnhancedGenerationStatistics,
3383    ) -> SynthResult<Arc<ChartOfAccounts>> {
3384        info!("Phase 1: Generating Chart of Accounts");
3385        let coa = self.generate_coa()?;
3386        stats.accounts_count = coa.account_count();
3387        info!(
3388            "Chart of Accounts generated: {} accounts",
3389            stats.accounts_count
3390        );
3391        self.check_resources_with_log("post-coa")?;
3392        Ok(coa)
3393    }
3394
3395    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3396    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3397        if self.phase_config.generate_master_data {
3398            info!("Phase 2: Generating Master Data");
3399            self.generate_master_data()?;
3400            stats.vendor_count = self.master_data.vendors.len();
3401            stats.customer_count = self.master_data.customers.len();
3402            stats.material_count = self.master_data.materials.len();
3403            stats.asset_count = self.master_data.assets.len();
3404            stats.employee_count = self.master_data.employees.len();
3405            info!(
3406                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3407                stats.vendor_count, stats.customer_count, stats.material_count,
3408                stats.asset_count, stats.employee_count
3409            );
3410            self.check_resources_with_log("post-master-data")?;
3411        } else {
3412            debug!("Phase 2: Skipped (master data generation disabled)");
3413        }
3414        Ok(())
3415    }
3416
3417    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3418    fn phase_document_flows(
3419        &mut self,
3420        stats: &mut EnhancedGenerationStatistics,
3421    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3422        let mut document_flows = DocumentFlowSnapshot::default();
3423        let mut subledger = SubledgerSnapshot::default();
3424        // Dunning JEs (interest + charges) accumulated here and merged into the
3425        // main FA-JE list below so they appear in the GL.
3426        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3427
3428        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3429            info!("Phase 3: Generating Document Flows");
3430            self.generate_document_flows(&mut document_flows)?;
3431            stats.p2p_chain_count = document_flows.p2p_chains.len();
3432            stats.o2c_chain_count = document_flows.o2c_chains.len();
3433            info!(
3434                "Document flows generated: {} P2P chains, {} O2C chains",
3435                stats.p2p_chain_count, stats.o2c_chain_count
3436            );
3437
3438            // Phase 3b: Link document flows to subledgers (for data coherence)
3439            debug!("Phase 3b: Linking document flows to subledgers");
3440            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3441            stats.ap_invoice_count = subledger.ap_invoices.len();
3442            stats.ar_invoice_count = subledger.ar_invoices.len();
3443            debug!(
3444                "Subledgers linked: {} AP invoices, {} AR invoices",
3445                stats.ap_invoice_count, stats.ar_invoice_count
3446            );
3447
3448            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3449            // Without this step the subledger is systematically overstated because
3450            // amount_remaining is set at invoice creation and never reduced by
3451            // the payments that were generated in the document-flow phase.
3452            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3453            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3454            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3455            debug!("Payment settlements applied to AP and AR subledgers");
3456
3457            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3458            // The as-of date is the last day of the configured period.
3459            if let Ok(start_date) =
3460                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3461            {
3462                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3463                    - chrono::Days::new(1);
3464                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3465                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3466                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3467                // derived from JE-level aggregation and will typically differ. This is a known
3468                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3469                // generated independently. A future reconciliation phase should align them by
3470                // using subledger totals as the authoritative source for BS Receivables.
3471                for company in &self.config.companies {
3472                    let ar_report = ARAgingReport::from_invoices(
3473                        company.code.clone(),
3474                        &subledger.ar_invoices,
3475                        as_of_date,
3476                    );
3477                    subledger.ar_aging_reports.push(ar_report);
3478
3479                    let ap_report = APAgingReport::from_invoices(
3480                        company.code.clone(),
3481                        &subledger.ap_invoices,
3482                        as_of_date,
3483                    );
3484                    subledger.ap_aging_reports.push(ap_report);
3485                }
3486                debug!(
3487                    "AR/AP aging reports built: {} AR, {} AP",
3488                    subledger.ar_aging_reports.len(),
3489                    subledger.ap_aging_reports.len()
3490                );
3491
3492                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3493                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3494                {
3495                    use datasynth_generators::DunningGenerator;
3496                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3497                    for company in &self.config.companies {
3498                        let currency = company.currency.as_str();
3499                        // Collect mutable references to AR invoices for this company
3500                        // (dunning generator updates dunning_info on invoices in-place).
3501                        let mut company_invoices: Vec<
3502                            datasynth_core::models::subledger::ar::ARInvoice,
3503                        > = subledger
3504                            .ar_invoices
3505                            .iter()
3506                            .filter(|inv| inv.company_code == company.code)
3507                            .cloned()
3508                            .collect();
3509
3510                        if company_invoices.is_empty() {
3511                            continue;
3512                        }
3513
3514                        let result = dunning_gen.execute_dunning_run(
3515                            &company.code,
3516                            as_of_date,
3517                            &mut company_invoices,
3518                            currency,
3519                        );
3520
3521                        // Write back updated dunning info to the main AR invoice list
3522                        for updated in &company_invoices {
3523                            if let Some(orig) = subledger
3524                                .ar_invoices
3525                                .iter_mut()
3526                                .find(|i| i.invoice_number == updated.invoice_number)
3527                            {
3528                                orig.dunning_info = updated.dunning_info.clone();
3529                            }
3530                        }
3531
3532                        subledger.dunning_runs.push(result.dunning_run);
3533                        subledger.dunning_letters.extend(result.letters);
3534                        // Dunning JEs (interest + charges) collected into local buffer.
3535                        dunning_journal_entries.extend(result.journal_entries);
3536                    }
3537                    debug!(
3538                        "Dunning runs complete: {} runs, {} letters",
3539                        subledger.dunning_runs.len(),
3540                        subledger.dunning_letters.len()
3541                    );
3542                }
3543            }
3544
3545            self.check_resources_with_log("post-document-flows")?;
3546        } else {
3547            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3548        }
3549
3550        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3551        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3552        if !self.master_data.assets.is_empty() {
3553            debug!("Generating FA subledger records");
3554            let company_code = self
3555                .config
3556                .companies
3557                .first()
3558                .map(|c| c.code.as_str())
3559                .unwrap_or("1000");
3560            let currency = self
3561                .config
3562                .companies
3563                .first()
3564                .map(|c| c.currency.as_str())
3565                .unwrap_or("USD");
3566
3567            let mut fa_gen = datasynth_generators::FAGenerator::new(
3568                datasynth_generators::FAGeneratorConfig::default(),
3569                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3570            );
3571
3572            for asset in &self.master_data.assets {
3573                let (record, je) = fa_gen.generate_asset_acquisition(
3574                    company_code,
3575                    &format!("{:?}", asset.asset_class),
3576                    &asset.description,
3577                    asset.acquisition_date,
3578                    currency,
3579                    asset.cost_center.as_deref(),
3580                );
3581                subledger.fa_records.push(record);
3582                fa_journal_entries.push(je);
3583            }
3584
3585            stats.fa_subledger_count = subledger.fa_records.len();
3586            debug!(
3587                "FA subledger records generated: {} (with {} acquisition JEs)",
3588                stats.fa_subledger_count,
3589                fa_journal_entries.len()
3590            );
3591        }
3592
3593        // Generate Inventory subledger records from master data materials
3594        if !self.master_data.materials.is_empty() {
3595            debug!("Generating Inventory subledger records");
3596            let first_company = self.config.companies.first();
3597            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3598            let inv_currency = first_company
3599                .map(|c| c.currency.clone())
3600                .unwrap_or_else(|| "USD".to_string());
3601
3602            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3603                datasynth_generators::InventoryGeneratorConfig::default(),
3604                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3605                inv_currency.clone(),
3606            );
3607
3608            for (i, material) in self.master_data.materials.iter().enumerate() {
3609                let plant = format!("PLANT{:02}", (i % 3) + 1);
3610                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3611                let initial_qty = rust_decimal::Decimal::from(
3612                    material
3613                        .safety_stock
3614                        .to_string()
3615                        .parse::<i64>()
3616                        .unwrap_or(100),
3617                );
3618
3619                let position = inv_gen.generate_position(
3620                    company_code,
3621                    &plant,
3622                    &storage_loc,
3623                    &material.material_id,
3624                    &material.description,
3625                    initial_qty,
3626                    Some(material.standard_cost),
3627                    &inv_currency,
3628                );
3629                subledger.inventory_positions.push(position);
3630            }
3631
3632            stats.inventory_subledger_count = subledger.inventory_positions.len();
3633            debug!(
3634                "Inventory subledger records generated: {}",
3635                stats.inventory_subledger_count
3636            );
3637        }
3638
3639        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3640        if !subledger.fa_records.is_empty() {
3641            if let Ok(start_date) =
3642                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3643            {
3644                let company_code = self
3645                    .config
3646                    .companies
3647                    .first()
3648                    .map(|c| c.code.as_str())
3649                    .unwrap_or("1000");
3650                let fiscal_year = start_date.year();
3651                let start_period = start_date.month();
3652                let end_period =
3653                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3654
3655                let depr_cfg = FaDepreciationScheduleConfig {
3656                    fiscal_year,
3657                    start_period,
3658                    end_period,
3659                    seed_offset: 800,
3660                };
3661                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3662                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3663                let run_count = runs.len();
3664                subledger.depreciation_runs = runs;
3665                debug!(
3666                    "Depreciation runs generated: {} runs for {} periods",
3667                    run_count, self.config.global.period_months
3668                );
3669            }
3670        }
3671
3672        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3673        if !subledger.inventory_positions.is_empty() {
3674            if let Ok(start_date) =
3675                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3676            {
3677                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3678                    - chrono::Days::new(1);
3679
3680                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3681                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3682
3683                for company in &self.config.companies {
3684                    let result = inv_val_gen.generate(
3685                        &company.code,
3686                        &subledger.inventory_positions,
3687                        as_of_date,
3688                    );
3689                    subledger.inventory_valuations.push(result);
3690                }
3691                debug!(
3692                    "Inventory valuations generated: {} company reports",
3693                    subledger.inventory_valuations.len()
3694                );
3695            }
3696        }
3697
3698        Ok((document_flows, subledger, fa_journal_entries))
3699    }
3700
3701    /// Phase 3c: Generate OCPM events from document flows.
3702    #[allow(clippy::too_many_arguments)]
3703    fn phase_ocpm_events(
3704        &mut self,
3705        document_flows: &DocumentFlowSnapshot,
3706        sourcing: &SourcingSnapshot,
3707        hr: &HrSnapshot,
3708        manufacturing: &ManufacturingSnapshot,
3709        banking: &BankingSnapshot,
3710        audit: &AuditSnapshot,
3711        financial_reporting: &FinancialReportingSnapshot,
3712        stats: &mut EnhancedGenerationStatistics,
3713    ) -> SynthResult<OcpmSnapshot> {
3714        let degradation = self.check_resources()?;
3715        if degradation >= DegradationLevel::Reduced {
3716            debug!(
3717                "Phase skipped due to resource pressure (degradation: {:?})",
3718                degradation
3719            );
3720            return Ok(OcpmSnapshot::default());
3721        }
3722        if self.phase_config.generate_ocpm_events {
3723            info!("Phase 3c: Generating OCPM Events");
3724            let ocpm_snapshot = self.generate_ocpm_events(
3725                document_flows,
3726                sourcing,
3727                hr,
3728                manufacturing,
3729                banking,
3730                audit,
3731                financial_reporting,
3732            )?;
3733            stats.ocpm_event_count = ocpm_snapshot.event_count;
3734            stats.ocpm_object_count = ocpm_snapshot.object_count;
3735            stats.ocpm_case_count = ocpm_snapshot.case_count;
3736            info!(
3737                "OCPM events generated: {} events, {} objects, {} cases",
3738                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3739            );
3740            self.check_resources_with_log("post-ocpm")?;
3741            Ok(ocpm_snapshot)
3742        } else {
3743            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3744            Ok(OcpmSnapshot::default())
3745        }
3746    }
3747
3748    /// Phase 4: Generate journal entries from document flows and standalone generation.
3749    fn phase_journal_entries(
3750        &mut self,
3751        coa: &Arc<ChartOfAccounts>,
3752        document_flows: &DocumentFlowSnapshot,
3753        _stats: &mut EnhancedGenerationStatistics,
3754    ) -> SynthResult<Vec<JournalEntry>> {
3755        let mut entries = Vec::new();
3756
3757        // Phase 4a: Generate JEs from document flows (for data coherence)
3758        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3759            debug!("Phase 4a: Generating JEs from document flows");
3760            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3761            debug!("Generated {} JEs from document flows", flow_entries.len());
3762            entries.extend(flow_entries);
3763        }
3764
3765        // Phase 4b: Generate standalone journal entries
3766        if self.phase_config.generate_journal_entries {
3767            info!("Phase 4: Generating Journal Entries");
3768            let je_entries = self.generate_journal_entries(coa)?;
3769            info!("Generated {} standalone journal entries", je_entries.len());
3770            entries.extend(je_entries);
3771        } else {
3772            debug!("Phase 4: Skipped (journal entry generation disabled)");
3773        }
3774
3775        if !entries.is_empty() {
3776            // Note: stats.total_entries/total_line_items are set in generate()
3777            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
3778            self.check_resources_with_log("post-journal-entries")?;
3779        }
3780
3781        Ok(entries)
3782    }
3783
3784    /// Phase 5: Inject anomalies into journal entries.
3785    fn phase_anomaly_injection(
3786        &mut self,
3787        entries: &mut [JournalEntry],
3788        actions: &DegradationActions,
3789        stats: &mut EnhancedGenerationStatistics,
3790    ) -> SynthResult<AnomalyLabels> {
3791        if self.phase_config.inject_anomalies
3792            && !entries.is_empty()
3793            && !actions.skip_anomaly_injection
3794        {
3795            info!("Phase 5: Injecting Anomalies");
3796            let result = self.inject_anomalies(entries)?;
3797            stats.anomalies_injected = result.labels.len();
3798            info!("Injected {} anomalies", stats.anomalies_injected);
3799            self.check_resources_with_log("post-anomaly-injection")?;
3800            Ok(result)
3801        } else if actions.skip_anomaly_injection {
3802            warn!("Phase 5: Skipped due to resource degradation");
3803            Ok(AnomalyLabels::default())
3804        } else {
3805            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3806            Ok(AnomalyLabels::default())
3807        }
3808    }
3809
3810    /// Phase 6: Validate balance sheet equation on journal entries.
3811    fn phase_balance_validation(
3812        &mut self,
3813        entries: &[JournalEntry],
3814    ) -> SynthResult<BalanceValidationResult> {
3815        if self.phase_config.validate_balances && !entries.is_empty() {
3816            debug!("Phase 6: Validating Balances");
3817            let balance_validation = self.validate_journal_entries(entries)?;
3818            if balance_validation.is_balanced {
3819                debug!("Balance validation passed");
3820            } else {
3821                warn!(
3822                    "Balance validation found {} errors",
3823                    balance_validation.validation_errors.len()
3824                );
3825            }
3826            Ok(balance_validation)
3827        } else {
3828            Ok(BalanceValidationResult::default())
3829        }
3830    }
3831
3832    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3833    fn phase_data_quality_injection(
3834        &mut self,
3835        entries: &mut [JournalEntry],
3836        actions: &DegradationActions,
3837        stats: &mut EnhancedGenerationStatistics,
3838    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3839        if self.phase_config.inject_data_quality
3840            && !entries.is_empty()
3841            && !actions.skip_data_quality
3842        {
3843            info!("Phase 7: Injecting Data Quality Variations");
3844            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3845            stats.data_quality_issues = dq_stats.records_with_issues;
3846            info!("Injected {} data quality issues", stats.data_quality_issues);
3847            self.check_resources_with_log("post-data-quality")?;
3848            Ok((dq_stats, quality_issues))
3849        } else if actions.skip_data_quality {
3850            warn!("Phase 7: Skipped due to resource degradation");
3851            Ok((DataQualityStats::default(), Vec::new()))
3852        } else {
3853            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3854            Ok((DataQualityStats::default(), Vec::new()))
3855        }
3856    }
3857
3858    /// Phase 10b: Generate period-close journal entries.
3859    ///
3860    /// Generates:
3861    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3862    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3863    ///    for the configured period.
3864    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
3865    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3866    ///    earnings via the Income Summary (3600) clearing account.
3867    fn phase_period_close(
3868        &mut self,
3869        entries: &mut Vec<JournalEntry>,
3870        subledger: &SubledgerSnapshot,
3871        stats: &mut EnhancedGenerationStatistics,
3872    ) -> SynthResult<()> {
3873        if !self.phase_config.generate_period_close || entries.is_empty() {
3874            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3875            return Ok(());
3876        }
3877
3878        info!("Phase 10b: Generating period-close journal entries");
3879
3880        use datasynth_core::accounts::{
3881            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3882        };
3883        use rust_decimal::Decimal;
3884
3885        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3886            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3887        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3888        // Posting date for close entries is the last day of the period
3889        let close_date = end_date - chrono::Days::new(1);
3890
3891        // Statutory tax rate (21% — configurable rates come in later tiers)
3892        let tax_rate = Decimal::new(21, 2); // 0.21
3893
3894        // Collect company codes from config
3895        let company_codes: Vec<String> = self
3896            .config
3897            .companies
3898            .iter()
3899            .map(|c| c.code.clone())
3900            .collect();
3901
3902        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3903        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3904        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3905
3906        // --- Depreciation JEs (per asset) ---
3907        // Compute period depreciation for each active fixed asset using straight-line method.
3908        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3909        let period_months = self.config.global.period_months;
3910        for asset in &subledger.fa_records {
3911            // Skip assets that are inactive / fully depreciated / non-depreciable
3912            use datasynth_core::models::subledger::fa::AssetStatus;
3913            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3914                continue;
3915            }
3916            let useful_life_months = asset.useful_life_months();
3917            if useful_life_months == 0 {
3918                // Land or CIP — not depreciated
3919                continue;
3920            }
3921            let salvage_value = asset.salvage_value();
3922            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3923            if depreciable_base == Decimal::ZERO {
3924                continue;
3925            }
3926            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3927                * Decimal::from(period_months))
3928            .round_dp(2);
3929            if period_depr <= Decimal::ZERO {
3930                continue;
3931            }
3932
3933            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3934            depr_header.document_type = "CL".to_string();
3935            depr_header.header_text = Some(format!(
3936                "Depreciation - {} {}",
3937                asset.asset_number, asset.description
3938            ));
3939            depr_header.created_by = "CLOSE_ENGINE".to_string();
3940            depr_header.source = TransactionSource::Automated;
3941            depr_header.business_process = Some(BusinessProcess::R2R);
3942
3943            let doc_id = depr_header.document_id;
3944            let mut depr_je = JournalEntry::new(depr_header);
3945
3946            // DR Depreciation Expense (6000)
3947            depr_je.add_line(JournalEntryLine::debit(
3948                doc_id,
3949                1,
3950                expense_accounts::DEPRECIATION.to_string(),
3951                period_depr,
3952            ));
3953            // CR Accumulated Depreciation (1510)
3954            depr_je.add_line(JournalEntryLine::credit(
3955                doc_id,
3956                2,
3957                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3958                period_depr,
3959            ));
3960
3961            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3962            close_jes.push(depr_je);
3963        }
3964
3965        if !subledger.fa_records.is_empty() {
3966            debug!(
3967                "Generated {} depreciation JEs from {} FA records",
3968                close_jes.len(),
3969                subledger.fa_records.len()
3970            );
3971        }
3972
3973        // --- Accrual entries (standard period-end accruals per company) ---
3974        // Generate standard accrued expense entries (utilities, rent, interest) using
3975        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3976        {
3977            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3978            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3979
3980            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3981            let accrual_items: &[(&str, &str, &str)] = &[
3982                ("Accrued Utilities", "6200", "2100"),
3983                ("Accrued Rent", "6300", "2100"),
3984                ("Accrued Interest", "6100", "2150"),
3985            ];
3986
3987            for company_code in &company_codes {
3988                // Estimate company revenue from existing JEs
3989                let company_revenue: Decimal = entries
3990                    .iter()
3991                    .filter(|e| e.header.company_code == *company_code)
3992                    .flat_map(|e| e.lines.iter())
3993                    .filter(|l| l.gl_account.starts_with('4'))
3994                    .map(|l| l.credit_amount - l.debit_amount)
3995                    .fold(Decimal::ZERO, |acc, v| acc + v);
3996
3997                if company_revenue <= Decimal::ZERO {
3998                    continue;
3999                }
4000
4001                // Use 0.5% of period revenue per accrual item as a proxy
4002                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4003                if accrual_base <= Decimal::ZERO {
4004                    continue;
4005                }
4006
4007                for (description, expense_acct, liability_acct) in accrual_items {
4008                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4009                        company_code,
4010                        description,
4011                        accrual_base,
4012                        expense_acct,
4013                        liability_acct,
4014                        close_date,
4015                        None,
4016                    );
4017                    close_jes.push(accrual_je);
4018                    if let Some(rev_je) = reversal_je {
4019                        close_jes.push(rev_je);
4020                    }
4021                }
4022            }
4023
4024            debug!(
4025                "Generated accrual entries for {} companies",
4026                company_codes.len()
4027            );
4028        }
4029
4030        for company_code in &company_codes {
4031            // Calculate net income for this company from existing JEs:
4032            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4033            // Revenue (4xxx): credit-normal, so net = credits - debits
4034            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4035            let mut total_revenue = Decimal::ZERO;
4036            let mut total_expenses = Decimal::ZERO;
4037
4038            for entry in entries.iter() {
4039                if entry.header.company_code != *company_code {
4040                    continue;
4041                }
4042                for line in &entry.lines {
4043                    let category = AccountCategory::from_account(&line.gl_account);
4044                    match category {
4045                        AccountCategory::Revenue => {
4046                            // Revenue is credit-normal: net revenue = credits - debits
4047                            total_revenue += line.credit_amount - line.debit_amount;
4048                        }
4049                        AccountCategory::Cogs
4050                        | AccountCategory::OperatingExpense
4051                        | AccountCategory::OtherIncomeExpense
4052                        | AccountCategory::Tax => {
4053                            // Expenses are debit-normal: net expense = debits - credits
4054                            total_expenses += line.debit_amount - line.credit_amount;
4055                        }
4056                        _ => {}
4057                    }
4058                }
4059            }
4060
4061            let pre_tax_income = total_revenue - total_expenses;
4062
4063            // Skip if no income statement activity
4064            if pre_tax_income == Decimal::ZERO {
4065                debug!(
4066                    "Company {}: no pre-tax income, skipping period close",
4067                    company_code
4068                );
4069                continue;
4070            }
4071
4072            // --- Tax provision / DTA JE ---
4073            if pre_tax_income > Decimal::ZERO {
4074                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4075                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4076
4077                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4078                tax_header.document_type = "CL".to_string();
4079                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4080                tax_header.created_by = "CLOSE_ENGINE".to_string();
4081                tax_header.source = TransactionSource::Automated;
4082                tax_header.business_process = Some(BusinessProcess::R2R);
4083
4084                let doc_id = tax_header.document_id;
4085                let mut tax_je = JournalEntry::new(tax_header);
4086
4087                // DR Tax Expense (8000)
4088                tax_je.add_line(JournalEntryLine::debit(
4089                    doc_id,
4090                    1,
4091                    tax_accounts::TAX_EXPENSE.to_string(),
4092                    tax_amount,
4093                ));
4094                // CR Income Tax Payable (2130)
4095                tax_je.add_line(JournalEntryLine::credit(
4096                    doc_id,
4097                    2,
4098                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4099                    tax_amount,
4100                ));
4101
4102                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4103                close_jes.push(tax_je);
4104            } else {
4105                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4106                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4107                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4108                if dta_amount > Decimal::ZERO {
4109                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4110                    dta_header.document_type = "CL".to_string();
4111                    dta_header.header_text =
4112                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4113                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4114                    dta_header.source = TransactionSource::Automated;
4115                    dta_header.business_process = Some(BusinessProcess::R2R);
4116
4117                    let doc_id = dta_header.document_id;
4118                    let mut dta_je = JournalEntry::new(dta_header);
4119
4120                    // DR Deferred Tax Asset (1600)
4121                    dta_je.add_line(JournalEntryLine::debit(
4122                        doc_id,
4123                        1,
4124                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4125                        dta_amount,
4126                    ));
4127                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4128                    // reflecting the benefit of the future deductible temporary difference.
4129                    dta_je.add_line(JournalEntryLine::credit(
4130                        doc_id,
4131                        2,
4132                        tax_accounts::TAX_EXPENSE.to_string(),
4133                        dta_amount,
4134                    ));
4135
4136                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4137                    close_jes.push(dta_je);
4138                    debug!(
4139                        "Company {}: loss year — recognised DTA of {}",
4140                        company_code, dta_amount
4141                    );
4142                }
4143            }
4144
4145            // --- Dividend JEs (v2.4) ---
4146            // If the entity is profitable after tax, declare a 10% dividend payout.
4147            // This runs AFTER tax provision so the dividend is based on post-tax income
4148            // but BEFORE the retained earnings close so the RE transfer reflects the
4149            // reduced balance.
4150            let tax_provision = if pre_tax_income > Decimal::ZERO {
4151                (pre_tax_income * tax_rate).round_dp(2)
4152            } else {
4153                Decimal::ZERO
4154            };
4155            let net_income = pre_tax_income - tax_provision;
4156
4157            if net_income > Decimal::ZERO {
4158                use datasynth_generators::DividendGenerator;
4159                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4160                let mut div_gen = DividendGenerator::new(self.seed + 460);
4161                let currency_str = self
4162                    .config
4163                    .companies
4164                    .iter()
4165                    .find(|c| c.code == *company_code)
4166                    .map(|c| c.currency.as_str())
4167                    .unwrap_or("USD");
4168                let div_result = div_gen.generate(
4169                    company_code,
4170                    close_date,
4171                    Decimal::new(1, 0), // $1 per share placeholder
4172                    dividend_amount,
4173                    currency_str,
4174                );
4175                let div_je_count = div_result.journal_entries.len();
4176                close_jes.extend(div_result.journal_entries);
4177                debug!(
4178                    "Company {}: declared dividend of {} ({} JEs)",
4179                    company_code, dividend_amount, div_je_count
4180                );
4181            }
4182
4183            // --- Income statement closing JE ---
4184            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4185            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4186            // close the pre-tax loss into Retained Earnings as-is.
4187            if net_income != Decimal::ZERO {
4188                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4189                close_header.document_type = "CL".to_string();
4190                close_header.header_text =
4191                    Some(format!("Income statement close - {}", company_code));
4192                close_header.created_by = "CLOSE_ENGINE".to_string();
4193                close_header.source = TransactionSource::Automated;
4194                close_header.business_process = Some(BusinessProcess::R2R);
4195
4196                let doc_id = close_header.document_id;
4197                let mut close_je = JournalEntry::new(close_header);
4198
4199                let abs_net_income = net_income.abs();
4200
4201                if net_income > Decimal::ZERO {
4202                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4203                    close_je.add_line(JournalEntryLine::debit(
4204                        doc_id,
4205                        1,
4206                        equity_accounts::INCOME_SUMMARY.to_string(),
4207                        abs_net_income,
4208                    ));
4209                    close_je.add_line(JournalEntryLine::credit(
4210                        doc_id,
4211                        2,
4212                        equity_accounts::RETAINED_EARNINGS.to_string(),
4213                        abs_net_income,
4214                    ));
4215                } else {
4216                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4217                    close_je.add_line(JournalEntryLine::debit(
4218                        doc_id,
4219                        1,
4220                        equity_accounts::RETAINED_EARNINGS.to_string(),
4221                        abs_net_income,
4222                    ));
4223                    close_je.add_line(JournalEntryLine::credit(
4224                        doc_id,
4225                        2,
4226                        equity_accounts::INCOME_SUMMARY.to_string(),
4227                        abs_net_income,
4228                    ));
4229                }
4230
4231                debug_assert!(
4232                    close_je.is_balanced(),
4233                    "Income statement closing JE must be balanced"
4234                );
4235                close_jes.push(close_je);
4236            }
4237        }
4238
4239        let close_count = close_jes.len();
4240        if close_count > 0 {
4241            info!("Generated {} period-close journal entries", close_count);
4242            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4243            entries.extend(close_jes);
4244            stats.period_close_je_count = close_count;
4245
4246            // Update total entry/line-item stats
4247            stats.total_entries = entries.len() as u64;
4248            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4249        } else {
4250            debug!("No period-close entries generated (no income statement activity)");
4251        }
4252
4253        Ok(())
4254    }
4255
4256    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4257    fn phase_audit_data(
4258        &mut self,
4259        entries: &[JournalEntry],
4260        stats: &mut EnhancedGenerationStatistics,
4261    ) -> SynthResult<AuditSnapshot> {
4262        if self.phase_config.generate_audit {
4263            info!("Phase 8: Generating Audit Data");
4264            let audit_snapshot = self.generate_audit_data(entries)?;
4265            stats.audit_engagement_count = audit_snapshot.engagements.len();
4266            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4267            stats.audit_evidence_count = audit_snapshot.evidence.len();
4268            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4269            stats.audit_finding_count = audit_snapshot.findings.len();
4270            stats.audit_judgment_count = audit_snapshot.judgments.len();
4271            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4272            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4273            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4274            stats.audit_sample_count = audit_snapshot.samples.len();
4275            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4276            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4277            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4278            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4279            stats.audit_related_party_transaction_count =
4280                audit_snapshot.related_party_transactions.len();
4281            info!(
4282                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4283                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4284                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4285                 {} RP transactions",
4286                stats.audit_engagement_count,
4287                stats.audit_workpaper_count,
4288                stats.audit_evidence_count,
4289                stats.audit_risk_count,
4290                stats.audit_finding_count,
4291                stats.audit_judgment_count,
4292                stats.audit_confirmation_count,
4293                stats.audit_procedure_step_count,
4294                stats.audit_sample_count,
4295                stats.audit_analytical_result_count,
4296                stats.audit_ia_function_count,
4297                stats.audit_ia_report_count,
4298                stats.audit_related_party_count,
4299                stats.audit_related_party_transaction_count,
4300            );
4301            self.check_resources_with_log("post-audit")?;
4302            Ok(audit_snapshot)
4303        } else {
4304            debug!("Phase 8: Skipped (audit generation disabled)");
4305            Ok(AuditSnapshot::default())
4306        }
4307    }
4308
4309    /// Phase 9: Generate banking KYC/AML data.
4310    fn phase_banking_data(
4311        &mut self,
4312        stats: &mut EnhancedGenerationStatistics,
4313    ) -> SynthResult<BankingSnapshot> {
4314        if self.phase_config.generate_banking {
4315            info!("Phase 9: Generating Banking KYC/AML Data");
4316            let banking_snapshot = self.generate_banking_data()?;
4317            stats.banking_customer_count = banking_snapshot.customers.len();
4318            stats.banking_account_count = banking_snapshot.accounts.len();
4319            stats.banking_transaction_count = banking_snapshot.transactions.len();
4320            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4321            info!(
4322                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4323                stats.banking_customer_count, stats.banking_account_count,
4324                stats.banking_transaction_count, stats.banking_suspicious_count
4325            );
4326            self.check_resources_with_log("post-banking")?;
4327            Ok(banking_snapshot)
4328        } else {
4329            debug!("Phase 9: Skipped (banking generation disabled)");
4330            Ok(BankingSnapshot::default())
4331        }
4332    }
4333
4334    /// Phase 10: Export accounting network graphs for ML training.
4335    fn phase_graph_export(
4336        &mut self,
4337        entries: &[JournalEntry],
4338        coa: &Arc<ChartOfAccounts>,
4339        stats: &mut EnhancedGenerationStatistics,
4340    ) -> SynthResult<GraphExportSnapshot> {
4341        if self.phase_config.generate_graph_export && !entries.is_empty() {
4342            info!("Phase 10: Exporting Accounting Network Graphs");
4343            match self.export_graphs(entries, coa, stats) {
4344                Ok(snapshot) => {
4345                    info!(
4346                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4347                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4348                    );
4349                    Ok(snapshot)
4350                }
4351                Err(e) => {
4352                    warn!("Phase 10: Graph export failed: {}", e);
4353                    Ok(GraphExportSnapshot::default())
4354                }
4355            }
4356        } else {
4357            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4358            Ok(GraphExportSnapshot::default())
4359        }
4360    }
4361
4362    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4363    #[allow(clippy::too_many_arguments)]
4364    fn phase_hypergraph_export(
4365        &self,
4366        coa: &Arc<ChartOfAccounts>,
4367        entries: &[JournalEntry],
4368        document_flows: &DocumentFlowSnapshot,
4369        sourcing: &SourcingSnapshot,
4370        hr: &HrSnapshot,
4371        manufacturing: &ManufacturingSnapshot,
4372        banking: &BankingSnapshot,
4373        audit: &AuditSnapshot,
4374        financial_reporting: &FinancialReportingSnapshot,
4375        ocpm: &OcpmSnapshot,
4376        compliance: &ComplianceRegulationsSnapshot,
4377        stats: &mut EnhancedGenerationStatistics,
4378    ) -> SynthResult<()> {
4379        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4380            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4381            match self.export_hypergraph(
4382                coa,
4383                entries,
4384                document_flows,
4385                sourcing,
4386                hr,
4387                manufacturing,
4388                banking,
4389                audit,
4390                financial_reporting,
4391                ocpm,
4392                compliance,
4393                stats,
4394            ) {
4395                Ok(info) => {
4396                    info!(
4397                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4398                        info.node_count, info.edge_count, info.hyperedge_count
4399                    );
4400                }
4401                Err(e) => {
4402                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4403                }
4404            }
4405        } else {
4406            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4407        }
4408        Ok(())
4409    }
4410
4411    /// Phase 11: LLM Enrichment.
4412    ///
4413    /// Uses an LLM provider (mock by default) to enrich vendor names with
4414    /// realistic, context-aware names. This phase is non-blocking: failures
4415    /// log a warning but do not stop the generation pipeline.
4416    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4417        if !self.config.llm.enabled {
4418            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4419            return;
4420        }
4421
4422        info!("Phase 11: Starting LLM Enrichment");
4423        let start = std::time::Instant::now();
4424
4425        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4426            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4427            // and the corresponding API key environment variable is present.
4428            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4429                let schema_provider = &self.config.llm.provider;
4430                let api_key_env = match schema_provider.as_str() {
4431                    "openai" => Some("OPENAI_API_KEY"),
4432                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4433                    "custom" => Some("LLM_API_KEY"),
4434                    _ => None,
4435                };
4436                if let Some(key_env) = api_key_env {
4437                    if std::env::var(key_env).is_ok() {
4438                        let llm_config = datasynth_core::llm::LlmConfig {
4439                            model: self.config.llm.model.clone(),
4440                            api_key_env: key_env.to_string(),
4441                            ..datasynth_core::llm::LlmConfig::default()
4442                        };
4443                        match HttpLlmProvider::new(llm_config) {
4444                            Ok(p) => Arc::new(p),
4445                            Err(e) => {
4446                                warn!(
4447                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4448                                    e
4449                                );
4450                                Arc::new(MockLlmProvider::new(self.seed))
4451                            }
4452                        }
4453                    } else {
4454                        Arc::new(MockLlmProvider::new(self.seed))
4455                    }
4456                } else {
4457                    Arc::new(MockLlmProvider::new(self.seed))
4458                }
4459            };
4460            let enricher = VendorLlmEnricher::new(provider);
4461
4462            let industry = format!("{:?}", self.config.global.industry);
4463            let max_enrichments = self
4464                .config
4465                .llm
4466                .max_vendor_enrichments
4467                .min(self.master_data.vendors.len());
4468
4469            let mut enriched_count = 0usize;
4470            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4471                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4472                    Ok(name) => {
4473                        vendor.name = name;
4474                        enriched_count += 1;
4475                    }
4476                    Err(e) => {
4477                        warn!(
4478                            "LLM vendor enrichment failed for {}: {}",
4479                            vendor.vendor_id, e
4480                        );
4481                    }
4482                }
4483            }
4484
4485            enriched_count
4486        }));
4487
4488        match result {
4489            Ok(enriched_count) => {
4490                stats.llm_vendors_enriched = enriched_count;
4491                let elapsed = start.elapsed();
4492                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4493                info!(
4494                    "Phase 11 complete: {} vendors enriched in {}ms",
4495                    enriched_count, stats.llm_enrichment_ms
4496                );
4497            }
4498            Err(_) => {
4499                let elapsed = start.elapsed();
4500                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4501                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4502            }
4503        }
4504    }
4505
4506    /// Phase 12: Diffusion Enhancement.
4507    ///
4508    /// Generates a sample set using the statistical diffusion backend to
4509    /// demonstrate distribution-matching data generation. This phase is
4510    /// non-blocking: failures log a warning but do not stop the pipeline.
4511    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4512        if !self.config.diffusion.enabled {
4513            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4514            return;
4515        }
4516
4517        info!("Phase 12: Starting Diffusion Enhancement");
4518        let start = std::time::Instant::now();
4519
4520        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4521            // Target distribution: transaction amounts (log-normal-like)
4522            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4523            let stds = vec![2000.0, 1.5, 1.0];
4524
4525            let diffusion_config = DiffusionConfig {
4526                n_steps: self.config.diffusion.n_steps,
4527                seed: self.seed,
4528                ..Default::default()
4529            };
4530
4531            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4532
4533            let n_samples = self.config.diffusion.sample_size;
4534            let n_features = 3; // amount, line_items, approval_level
4535            let samples = backend.generate(n_samples, n_features, self.seed);
4536
4537            samples.len()
4538        }));
4539
4540        match result {
4541            Ok(sample_count) => {
4542                stats.diffusion_samples_generated = sample_count;
4543                let elapsed = start.elapsed();
4544                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4545                info!(
4546                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4547                    sample_count, stats.diffusion_enhancement_ms
4548                );
4549            }
4550            Err(_) => {
4551                let elapsed = start.elapsed();
4552                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4553                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4554            }
4555        }
4556    }
4557
4558    /// Phase 13: Causal Overlay.
4559    ///
4560    /// Builds a structural causal model from a built-in template (e.g.,
4561    /// fraud_detection) and generates causal samples. Optionally validates
4562    /// that the output respects the causal structure. This phase is
4563    /// non-blocking: failures log a warning but do not stop the pipeline.
4564    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4565        if !self.config.causal.enabled {
4566            debug!("Phase 13: Skipped (causal generation disabled)");
4567            return;
4568        }
4569
4570        info!("Phase 13: Starting Causal Overlay");
4571        let start = std::time::Instant::now();
4572
4573        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4574            // Select template based on config
4575            let graph = match self.config.causal.template.as_str() {
4576                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4577                _ => CausalGraph::fraud_detection_template(),
4578            };
4579
4580            let scm = StructuralCausalModel::new(graph.clone())
4581                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4582
4583            let n_samples = self.config.causal.sample_size;
4584            let samples = scm
4585                .generate(n_samples, self.seed)
4586                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4587
4588            // Optionally validate causal structure
4589            let validation_passed = if self.config.causal.validate {
4590                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4591                if report.valid {
4592                    info!(
4593                        "Causal validation passed: all {} checks OK",
4594                        report.checks.len()
4595                    );
4596                } else {
4597                    warn!(
4598                        "Causal validation: {} violations detected: {:?}",
4599                        report.violations.len(),
4600                        report.violations
4601                    );
4602                }
4603                Some(report.valid)
4604            } else {
4605                None
4606            };
4607
4608            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4609        }));
4610
4611        match result {
4612            Ok(Ok((sample_count, validation_passed))) => {
4613                stats.causal_samples_generated = sample_count;
4614                stats.causal_validation_passed = validation_passed;
4615                let elapsed = start.elapsed();
4616                stats.causal_generation_ms = elapsed.as_millis() as u64;
4617                info!(
4618                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4619                    sample_count, stats.causal_generation_ms, validation_passed,
4620                );
4621            }
4622            Ok(Err(e)) => {
4623                let elapsed = start.elapsed();
4624                stats.causal_generation_ms = elapsed.as_millis() as u64;
4625                warn!("Phase 13: Causal generation failed: {}", e);
4626            }
4627            Err(_) => {
4628                let elapsed = start.elapsed();
4629                stats.causal_generation_ms = elapsed.as_millis() as u64;
4630                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4631            }
4632        }
4633    }
4634
4635    /// Phase 14: Generate S2C sourcing data.
4636    fn phase_sourcing_data(
4637        &mut self,
4638        stats: &mut EnhancedGenerationStatistics,
4639    ) -> SynthResult<SourcingSnapshot> {
4640        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4641            debug!("Phase 14: Skipped (sourcing generation disabled)");
4642            return Ok(SourcingSnapshot::default());
4643        }
4644        let degradation = self.check_resources()?;
4645        if degradation >= DegradationLevel::Reduced {
4646            debug!(
4647                "Phase skipped due to resource pressure (degradation: {:?})",
4648                degradation
4649            );
4650            return Ok(SourcingSnapshot::default());
4651        }
4652
4653        info!("Phase 14: Generating S2C Sourcing Data");
4654        let seed = self.seed;
4655
4656        // Gather vendor data from master data
4657        let vendor_ids: Vec<String> = self
4658            .master_data
4659            .vendors
4660            .iter()
4661            .map(|v| v.vendor_id.clone())
4662            .collect();
4663        if vendor_ids.is_empty() {
4664            debug!("Phase 14: Skipped (no vendors available)");
4665            return Ok(SourcingSnapshot::default());
4666        }
4667
4668        let categories: Vec<(String, String)> = vec![
4669            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4670            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4671            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4672            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4673            ("CAT-LOG".to_string(), "Logistics".to_string()),
4674        ];
4675        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4676            .iter()
4677            .map(|(id, name)| {
4678                (
4679                    id.clone(),
4680                    name.clone(),
4681                    rust_decimal::Decimal::from(100_000),
4682                )
4683            })
4684            .collect();
4685
4686        let company_code = self
4687            .config
4688            .companies
4689            .first()
4690            .map(|c| c.code.as_str())
4691            .unwrap_or("1000");
4692        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4693            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4694        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4695        let fiscal_year = start_date.year() as u16;
4696        let owner_ids: Vec<String> = self
4697            .master_data
4698            .employees
4699            .iter()
4700            .take(5)
4701            .map(|e| e.employee_id.clone())
4702            .collect();
4703        let owner_id = owner_ids
4704            .first()
4705            .map(std::string::String::as_str)
4706            .unwrap_or("BUYER-001");
4707
4708        // Step 1: Spend Analysis
4709        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4710        let spend_analyses =
4711            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4712
4713        // Step 2: Sourcing Projects
4714        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4715        let sourcing_projects = if owner_ids.is_empty() {
4716            Vec::new()
4717        } else {
4718            project_gen.generate(
4719                company_code,
4720                &categories_with_spend,
4721                &owner_ids,
4722                start_date,
4723                self.config.global.period_months,
4724            )
4725        };
4726        stats.sourcing_project_count = sourcing_projects.len();
4727
4728        // Step 3: Qualifications
4729        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4730        let mut qual_gen = QualificationGenerator::new(seed + 2);
4731        let qualifications = qual_gen.generate(
4732            company_code,
4733            &qual_vendor_ids,
4734            sourcing_projects.first().map(|p| p.project_id.as_str()),
4735            owner_id,
4736            start_date,
4737        );
4738
4739        // Step 4: RFx Events
4740        let mut rfx_gen = RfxGenerator::new(seed + 3);
4741        let rfx_events: Vec<RfxEvent> = sourcing_projects
4742            .iter()
4743            .map(|proj| {
4744                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4745                rfx_gen.generate(
4746                    company_code,
4747                    &proj.project_id,
4748                    &proj.category_id,
4749                    &qualified_vids,
4750                    owner_id,
4751                    start_date,
4752                    50000.0,
4753                )
4754            })
4755            .collect();
4756        stats.rfx_event_count = rfx_events.len();
4757
4758        // Step 5: Bids
4759        let mut bid_gen = BidGenerator::new(seed + 4);
4760        let mut all_bids = Vec::new();
4761        for rfx in &rfx_events {
4762            let bidder_count = vendor_ids.len().clamp(2, 5);
4763            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4764            let bids = bid_gen.generate(rfx, &responding, start_date);
4765            all_bids.extend(bids);
4766        }
4767        stats.bid_count = all_bids.len();
4768
4769        // Step 6: Bid Evaluations
4770        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4771        let bid_evaluations: Vec<BidEvaluation> = rfx_events
4772            .iter()
4773            .map(|rfx| {
4774                let rfx_bids: Vec<SupplierBid> = all_bids
4775                    .iter()
4776                    .filter(|b| b.rfx_id == rfx.rfx_id)
4777                    .cloned()
4778                    .collect();
4779                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4780            })
4781            .collect();
4782
4783        // Step 7: Contracts from winning bids
4784        let mut contract_gen = ContractGenerator::new(seed + 6);
4785        let contracts: Vec<ProcurementContract> = bid_evaluations
4786            .iter()
4787            .zip(rfx_events.iter())
4788            .filter_map(|(eval, rfx)| {
4789                eval.ranked_bids.first().and_then(|winner| {
4790                    all_bids
4791                        .iter()
4792                        .find(|b| b.bid_id == winner.bid_id)
4793                        .map(|winning_bid| {
4794                            contract_gen.generate_from_bid(
4795                                winning_bid,
4796                                Some(&rfx.sourcing_project_id),
4797                                &rfx.category_id,
4798                                owner_id,
4799                                start_date,
4800                            )
4801                        })
4802                })
4803            })
4804            .collect();
4805        stats.contract_count = contracts.len();
4806
4807        // Step 8: Catalog Items
4808        let mut catalog_gen = CatalogGenerator::new(seed + 7);
4809        let catalog_items = catalog_gen.generate(&contracts);
4810        stats.catalog_item_count = catalog_items.len();
4811
4812        // Step 9: Scorecards
4813        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4814        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4815            .iter()
4816            .fold(
4817                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4818                |mut acc, c| {
4819                    acc.entry(c.vendor_id.clone()).or_default().push(c);
4820                    acc
4821                },
4822            )
4823            .into_iter()
4824            .collect();
4825        let scorecards = scorecard_gen.generate(
4826            company_code,
4827            &vendor_contracts,
4828            start_date,
4829            end_date,
4830            owner_id,
4831        );
4832        stats.scorecard_count = scorecards.len();
4833
4834        // Back-populate cross-references on sourcing projects (Task 35)
4835        // Link each project to its RFx events, contracts, and spend analyses
4836        let mut sourcing_projects = sourcing_projects;
4837        for project in &mut sourcing_projects {
4838            // Link RFx events generated for this project
4839            project.rfx_ids = rfx_events
4840                .iter()
4841                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4842                .map(|rfx| rfx.rfx_id.clone())
4843                .collect();
4844
4845            // Link contract awarded from this project's RFx
4846            project.contract_id = contracts
4847                .iter()
4848                .find(|c| {
4849                    c.sourcing_project_id
4850                        .as_deref()
4851                        .is_some_and(|sp| sp == project.project_id)
4852                })
4853                .map(|c| c.contract_id.clone());
4854
4855            // Link spend analysis for matching category (use category_id as the reference)
4856            project.spend_analysis_id = spend_analyses
4857                .iter()
4858                .find(|sa| sa.category_id == project.category_id)
4859                .map(|sa| sa.category_id.clone());
4860        }
4861
4862        info!(
4863            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4864            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4865            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4866        );
4867        self.check_resources_with_log("post-sourcing")?;
4868
4869        Ok(SourcingSnapshot {
4870            spend_analyses,
4871            sourcing_projects,
4872            qualifications,
4873            rfx_events,
4874            bids: all_bids,
4875            bid_evaluations,
4876            contracts,
4877            catalog_items,
4878            scorecards,
4879        })
4880    }
4881
4882    /// Build a [`GroupStructure`] from the current company configuration.
4883    ///
4884    /// The first company in the configuration is treated as the ultimate parent.
4885    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4886    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4887    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4888        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4889
4890        let parent_code = self
4891            .config
4892            .companies
4893            .first()
4894            .map(|c| c.code.clone())
4895            .unwrap_or_else(|| "PARENT".to_string());
4896
4897        let mut group = GroupStructure::new(parent_code);
4898
4899        for company in self.config.companies.iter().skip(1) {
4900            let sub =
4901                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4902            group.add_subsidiary(sub);
4903        }
4904
4905        group
4906    }
4907
4908    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4909    fn phase_intercompany(
4910        &mut self,
4911        journal_entries: &[JournalEntry],
4912        stats: &mut EnhancedGenerationStatistics,
4913    ) -> SynthResult<IntercompanySnapshot> {
4914        // Skip if intercompany is disabled in config
4915        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4916            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4917            return Ok(IntercompanySnapshot::default());
4918        }
4919
4920        // Intercompany requires at least 2 companies
4921        if self.config.companies.len() < 2 {
4922            debug!(
4923                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4924                self.config.companies.len()
4925            );
4926            return Ok(IntercompanySnapshot::default());
4927        }
4928
4929        info!("Phase 14b: Generating Intercompany Transactions");
4930
4931        // Build the group structure early — used by ISA 600 component auditor scope
4932        // and consolidated financial statement generators downstream.
4933        let group_structure = self.build_group_structure();
4934        debug!(
4935            "Group structure built: parent={}, subsidiaries={}",
4936            group_structure.parent_entity,
4937            group_structure.subsidiaries.len()
4938        );
4939
4940        let seed = self.seed;
4941        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4942            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4943        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4944
4945        // Build ownership structure from company configs
4946        // First company is treated as the parent, remaining are subsidiaries
4947        let parent_code = self.config.companies[0].code.clone();
4948        let mut ownership_structure =
4949            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4950
4951        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4952            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4953                format!("REL{:03}", i + 1),
4954                parent_code.clone(),
4955                company.code.clone(),
4956                rust_decimal::Decimal::from(100), // Default 100% ownership
4957                start_date,
4958            );
4959            ownership_structure.add_relationship(relationship);
4960        }
4961
4962        // Convert config transfer pricing method to core model enum
4963        let tp_method = match self.config.intercompany.transfer_pricing_method {
4964            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4965                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4966            }
4967            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4968                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4969            }
4970            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4971                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4972            }
4973            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4974                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4975            }
4976            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4977                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4978            }
4979        };
4980
4981        // Build IC generator config from schema config
4982        let ic_currency = self
4983            .config
4984            .companies
4985            .first()
4986            .map(|c| c.currency.clone())
4987            .unwrap_or_else(|| "USD".to_string());
4988        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4989            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4990            transfer_pricing_method: tp_method,
4991            markup_percent: rust_decimal::Decimal::from_f64_retain(
4992                self.config.intercompany.markup_percent,
4993            )
4994            .unwrap_or(rust_decimal::Decimal::from(5)),
4995            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4996            default_currency: ic_currency,
4997            ..Default::default()
4998        };
4999
5000        // Create IC generator
5001        let mut ic_generator = datasynth_generators::ICGenerator::new(
5002            ic_gen_config,
5003            ownership_structure.clone(),
5004            seed + 50,
5005        );
5006
5007        // Generate IC transactions for the period
5008        // Use ~3 transactions per day as a reasonable default
5009        let transactions_per_day = 3;
5010        let matched_pairs = ic_generator.generate_transactions_for_period(
5011            start_date,
5012            end_date,
5013            transactions_per_day,
5014        );
5015
5016        // Generate IC source P2P/O2C documents
5017        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5018        debug!(
5019            "Generated {} IC seller invoices, {} IC buyer POs",
5020            ic_doc_chains.seller_invoices.len(),
5021            ic_doc_chains.buyer_orders.len()
5022        );
5023
5024        // Generate journal entries from matched pairs
5025        let mut seller_entries = Vec::new();
5026        let mut buyer_entries = Vec::new();
5027        let fiscal_year = start_date.year();
5028
5029        for pair in &matched_pairs {
5030            let fiscal_period = pair.posting_date.month();
5031            let (seller_je, buyer_je) =
5032                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5033            seller_entries.push(seller_je);
5034            buyer_entries.push(buyer_je);
5035        }
5036
5037        // Run matching engine
5038        let matching_config = datasynth_generators::ICMatchingConfig {
5039            base_currency: self
5040                .config
5041                .companies
5042                .first()
5043                .map(|c| c.currency.clone())
5044                .unwrap_or_else(|| "USD".to_string()),
5045            ..Default::default()
5046        };
5047        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5048        matching_engine.load_matched_pairs(&matched_pairs);
5049        let matching_result = matching_engine.run_matching(end_date);
5050
5051        // Generate elimination entries if configured
5052        let mut elimination_entries = Vec::new();
5053        if self.config.intercompany.generate_eliminations {
5054            let elim_config = datasynth_generators::EliminationConfig {
5055                consolidation_entity: "GROUP".to_string(),
5056                base_currency: self
5057                    .config
5058                    .companies
5059                    .first()
5060                    .map(|c| c.currency.clone())
5061                    .unwrap_or_else(|| "USD".to_string()),
5062                ..Default::default()
5063            };
5064
5065            let mut elim_generator =
5066                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5067
5068            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5069            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5070                matching_result
5071                    .matched_balances
5072                    .iter()
5073                    .chain(matching_result.unmatched_balances.iter())
5074                    .cloned()
5075                    .collect();
5076
5077            // Build investment and equity maps from the group structure so that the
5078            // elimination generator can produce equity-investment elimination entries
5079            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5080            //
5081            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5082            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5083            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5084            //
5085            // Net assets are derived from the journal entries using account-range heuristics:
5086            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5087            // no JE data is available (IC phase runs early in the generation pipeline).
5088            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5089                std::collections::HashMap::new();
5090            let mut equity_amounts: std::collections::HashMap<
5091                String,
5092                std::collections::HashMap<String, rust_decimal::Decimal>,
5093            > = std::collections::HashMap::new();
5094            {
5095                use rust_decimal::Decimal;
5096                let hundred = Decimal::from(100u32);
5097                let ten_pct = Decimal::new(10, 2); // 0.10
5098                let thirty_pct = Decimal::new(30, 2); // 0.30
5099                let sixty_pct = Decimal::new(60, 2); // 0.60
5100                let parent_code = &group_structure.parent_entity;
5101                for sub in &group_structure.subsidiaries {
5102                    let net_assets = {
5103                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5104                        if na > Decimal::ZERO {
5105                            na
5106                        } else {
5107                            Decimal::from(1_000_000u64)
5108                        }
5109                    };
5110                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5111                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5112                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5113
5114                    // Split subsidiary equity into conventional components:
5115                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5116                    let mut eq_map = std::collections::HashMap::new();
5117                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5118                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5119                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5120                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5121                }
5122            }
5123
5124            let journal = elim_generator.generate_eliminations(
5125                &fiscal_period,
5126                end_date,
5127                &all_balances,
5128                &matched_pairs,
5129                &investment_amounts,
5130                &equity_amounts,
5131            );
5132
5133            elimination_entries = journal.entries.clone();
5134        }
5135
5136        let matched_pair_count = matched_pairs.len();
5137        let elimination_entry_count = elimination_entries.len();
5138        let match_rate = matching_result.match_rate;
5139
5140        stats.ic_matched_pair_count = matched_pair_count;
5141        stats.ic_elimination_count = elimination_entry_count;
5142        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5143
5144        info!(
5145            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5146            matched_pair_count,
5147            stats.ic_transaction_count,
5148            seller_entries.len(),
5149            buyer_entries.len(),
5150            elimination_entry_count,
5151            match_rate * 100.0
5152        );
5153        self.check_resources_with_log("post-intercompany")?;
5154
5155        // ----------------------------------------------------------------
5156        // NCI measurements: derive from group structure ownership percentages
5157        // ----------------------------------------------------------------
5158        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5159            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5160            use rust_decimal::Decimal;
5161
5162            let eight_pct = Decimal::new(8, 2); // 0.08
5163
5164            group_structure
5165                .subsidiaries
5166                .iter()
5167                .filter(|sub| {
5168                    sub.nci_percentage > Decimal::ZERO
5169                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5170                })
5171                .map(|sub| {
5172                    // Compute net assets from actual journal entries for this subsidiary.
5173                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5174                    // IC phase runs before the main JE batch has been populated).
5175                    let net_assets_from_jes =
5176                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5177
5178                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5179                        net_assets_from_jes.round_dp(2)
5180                    } else {
5181                        // Fallback: use a plausible base amount
5182                        Decimal::from(1_000_000u64)
5183                    };
5184
5185                    // Net income approximated as 8% of net assets
5186                    let net_income = (net_assets * eight_pct).round_dp(2);
5187
5188                    NciMeasurement::compute(
5189                        sub.entity_code.clone(),
5190                        sub.nci_percentage,
5191                        net_assets,
5192                        net_income,
5193                    )
5194                })
5195                .collect()
5196        };
5197
5198        if !nci_measurements.is_empty() {
5199            info!(
5200                "NCI measurements: {} subsidiaries with non-controlling interests",
5201                nci_measurements.len()
5202            );
5203        }
5204
5205        Ok(IntercompanySnapshot {
5206            group_structure: Some(group_structure),
5207            matched_pairs,
5208            seller_journal_entries: seller_entries,
5209            buyer_journal_entries: buyer_entries,
5210            elimination_entries,
5211            nci_measurements,
5212            ic_document_chains: Some(ic_doc_chains),
5213            matched_pair_count,
5214            elimination_entry_count,
5215            match_rate,
5216        })
5217    }
5218
5219    /// Phase 15: Generate bank reconciliations and financial statements.
5220    fn phase_financial_reporting(
5221        &mut self,
5222        document_flows: &DocumentFlowSnapshot,
5223        journal_entries: &[JournalEntry],
5224        coa: &Arc<ChartOfAccounts>,
5225        _hr: &HrSnapshot,
5226        _audit: &AuditSnapshot,
5227        stats: &mut EnhancedGenerationStatistics,
5228    ) -> SynthResult<FinancialReportingSnapshot> {
5229        let fs_enabled = self.phase_config.generate_financial_statements
5230            || self.config.financial_reporting.enabled;
5231        let br_enabled = self.phase_config.generate_bank_reconciliation;
5232
5233        if !fs_enabled && !br_enabled {
5234            debug!("Phase 15: Skipped (financial reporting disabled)");
5235            return Ok(FinancialReportingSnapshot::default());
5236        }
5237
5238        info!("Phase 15: Generating Financial Reporting Data");
5239
5240        let seed = self.seed;
5241        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5242            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5243
5244        let mut financial_statements = Vec::new();
5245        let mut bank_reconciliations = Vec::new();
5246        let mut trial_balances = Vec::new();
5247        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
5248        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
5249            Vec::new();
5250        // Standalone statements keyed by entity code
5251        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
5252            std::collections::HashMap::new();
5253        // Consolidated statements (one per period)
5254        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
5255        // Consolidation schedules (one per period)
5256        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
5257
5258        // Generate financial statements from JE-derived trial balances.
5259        //
5260        // When journal entries are available, we use cumulative trial balances for
5261        // balance sheet accounts and current-period trial balances for income
5262        // statement accounts. We also track prior-period trial balances so the
5263        // generator can produce comparative amounts, and we build a proper
5264        // cash flow statement from working capital changes rather than random data.
5265        if fs_enabled {
5266            let has_journal_entries = !journal_entries.is_empty();
5267
5268            // Use FinancialStatementGenerator for balance sheet and income statement,
5269            // but build cash flow ourselves from TB data when JEs are available.
5270            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
5271            // Separate generator for consolidated statements (different seed offset)
5272            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
5273
5274            // Collect elimination JEs once (reused across periods)
5275            let elimination_entries: Vec<&JournalEntry> = journal_entries
5276                .iter()
5277                .filter(|je| je.header.is_elimination)
5278                .collect();
5279
5280            // Generate one set of statements per period, per entity
5281            for period in 0..self.config.global.period_months {
5282                let period_start = start_date + chrono::Months::new(period);
5283                let period_end =
5284                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5285                let fiscal_year = period_end.year() as u16;
5286                let fiscal_period = period_end.month() as u8;
5287                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5288
5289                // Build per-entity trial balances for this period (non-elimination JEs)
5290                // We accumulate them for the consolidation step.
5291                let mut entity_tb_map: std::collections::HashMap<
5292                    String,
5293                    std::collections::HashMap<String, rust_decimal::Decimal>,
5294                > = std::collections::HashMap::new();
5295
5296                // --- Standalone: one set of statements per company ---
5297                for (company_idx, company) in self.config.companies.iter().enumerate() {
5298                    let company_code = company.code.as_str();
5299                    let currency = company.currency.as_str();
5300                    // Use a unique seed offset per company to keep statements deterministic
5301                    // and distinct across companies
5302                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
5303                    let mut company_fs_gen =
5304                        FinancialStatementGenerator::new(seed + company_seed_offset);
5305
5306                    if has_journal_entries {
5307                        let tb_entries = Self::build_cumulative_trial_balance(
5308                            journal_entries,
5309                            coa,
5310                            company_code,
5311                            start_date,
5312                            period_end,
5313                            fiscal_year,
5314                            fiscal_period,
5315                        );
5316
5317                        // Accumulate per-entity category balances for consolidation
5318                        let entity_cat_map =
5319                            entity_tb_map.entry(company_code.to_string()).or_default();
5320                        for tb_entry in &tb_entries {
5321                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
5322                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
5323                        }
5324
5325                        let stmts = company_fs_gen.generate(
5326                            company_code,
5327                            currency,
5328                            &tb_entries,
5329                            period_start,
5330                            period_end,
5331                            fiscal_year,
5332                            fiscal_period,
5333                            None,
5334                            "SYS-AUTOCLOSE",
5335                        );
5336
5337                        let mut entity_stmts = Vec::new();
5338                        for stmt in stmts {
5339                            if stmt.statement_type == StatementType::CashFlowStatement {
5340                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
5341                                let cf_items = Self::build_cash_flow_from_trial_balances(
5342                                    &tb_entries,
5343                                    None,
5344                                    net_income,
5345                                );
5346                                entity_stmts.push(FinancialStatement {
5347                                    cash_flow_items: cf_items,
5348                                    ..stmt
5349                                });
5350                            } else {
5351                                entity_stmts.push(stmt);
5352                            }
5353                        }
5354
5355                        // Add to the flat financial_statements list (used by KPI/budget)
5356                        financial_statements.extend(entity_stmts.clone());
5357
5358                        // Store standalone per-entity
5359                        standalone_statements
5360                            .entry(company_code.to_string())
5361                            .or_default()
5362                            .extend(entity_stmts);
5363
5364                        // Only store trial balance for the first company in the period
5365                        // to avoid duplicates in the trial_balances list
5366                        if company_idx == 0 {
5367                            trial_balances.push(PeriodTrialBalance {
5368                                fiscal_year,
5369                                fiscal_period,
5370                                period_start,
5371                                period_end,
5372                                entries: tb_entries,
5373                            });
5374                        }
5375                    } else {
5376                        // Fallback: no JEs available
5377                        let tb_entries = Self::build_trial_balance_from_entries(
5378                            journal_entries,
5379                            coa,
5380                            company_code,
5381                            fiscal_year,
5382                            fiscal_period,
5383                        );
5384
5385                        let stmts = company_fs_gen.generate(
5386                            company_code,
5387                            currency,
5388                            &tb_entries,
5389                            period_start,
5390                            period_end,
5391                            fiscal_year,
5392                            fiscal_period,
5393                            None,
5394                            "SYS-AUTOCLOSE",
5395                        );
5396                        financial_statements.extend(stmts.clone());
5397                        standalone_statements
5398                            .entry(company_code.to_string())
5399                            .or_default()
5400                            .extend(stmts);
5401
5402                        if company_idx == 0 && !tb_entries.is_empty() {
5403                            trial_balances.push(PeriodTrialBalance {
5404                                fiscal_year,
5405                                fiscal_period,
5406                                period_start,
5407                                period_end,
5408                                entries: tb_entries,
5409                            });
5410                        }
5411                    }
5412                }
5413
5414                // --- Consolidated: aggregate all entities + apply eliminations ---
5415                // Use the primary (first) company's currency for the consolidated statement
5416                let group_currency = self
5417                    .config
5418                    .companies
5419                    .first()
5420                    .map(|c| c.currency.as_str())
5421                    .unwrap_or("USD");
5422
5423                // Build owned elimination entries for this period
5424                let period_eliminations: Vec<JournalEntry> = elimination_entries
5425                    .iter()
5426                    .filter(|je| {
5427                        je.header.fiscal_year == fiscal_year
5428                            && je.header.fiscal_period == fiscal_period
5429                    })
5430                    .map(|je| (*je).clone())
5431                    .collect();
5432
5433                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5434                    &entity_tb_map,
5435                    &period_eliminations,
5436                    &period_label,
5437                );
5438
5439                // Build a pseudo trial balance from consolidated line items for the
5440                // FinancialStatementGenerator to use (only for cash flow direction).
5441                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5442                    .line_items
5443                    .iter()
5444                    .map(|li| {
5445                        let net = li.post_elimination_total;
5446                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5447                            (net, rust_decimal::Decimal::ZERO)
5448                        } else {
5449                            (rust_decimal::Decimal::ZERO, -net)
5450                        };
5451                        datasynth_generators::TrialBalanceEntry {
5452                            account_code: li.account_category.clone(),
5453                            account_name: li.account_category.clone(),
5454                            category: li.account_category.clone(),
5455                            debit_balance: debit,
5456                            credit_balance: credit,
5457                        }
5458                    })
5459                    .collect();
5460
5461                let mut cons_stmts = cons_gen.generate(
5462                    "GROUP",
5463                    group_currency,
5464                    &cons_tb,
5465                    period_start,
5466                    period_end,
5467                    fiscal_year,
5468                    fiscal_period,
5469                    None,
5470                    "SYS-AUTOCLOSE",
5471                );
5472
5473                // Split consolidated line items by statement type.
5474                // The consolidation generator returns BS items first, then IS items,
5475                // identified by their CONS- prefix and category.
5476                let bs_categories: &[&str] = &[
5477                    "CASH",
5478                    "RECEIVABLES",
5479                    "INVENTORY",
5480                    "FIXEDASSETS",
5481                    "PAYABLES",
5482                    "ACCRUEDLIABILITIES",
5483                    "LONGTERMDEBT",
5484                    "EQUITY",
5485                ];
5486                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5487                    cons_line_items.into_iter().partition(|li| {
5488                        let upper = li.label.to_uppercase();
5489                        bs_categories.iter().any(|c| upper == *c)
5490                    });
5491
5492                for stmt in &mut cons_stmts {
5493                    stmt.is_consolidated = true;
5494                    match stmt.statement_type {
5495                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5496                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5497                        _ => {} // CF and equity change statements keep generator output
5498                    }
5499                }
5500
5501                consolidated_statements.extend(cons_stmts);
5502                consolidation_schedules.push(schedule);
5503            }
5504
5505            // Backward compat: if only 1 company, use existing code path logic
5506            // (prior_cumulative_tb for comparative amounts). Already handled above;
5507            // the prior_ref is omitted to keep this change minimal.
5508            let _ = &mut fs_gen; // suppress unused warning
5509
5510            stats.financial_statement_count = financial_statements.len();
5511            info!(
5512                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5513                stats.financial_statement_count,
5514                consolidated_statements.len(),
5515                has_journal_entries
5516            );
5517
5518            // ----------------------------------------------------------------
5519            // IFRS 8 / ASC 280: Operating Segment Reporting
5520            // ----------------------------------------------------------------
5521            // Build entity seeds from the company configuration.
5522            let entity_seeds: Vec<SegmentSeed> = self
5523                .config
5524                .companies
5525                .iter()
5526                .map(|c| SegmentSeed {
5527                    code: c.code.clone(),
5528                    name: c.name.clone(),
5529                    currency: c.currency.clone(),
5530                })
5531                .collect();
5532
5533            let mut seg_gen = SegmentGenerator::new(seed + 30);
5534
5535            // Generate one set of segment reports per period.
5536            // We extract consolidated revenue / profit / assets from the consolidated
5537            // financial statements produced above, falling back to simple sums when
5538            // no consolidated statements were generated (single-entity path).
5539            for period in 0..self.config.global.period_months {
5540                let period_end =
5541                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5542                let fiscal_year = period_end.year() as u16;
5543                let fiscal_period = period_end.month() as u8;
5544                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5545
5546                use datasynth_core::models::StatementType;
5547
5548                // Try to find consolidated income statement for this period
5549                let cons_is = consolidated_statements.iter().find(|s| {
5550                    s.fiscal_year == fiscal_year
5551                        && s.fiscal_period == fiscal_period
5552                        && s.statement_type == StatementType::IncomeStatement
5553                });
5554                let cons_bs = consolidated_statements.iter().find(|s| {
5555                    s.fiscal_year == fiscal_year
5556                        && s.fiscal_period == fiscal_period
5557                        && s.statement_type == StatementType::BalanceSheet
5558                });
5559
5560                // If consolidated statements not available fall back to the flat list
5561                let is_stmt = cons_is.or_else(|| {
5562                    financial_statements.iter().find(|s| {
5563                        s.fiscal_year == fiscal_year
5564                            && s.fiscal_period == fiscal_period
5565                            && s.statement_type == StatementType::IncomeStatement
5566                    })
5567                });
5568                let bs_stmt = cons_bs.or_else(|| {
5569                    financial_statements.iter().find(|s| {
5570                        s.fiscal_year == fiscal_year
5571                            && s.fiscal_period == fiscal_period
5572                            && s.statement_type == StatementType::BalanceSheet
5573                    })
5574                });
5575
5576                let consolidated_revenue = is_stmt
5577                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5578                    .map(|li| -li.amount) // revenue is stored as negative in IS
5579                    .unwrap_or(rust_decimal::Decimal::ZERO);
5580
5581                let consolidated_profit = is_stmt
5582                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5583                    .map(|li| li.amount)
5584                    .unwrap_or(rust_decimal::Decimal::ZERO);
5585
5586                let consolidated_assets = bs_stmt
5587                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5588                    .map(|li| li.amount)
5589                    .unwrap_or(rust_decimal::Decimal::ZERO);
5590
5591                // Skip periods where we have no financial data
5592                if consolidated_revenue == rust_decimal::Decimal::ZERO
5593                    && consolidated_assets == rust_decimal::Decimal::ZERO
5594                {
5595                    continue;
5596                }
5597
5598                let group_code = self
5599                    .config
5600                    .companies
5601                    .first()
5602                    .map(|c| c.code.as_str())
5603                    .unwrap_or("GROUP");
5604
5605                // Compute period depreciation from JEs with document type "CL" hitting account
5606                // 6000 (depreciation expense).  These are generated by phase_period_close.
5607                let total_depr: rust_decimal::Decimal = journal_entries
5608                    .iter()
5609                    .filter(|je| je.header.document_type == "CL")
5610                    .flat_map(|je| je.lines.iter())
5611                    .filter(|l| l.gl_account.starts_with("6000"))
5612                    .map(|l| l.debit_amount)
5613                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5614                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5615                    Some(total_depr)
5616                } else {
5617                    None
5618                };
5619
5620                let (segs, recon) = seg_gen.generate(
5621                    group_code,
5622                    &period_label,
5623                    consolidated_revenue,
5624                    consolidated_profit,
5625                    consolidated_assets,
5626                    &entity_seeds,
5627                    depr_param,
5628                );
5629                segment_reports.extend(segs);
5630                segment_reconciliations.push(recon);
5631            }
5632
5633            info!(
5634                "Segment reports generated: {} segments, {} reconciliations",
5635                segment_reports.len(),
5636                segment_reconciliations.len()
5637            );
5638        }
5639
5640        // Generate bank reconciliations from payment data
5641        if br_enabled && !document_flows.payments.is_empty() {
5642            let employee_ids: Vec<String> = self
5643                .master_data
5644                .employees
5645                .iter()
5646                .map(|e| e.employee_id.clone())
5647                .collect();
5648            let mut br_gen =
5649                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5650
5651            // Group payments by company code and period
5652            for company in &self.config.companies {
5653                let company_payments: Vec<PaymentReference> = document_flows
5654                    .payments
5655                    .iter()
5656                    .filter(|p| p.header.company_code == company.code)
5657                    .map(|p| PaymentReference {
5658                        id: p.header.document_id.clone(),
5659                        amount: if p.is_vendor { p.amount } else { -p.amount },
5660                        date: p.header.document_date,
5661                        reference: p
5662                            .check_number
5663                            .clone()
5664                            .or_else(|| p.wire_reference.clone())
5665                            .unwrap_or_else(|| p.header.document_id.clone()),
5666                    })
5667                    .collect();
5668
5669                if company_payments.is_empty() {
5670                    continue;
5671                }
5672
5673                let bank_account_id = format!("{}-MAIN", company.code);
5674
5675                // Generate one reconciliation per period
5676                for period in 0..self.config.global.period_months {
5677                    let period_start = start_date + chrono::Months::new(period);
5678                    let period_end =
5679                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5680
5681                    let period_payments: Vec<PaymentReference> = company_payments
5682                        .iter()
5683                        .filter(|p| p.date >= period_start && p.date <= period_end)
5684                        .cloned()
5685                        .collect();
5686
5687                    let recon = br_gen.generate(
5688                        &company.code,
5689                        &bank_account_id,
5690                        period_start,
5691                        period_end,
5692                        &company.currency,
5693                        &period_payments,
5694                    );
5695                    bank_reconciliations.push(recon);
5696                }
5697            }
5698            info!(
5699                "Bank reconciliations generated: {} reconciliations",
5700                bank_reconciliations.len()
5701            );
5702        }
5703
5704        stats.bank_reconciliation_count = bank_reconciliations.len();
5705        self.check_resources_with_log("post-financial-reporting")?;
5706
5707        if !trial_balances.is_empty() {
5708            info!(
5709                "Period-close trial balances captured: {} periods",
5710                trial_balances.len()
5711            );
5712        }
5713
5714        // Notes to financial statements are generated in a separate post-processing step
5715        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5716        // phases have completed, so that deferred tax and provision data can be wired in.
5717        let notes_to_financial_statements = Vec::new();
5718
5719        Ok(FinancialReportingSnapshot {
5720            financial_statements,
5721            standalone_statements,
5722            consolidated_statements,
5723            consolidation_schedules,
5724            bank_reconciliations,
5725            trial_balances,
5726            segment_reports,
5727            segment_reconciliations,
5728            notes_to_financial_statements,
5729        })
5730    }
5731
5732    /// Populate notes to financial statements using fully-resolved snapshots.
5733    ///
5734    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5735    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5736    /// can be wired into the notes context.  The method mutates
5737    /// `financial_reporting.notes_to_financial_statements` in-place.
5738    fn generate_notes_to_financial_statements(
5739        &self,
5740        financial_reporting: &mut FinancialReportingSnapshot,
5741        accounting_standards: &AccountingStandardsSnapshot,
5742        tax: &TaxSnapshot,
5743        hr: &HrSnapshot,
5744        audit: &AuditSnapshot,
5745        treasury: &TreasurySnapshot,
5746    ) {
5747        use datasynth_config::schema::AccountingFrameworkConfig;
5748        use datasynth_core::models::StatementType;
5749        use datasynth_generators::period_close::notes_generator::{
5750            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5751        };
5752
5753        let seed = self.seed;
5754        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5755        {
5756            Ok(d) => d,
5757            Err(_) => return,
5758        };
5759
5760        let mut notes_gen = NotesGenerator::new(seed + 4235);
5761
5762        for company in &self.config.companies {
5763            let last_period_end = start_date
5764                + chrono::Months::new(self.config.global.period_months)
5765                - chrono::Days::new(1);
5766            let fiscal_year = last_period_end.year() as u16;
5767
5768            // Extract relevant amounts from the already-generated financial statements
5769            let entity_is = financial_reporting
5770                .standalone_statements
5771                .get(&company.code)
5772                .and_then(|stmts| {
5773                    stmts.iter().find(|s| {
5774                        s.fiscal_year == fiscal_year
5775                            && s.statement_type == StatementType::IncomeStatement
5776                    })
5777                });
5778            let entity_bs = financial_reporting
5779                .standalone_statements
5780                .get(&company.code)
5781                .and_then(|stmts| {
5782                    stmts.iter().find(|s| {
5783                        s.fiscal_year == fiscal_year
5784                            && s.statement_type == StatementType::BalanceSheet
5785                    })
5786                });
5787
5788            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
5789            let revenue_amount = entity_is
5790                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5791                .map(|li| li.amount);
5792            let ppe_gross = entity_bs
5793                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5794                .map(|li| li.amount);
5795
5796            let framework = match self
5797                .config
5798                .accounting_standards
5799                .framework
5800                .unwrap_or_default()
5801            {
5802                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5803                    "IFRS".to_string()
5804                }
5805                _ => "US GAAP".to_string(),
5806            };
5807
5808            // ---- Deferred tax (IAS 12 / ASC 740) ----
5809            // Sum closing DTA and DTL from rollforward entries for this entity.
5810            let (entity_dta, entity_dtl) = {
5811                let mut dta = rust_decimal::Decimal::ZERO;
5812                let mut dtl = rust_decimal::Decimal::ZERO;
5813                for rf in &tax.deferred_tax.rollforwards {
5814                    if rf.entity_code == company.code {
5815                        dta += rf.closing_dta;
5816                        dtl += rf.closing_dtl;
5817                    }
5818                }
5819                (
5820                    if dta > rust_decimal::Decimal::ZERO {
5821                        Some(dta)
5822                    } else {
5823                        None
5824                    },
5825                    if dtl > rust_decimal::Decimal::ZERO {
5826                        Some(dtl)
5827                    } else {
5828                        None
5829                    },
5830                )
5831            };
5832
5833            // ---- Provisions (IAS 37 / ASC 450) ----
5834            // Filter provisions to this entity; sum best_estimate amounts.
5835            let entity_provisions: Vec<_> = accounting_standards
5836                .provisions
5837                .iter()
5838                .filter(|p| p.entity_code == company.code)
5839                .collect();
5840            let provision_count = entity_provisions.len();
5841            let total_provisions = if provision_count > 0 {
5842                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5843            } else {
5844                None
5845            };
5846
5847            // ---- Pension data from HR snapshot ----
5848            let entity_pension_plan_count = hr
5849                .pension_plans
5850                .iter()
5851                .filter(|p| p.entity_code == company.code)
5852                .count();
5853            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5854                let sum: rust_decimal::Decimal = hr
5855                    .pension_disclosures
5856                    .iter()
5857                    .filter(|d| {
5858                        hr.pension_plans
5859                            .iter()
5860                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5861                    })
5862                    .map(|d| d.net_pension_liability)
5863                    .sum();
5864                let plan_assets_sum: rust_decimal::Decimal = hr
5865                    .pension_plan_assets
5866                    .iter()
5867                    .filter(|a| {
5868                        hr.pension_plans
5869                            .iter()
5870                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5871                    })
5872                    .map(|a| a.fair_value_closing)
5873                    .sum();
5874                if entity_pension_plan_count > 0 {
5875                    Some(sum + plan_assets_sum)
5876                } else {
5877                    None
5878                }
5879            };
5880            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5881                let sum: rust_decimal::Decimal = hr
5882                    .pension_plan_assets
5883                    .iter()
5884                    .filter(|a| {
5885                        hr.pension_plans
5886                            .iter()
5887                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5888                    })
5889                    .map(|a| a.fair_value_closing)
5890                    .sum();
5891                if entity_pension_plan_count > 0 {
5892                    Some(sum)
5893                } else {
5894                    None
5895                }
5896            };
5897
5898            // ---- Audit data: related parties + subsequent events ----
5899            // Audit snapshot covers all entities; use total counts (common case = single entity).
5900            let rp_count = audit.related_party_transactions.len();
5901            let se_count = audit.subsequent_events.len();
5902            let adjusting_count = audit
5903                .subsequent_events
5904                .iter()
5905                .filter(|e| {
5906                    matches!(
5907                        e.classification,
5908                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5909                    )
5910                })
5911                .count();
5912
5913            let ctx = NotesGeneratorContext {
5914                entity_code: company.code.clone(),
5915                framework,
5916                period: format!("FY{}", fiscal_year),
5917                period_end: last_period_end,
5918                currency: company.currency.clone(),
5919                revenue_amount,
5920                total_ppe_gross: ppe_gross,
5921                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5922                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5923                deferred_tax_asset: entity_dta,
5924                deferred_tax_liability: entity_dtl,
5925                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5926                provision_count,
5927                total_provisions,
5928                // Pension data from HR snapshot
5929                pension_plan_count: entity_pension_plan_count,
5930                total_dbo: entity_total_dbo,
5931                total_plan_assets: entity_total_plan_assets,
5932                // Audit data
5933                related_party_transaction_count: rp_count,
5934                subsequent_event_count: se_count,
5935                adjusting_event_count: adjusting_count,
5936                ..NotesGeneratorContext::default()
5937            };
5938
5939            let entity_notes = notes_gen.generate(&ctx);
5940            let standard_note_count = entity_notes.len() as u32;
5941            info!(
5942                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5943                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5944            );
5945            financial_reporting
5946                .notes_to_financial_statements
5947                .extend(entity_notes);
5948
5949            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
5950            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5951                .debt_instruments
5952                .iter()
5953                .filter(|d| d.entity_id == company.code)
5954                .map(|d| {
5955                    (
5956                        format!("{:?}", d.instrument_type),
5957                        d.principal,
5958                        d.maturity_date.to_string(),
5959                    )
5960                })
5961                .collect();
5962
5963            let hedge_count = treasury.hedge_relationships.len();
5964            let effective_hedges = treasury
5965                .hedge_relationships
5966                .iter()
5967                .filter(|h| h.is_effective)
5968                .count();
5969            let total_notional: rust_decimal::Decimal = treasury
5970                .hedging_instruments
5971                .iter()
5972                .map(|h| h.notional_amount)
5973                .sum();
5974            let total_fair_value: rust_decimal::Decimal = treasury
5975                .hedging_instruments
5976                .iter()
5977                .map(|h| h.fair_value)
5978                .sum();
5979
5980            // Join provision_movements with provisions to get entity/type info
5981            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5982                .provisions
5983                .iter()
5984                .filter(|p| p.entity_code == company.code)
5985                .map(|p| p.id.as_str())
5986                .collect();
5987            let provision_movements: Vec<(
5988                String,
5989                rust_decimal::Decimal,
5990                rust_decimal::Decimal,
5991                rust_decimal::Decimal,
5992            )> = accounting_standards
5993                .provision_movements
5994                .iter()
5995                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5996                .map(|m| {
5997                    let prov_type = accounting_standards
5998                        .provisions
5999                        .iter()
6000                        .find(|p| p.id == m.provision_id)
6001                        .map(|p| format!("{:?}", p.provision_type))
6002                        .unwrap_or_else(|| "Unknown".to_string());
6003                    (prov_type, m.opening, m.additions, m.closing)
6004                })
6005                .collect();
6006
6007            let enhanced_ctx = EnhancedNotesContext {
6008                entity_code: company.code.clone(),
6009                period: format!("FY{}", fiscal_year),
6010                currency: company.currency.clone(),
6011                // Inventory breakdown: best-effort using zero (would need balance tracker)
6012                finished_goods_value: rust_decimal::Decimal::ZERO,
6013                wip_value: rust_decimal::Decimal::ZERO,
6014                raw_materials_value: rust_decimal::Decimal::ZERO,
6015                debt_instruments,
6016                hedge_count,
6017                effective_hedges,
6018                total_notional,
6019                total_fair_value,
6020                provision_movements,
6021            };
6022
6023            let enhanced_notes =
6024                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6025            if !enhanced_notes.is_empty() {
6026                info!(
6027                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6028                    company.code,
6029                    enhanced_notes.len(),
6030                    enhanced_ctx.debt_instruments.len(),
6031                    hedge_count,
6032                    enhanced_ctx.provision_movements.len(),
6033                );
6034                financial_reporting
6035                    .notes_to_financial_statements
6036                    .extend(enhanced_notes);
6037            }
6038        }
6039    }
6040
6041    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6042    ///
6043    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6044    /// posted in the journal entries flows through to the trial balance, using the real
6045    /// GL account numbers from the CoA.
6046    fn build_trial_balance_from_entries(
6047        journal_entries: &[JournalEntry],
6048        coa: &ChartOfAccounts,
6049        company_code: &str,
6050        fiscal_year: u16,
6051        fiscal_period: u8,
6052    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6053        use rust_decimal::Decimal;
6054
6055        // Accumulate total debits and credits per GL account
6056        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6057        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6058
6059        for je in journal_entries {
6060            // Filter to matching company, fiscal year, and period
6061            if je.header.company_code != company_code
6062                || je.header.fiscal_year != fiscal_year
6063                || je.header.fiscal_period != fiscal_period
6064            {
6065                continue;
6066            }
6067
6068            for line in &je.lines {
6069                let acct = &line.gl_account;
6070                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6071                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6072            }
6073        }
6074
6075        // Build a TrialBalanceEntry for each account that had activity
6076        let mut all_accounts: Vec<&String> = account_debits
6077            .keys()
6078            .chain(account_credits.keys())
6079            .collect::<std::collections::HashSet<_>>()
6080            .into_iter()
6081            .collect();
6082        all_accounts.sort();
6083
6084        let mut entries = Vec::new();
6085
6086        for acct_number in all_accounts {
6087            let debit = account_debits
6088                .get(acct_number)
6089                .copied()
6090                .unwrap_or(Decimal::ZERO);
6091            let credit = account_credits
6092                .get(acct_number)
6093                .copied()
6094                .unwrap_or(Decimal::ZERO);
6095
6096            if debit.is_zero() && credit.is_zero() {
6097                continue;
6098            }
6099
6100            // Look up account name from CoA, fall back to "Account {code}"
6101            let account_name = coa
6102                .get_account(acct_number)
6103                .map(|gl| gl.short_description.clone())
6104                .unwrap_or_else(|| format!("Account {acct_number}"));
6105
6106            // Map account code prefix to the category strings expected by
6107            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6108            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6109            // OperatingExpenses).
6110            let category = Self::category_from_account_code(acct_number);
6111
6112            entries.push(datasynth_generators::TrialBalanceEntry {
6113                account_code: acct_number.clone(),
6114                account_name,
6115                category,
6116                debit_balance: debit,
6117                credit_balance: credit,
6118            });
6119        }
6120
6121        entries
6122    }
6123
6124    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6125    /// (and including) the given period end date.
6126    ///
6127    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6128    /// while income statement accounts (revenue, expenses) show only the current period.
6129    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6130    fn build_cumulative_trial_balance(
6131        journal_entries: &[JournalEntry],
6132        coa: &ChartOfAccounts,
6133        company_code: &str,
6134        start_date: NaiveDate,
6135        period_end: NaiveDate,
6136        fiscal_year: u16,
6137        fiscal_period: u8,
6138    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6139        use rust_decimal::Decimal;
6140
6141        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6142        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6143        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6144
6145        // Accumulate debits/credits for income statement accounts (current period only)
6146        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6147        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6148
6149        for je in journal_entries {
6150            if je.header.company_code != company_code {
6151                continue;
6152            }
6153
6154            for line in &je.lines {
6155                let acct = &line.gl_account;
6156                let category = Self::category_from_account_code(acct);
6157                let is_bs_account = matches!(
6158                    category.as_str(),
6159                    "Cash"
6160                        | "Receivables"
6161                        | "Inventory"
6162                        | "FixedAssets"
6163                        | "Payables"
6164                        | "AccruedLiabilities"
6165                        | "LongTermDebt"
6166                        | "Equity"
6167                );
6168
6169                if is_bs_account {
6170                    // Balance sheet: accumulate from start through period_end
6171                    if je.header.document_date <= period_end
6172                        && je.header.document_date >= start_date
6173                    {
6174                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6175                            line.debit_amount;
6176                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6177                            line.credit_amount;
6178                    }
6179                } else {
6180                    // Income statement: current period only
6181                    if je.header.fiscal_year == fiscal_year
6182                        && je.header.fiscal_period == fiscal_period
6183                    {
6184                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6185                            line.debit_amount;
6186                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6187                            line.credit_amount;
6188                    }
6189                }
6190            }
6191        }
6192
6193        // Merge all accounts
6194        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6195        all_accounts.extend(bs_debits.keys().cloned());
6196        all_accounts.extend(bs_credits.keys().cloned());
6197        all_accounts.extend(is_debits.keys().cloned());
6198        all_accounts.extend(is_credits.keys().cloned());
6199
6200        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6201        sorted_accounts.sort();
6202
6203        let mut entries = Vec::new();
6204
6205        for acct_number in &sorted_accounts {
6206            let category = Self::category_from_account_code(acct_number);
6207            let is_bs_account = matches!(
6208                category.as_str(),
6209                "Cash"
6210                    | "Receivables"
6211                    | "Inventory"
6212                    | "FixedAssets"
6213                    | "Payables"
6214                    | "AccruedLiabilities"
6215                    | "LongTermDebt"
6216                    | "Equity"
6217            );
6218
6219            let (debit, credit) = if is_bs_account {
6220                (
6221                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6222                    bs_credits
6223                        .get(acct_number)
6224                        .copied()
6225                        .unwrap_or(Decimal::ZERO),
6226                )
6227            } else {
6228                (
6229                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
6230                    is_credits
6231                        .get(acct_number)
6232                        .copied()
6233                        .unwrap_or(Decimal::ZERO),
6234                )
6235            };
6236
6237            if debit.is_zero() && credit.is_zero() {
6238                continue;
6239            }
6240
6241            let account_name = coa
6242                .get_account(acct_number)
6243                .map(|gl| gl.short_description.clone())
6244                .unwrap_or_else(|| format!("Account {acct_number}"));
6245
6246            entries.push(datasynth_generators::TrialBalanceEntry {
6247                account_code: acct_number.clone(),
6248                account_name,
6249                category,
6250                debit_balance: debit,
6251                credit_balance: credit,
6252            });
6253        }
6254
6255        entries
6256    }
6257
6258    /// Build a JE-derived cash flow statement using the indirect method.
6259    ///
6260    /// Compares current and prior cumulative trial balances to derive working capital
6261    /// changes, producing a coherent cash flow statement tied to actual journal entries.
6262    fn build_cash_flow_from_trial_balances(
6263        current_tb: &[datasynth_generators::TrialBalanceEntry],
6264        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
6265        net_income: rust_decimal::Decimal,
6266    ) -> Vec<CashFlowItem> {
6267        use rust_decimal::Decimal;
6268
6269        // Helper: aggregate a TB by category and return net (debit - credit)
6270        let aggregate =
6271            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
6272                let mut map: HashMap<String, Decimal> = HashMap::new();
6273                for entry in tb {
6274                    let net = entry.debit_balance - entry.credit_balance;
6275                    *map.entry(entry.category.clone()).or_default() += net;
6276                }
6277                map
6278            };
6279
6280        let current = aggregate(current_tb);
6281        let prior = prior_tb.map(aggregate);
6282
6283        // Get balance for a category, defaulting to zero
6284        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
6285            *map.get(key).unwrap_or(&Decimal::ZERO)
6286        };
6287
6288        // Compute change: current - prior (or current if no prior)
6289        let change = |key: &str| -> Decimal {
6290            let curr = get(&current, key);
6291            match &prior {
6292                Some(p) => curr - get(p, key),
6293                None => curr,
6294            }
6295        };
6296
6297        // Operating activities (indirect method)
6298        // Depreciation add-back: approximate from FixedAssets decrease
6299        let fixed_asset_change = change("FixedAssets");
6300        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
6301            -fixed_asset_change
6302        } else {
6303            Decimal::ZERO
6304        };
6305
6306        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
6307        let ar_change = change("Receivables");
6308        let inventory_change = change("Inventory");
6309        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
6310        let ap_change = change("Payables");
6311        let accrued_change = change("AccruedLiabilities");
6312
6313        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
6314            + (-ap_change)
6315            + (-accrued_change);
6316
6317        // Investing activities
6318        let capex = if fixed_asset_change > Decimal::ZERO {
6319            -fixed_asset_change
6320        } else {
6321            Decimal::ZERO
6322        };
6323        let investing_cf = capex;
6324
6325        // Financing activities
6326        let debt_change = -change("LongTermDebt");
6327        let equity_change = -change("Equity");
6328        let financing_cf = debt_change + equity_change;
6329
6330        let net_change = operating_cf + investing_cf + financing_cf;
6331
6332        vec![
6333            CashFlowItem {
6334                item_code: "CF-NI".to_string(),
6335                label: "Net Income".to_string(),
6336                category: CashFlowCategory::Operating,
6337                amount: net_income,
6338                amount_prior: None,
6339                sort_order: 1,
6340                is_total: false,
6341            },
6342            CashFlowItem {
6343                item_code: "CF-DEP".to_string(),
6344                label: "Depreciation & Amortization".to_string(),
6345                category: CashFlowCategory::Operating,
6346                amount: depreciation_addback,
6347                amount_prior: None,
6348                sort_order: 2,
6349                is_total: false,
6350            },
6351            CashFlowItem {
6352                item_code: "CF-AR".to_string(),
6353                label: "Change in Accounts Receivable".to_string(),
6354                category: CashFlowCategory::Operating,
6355                amount: -ar_change,
6356                amount_prior: None,
6357                sort_order: 3,
6358                is_total: false,
6359            },
6360            CashFlowItem {
6361                item_code: "CF-AP".to_string(),
6362                label: "Change in Accounts Payable".to_string(),
6363                category: CashFlowCategory::Operating,
6364                amount: -ap_change,
6365                amount_prior: None,
6366                sort_order: 4,
6367                is_total: false,
6368            },
6369            CashFlowItem {
6370                item_code: "CF-INV".to_string(),
6371                label: "Change in Inventory".to_string(),
6372                category: CashFlowCategory::Operating,
6373                amount: -inventory_change,
6374                amount_prior: None,
6375                sort_order: 5,
6376                is_total: false,
6377            },
6378            CashFlowItem {
6379                item_code: "CF-OP".to_string(),
6380                label: "Net Cash from Operating Activities".to_string(),
6381                category: CashFlowCategory::Operating,
6382                amount: operating_cf,
6383                amount_prior: None,
6384                sort_order: 6,
6385                is_total: true,
6386            },
6387            CashFlowItem {
6388                item_code: "CF-CAPEX".to_string(),
6389                label: "Capital Expenditures".to_string(),
6390                category: CashFlowCategory::Investing,
6391                amount: capex,
6392                amount_prior: None,
6393                sort_order: 7,
6394                is_total: false,
6395            },
6396            CashFlowItem {
6397                item_code: "CF-INV-T".to_string(),
6398                label: "Net Cash from Investing Activities".to_string(),
6399                category: CashFlowCategory::Investing,
6400                amount: investing_cf,
6401                amount_prior: None,
6402                sort_order: 8,
6403                is_total: true,
6404            },
6405            CashFlowItem {
6406                item_code: "CF-DEBT".to_string(),
6407                label: "Net Borrowings / (Repayments)".to_string(),
6408                category: CashFlowCategory::Financing,
6409                amount: debt_change,
6410                amount_prior: None,
6411                sort_order: 9,
6412                is_total: false,
6413            },
6414            CashFlowItem {
6415                item_code: "CF-EQ".to_string(),
6416                label: "Equity Changes".to_string(),
6417                category: CashFlowCategory::Financing,
6418                amount: equity_change,
6419                amount_prior: None,
6420                sort_order: 10,
6421                is_total: false,
6422            },
6423            CashFlowItem {
6424                item_code: "CF-FIN-T".to_string(),
6425                label: "Net Cash from Financing Activities".to_string(),
6426                category: CashFlowCategory::Financing,
6427                amount: financing_cf,
6428                amount_prior: None,
6429                sort_order: 11,
6430                is_total: true,
6431            },
6432            CashFlowItem {
6433                item_code: "CF-NET".to_string(),
6434                label: "Net Change in Cash".to_string(),
6435                category: CashFlowCategory::Operating,
6436                amount: net_change,
6437                amount_prior: None,
6438                sort_order: 12,
6439                is_total: true,
6440            },
6441        ]
6442    }
6443
6444    /// Calculate net income from a set of trial balance entries.
6445    ///
6446    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6447    fn calculate_net_income_from_tb(
6448        tb: &[datasynth_generators::TrialBalanceEntry],
6449    ) -> rust_decimal::Decimal {
6450        use rust_decimal::Decimal;
6451
6452        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6453        for entry in tb {
6454            let net = entry.debit_balance - entry.credit_balance;
6455            *aggregated.entry(entry.category.clone()).or_default() += net;
6456        }
6457
6458        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6459        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6460        let opex = *aggregated
6461            .get("OperatingExpenses")
6462            .unwrap_or(&Decimal::ZERO);
6463        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6464        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6465
6466        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6467        // other_income is typically negative (credit), other_expenses is typically positive
6468        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6469        let tax_rate = Decimal::new(25, 2); // 0.25
6470        let tax = operating_income * tax_rate;
6471        operating_income - tax
6472    }
6473
6474    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
6475    ///
6476    /// Uses the first two digits of the account code to classify into the categories
6477    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
6478    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
6479    /// OperatingExpenses, OtherIncome, OtherExpenses.
6480    fn category_from_account_code(code: &str) -> String {
6481        let prefix: String = code.chars().take(2).collect();
6482        match prefix.as_str() {
6483            "10" => "Cash",
6484            "11" => "Receivables",
6485            "12" | "13" | "14" => "Inventory",
6486            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6487            "20" => "Payables",
6488            "21" | "22" | "23" | "24" => "AccruedLiabilities",
6489            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6490            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6491            "40" | "41" | "42" | "43" | "44" => "Revenue",
6492            "50" | "51" | "52" => "CostOfSales",
6493            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6494                "OperatingExpenses"
6495            }
6496            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6497            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6498            _ => "OperatingExpenses",
6499        }
6500        .to_string()
6501    }
6502
6503    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6504    fn phase_hr_data(
6505        &mut self,
6506        stats: &mut EnhancedGenerationStatistics,
6507    ) -> SynthResult<HrSnapshot> {
6508        if !self.phase_config.generate_hr {
6509            debug!("Phase 16: Skipped (HR generation disabled)");
6510            return Ok(HrSnapshot::default());
6511        }
6512
6513        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6514
6515        let seed = self.seed;
6516        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6517            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6518        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6519        let company_code = self
6520            .config
6521            .companies
6522            .first()
6523            .map(|c| c.code.as_str())
6524            .unwrap_or("1000");
6525        let currency = self
6526            .config
6527            .companies
6528            .first()
6529            .map(|c| c.currency.as_str())
6530            .unwrap_or("USD");
6531
6532        let employee_ids: Vec<String> = self
6533            .master_data
6534            .employees
6535            .iter()
6536            .map(|e| e.employee_id.clone())
6537            .collect();
6538
6539        if employee_ids.is_empty() {
6540            debug!("Phase 16: Skipped (no employees available)");
6541            return Ok(HrSnapshot::default());
6542        }
6543
6544        // Extract cost-center pool from master data employees for cross-reference
6545        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6546        let cost_center_ids: Vec<String> = self
6547            .master_data
6548            .employees
6549            .iter()
6550            .filter_map(|e| e.cost_center.clone())
6551            .collect::<std::collections::HashSet<_>>()
6552            .into_iter()
6553            .collect();
6554
6555        let mut snapshot = HrSnapshot::default();
6556
6557        // Generate payroll runs (one per month)
6558        if self.config.hr.payroll.enabled {
6559            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6560                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6561
6562            // Look up country pack for payroll deductions and labels
6563            let payroll_pack = self.primary_pack();
6564
6565            // Store the pack on the generator so generate() resolves
6566            // localized deduction rates and labels from it.
6567            payroll_gen.set_country_pack(payroll_pack.clone());
6568
6569            let employees_with_salary: Vec<(
6570                String,
6571                rust_decimal::Decimal,
6572                Option<String>,
6573                Option<String>,
6574            )> = self
6575                .master_data
6576                .employees
6577                .iter()
6578                .map(|e| {
6579                    // Use the employee's actual annual base salary.
6580                    // Fall back to $60,000 / yr if somehow zero.
6581                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6582                        e.base_salary
6583                    } else {
6584                        rust_decimal::Decimal::from(60_000)
6585                    };
6586                    (
6587                        e.employee_id.clone(),
6588                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6589                        e.cost_center.clone(),
6590                        e.department_id.clone(),
6591                    )
6592                })
6593                .collect();
6594
6595            // Use generate_with_changes when employee change history is available
6596            // so that salary adjustments, transfers, etc. are reflected in payroll.
6597            let change_history = &self.master_data.employee_change_history;
6598            let has_changes = !change_history.is_empty();
6599            if has_changes {
6600                debug!(
6601                    "Payroll will incorporate {} employee change events",
6602                    change_history.len()
6603                );
6604            }
6605
6606            for month in 0..self.config.global.period_months {
6607                let period_start = start_date + chrono::Months::new(month);
6608                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6609                let (run, items) = if has_changes {
6610                    payroll_gen.generate_with_changes(
6611                        company_code,
6612                        &employees_with_salary,
6613                        period_start,
6614                        period_end,
6615                        currency,
6616                        change_history,
6617                    )
6618                } else {
6619                    payroll_gen.generate(
6620                        company_code,
6621                        &employees_with_salary,
6622                        period_start,
6623                        period_end,
6624                        currency,
6625                    )
6626                };
6627                snapshot.payroll_runs.push(run);
6628                snapshot.payroll_run_count += 1;
6629                snapshot.payroll_line_item_count += items.len();
6630                snapshot.payroll_line_items.extend(items);
6631            }
6632        }
6633
6634        // Generate time entries
6635        if self.config.hr.time_attendance.enabled {
6636            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6637                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6638            let entries = time_gen.generate(
6639                &employee_ids,
6640                start_date,
6641                end_date,
6642                &self.config.hr.time_attendance,
6643            );
6644            snapshot.time_entry_count = entries.len();
6645            snapshot.time_entries = entries;
6646        }
6647
6648        // Generate expense reports
6649        if self.config.hr.expenses.enabled {
6650            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6651                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6652            expense_gen.set_country_pack(self.primary_pack().clone());
6653            let company_currency = self
6654                .config
6655                .companies
6656                .first()
6657                .map(|c| c.currency.as_str())
6658                .unwrap_or("USD");
6659            let reports = expense_gen.generate_with_currency(
6660                &employee_ids,
6661                start_date,
6662                end_date,
6663                &self.config.hr.expenses,
6664                company_currency,
6665            );
6666            snapshot.expense_report_count = reports.len();
6667            snapshot.expense_reports = reports;
6668        }
6669
6670        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6671        if self.config.hr.payroll.enabled {
6672            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6673            let employee_pairs: Vec<(String, String)> = self
6674                .master_data
6675                .employees
6676                .iter()
6677                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6678                .collect();
6679            let enrollments =
6680                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6681            snapshot.benefit_enrollment_count = enrollments.len();
6682            snapshot.benefit_enrollments = enrollments;
6683        }
6684
6685        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6686        if self.phase_config.generate_hr {
6687            let entity_name = self
6688                .config
6689                .companies
6690                .first()
6691                .map(|c| c.name.as_str())
6692                .unwrap_or("Entity");
6693            let period_months = self.config.global.period_months;
6694            let period_label = {
6695                let y = start_date.year();
6696                let m = start_date.month();
6697                if period_months >= 12 {
6698                    format!("FY{y}")
6699                } else {
6700                    format!("{y}-{m:02}")
6701                }
6702            };
6703            let reporting_date =
6704                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6705
6706            // Compute average annual salary from actual payroll data when available.
6707            // PayrollRun.total_gross covers all employees for one pay period; we sum
6708            // across all runs and divide by employee_count to get per-employee total,
6709            // then annualise for sub-annual periods.
6710            let avg_salary: Option<rust_decimal::Decimal> = {
6711                let employee_count = employee_ids.len();
6712                if self.config.hr.payroll.enabled
6713                    && employee_count > 0
6714                    && !snapshot.payroll_runs.is_empty()
6715                {
6716                    // Sum total gross pay across all payroll runs for this company
6717                    let total_gross: rust_decimal::Decimal = snapshot
6718                        .payroll_runs
6719                        .iter()
6720                        .filter(|r| r.company_code == company_code)
6721                        .map(|r| r.total_gross)
6722                        .sum();
6723                    if total_gross > rust_decimal::Decimal::ZERO {
6724                        // Annualise: total_gross covers `period_months` months of pay
6725                        let annual_total = if period_months > 0 && period_months < 12 {
6726                            total_gross * rust_decimal::Decimal::from(12u32)
6727                                / rust_decimal::Decimal::from(period_months)
6728                        } else {
6729                            total_gross
6730                        };
6731                        Some(
6732                            (annual_total / rust_decimal::Decimal::from(employee_count))
6733                                .round_dp(2),
6734                        )
6735                    } else {
6736                        None
6737                    }
6738                } else {
6739                    None
6740                }
6741            };
6742
6743            let mut pension_gen =
6744                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6745            let pension_snap = pension_gen.generate(
6746                company_code,
6747                entity_name,
6748                &period_label,
6749                reporting_date,
6750                employee_ids.len(),
6751                currency,
6752                avg_salary,
6753                period_months,
6754            );
6755            snapshot.pension_plan_count = pension_snap.plans.len();
6756            snapshot.pension_plans = pension_snap.plans;
6757            snapshot.pension_obligations = pension_snap.obligations;
6758            snapshot.pension_plan_assets = pension_snap.plan_assets;
6759            snapshot.pension_disclosures = pension_snap.disclosures;
6760            // Pension JEs are returned here so they can be added to entries
6761            // in the caller (stored temporarily on snapshot for transfer).
6762            // We embed them in the hr snapshot for simplicity; the orchestrator
6763            // will extract and extend `entries`.
6764            snapshot.pension_journal_entries = pension_snap.journal_entries;
6765        }
6766
6767        // Generate stock-based compensation (ASC 718 / IFRS 2)
6768        if self.phase_config.generate_hr && !employee_ids.is_empty() {
6769            let period_months = self.config.global.period_months;
6770            let period_label = {
6771                let y = start_date.year();
6772                let m = start_date.month();
6773                if period_months >= 12 {
6774                    format!("FY{y}")
6775                } else {
6776                    format!("{y}-{m:02}")
6777                }
6778            };
6779            let reporting_date =
6780                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6781
6782            let mut stock_comp_gen =
6783                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6784            let stock_snap = stock_comp_gen.generate(
6785                company_code,
6786                &employee_ids,
6787                start_date,
6788                &period_label,
6789                reporting_date,
6790                currency,
6791            );
6792            snapshot.stock_grant_count = stock_snap.grants.len();
6793            snapshot.stock_grants = stock_snap.grants;
6794            snapshot.stock_comp_expenses = stock_snap.expenses;
6795            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6796        }
6797
6798        stats.payroll_run_count = snapshot.payroll_run_count;
6799        stats.time_entry_count = snapshot.time_entry_count;
6800        stats.expense_report_count = snapshot.expense_report_count;
6801        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6802        stats.pension_plan_count = snapshot.pension_plan_count;
6803        stats.stock_grant_count = snapshot.stock_grant_count;
6804
6805        info!(
6806            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6807            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6808            snapshot.time_entry_count, snapshot.expense_report_count,
6809            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6810            snapshot.stock_grant_count
6811        );
6812        self.check_resources_with_log("post-hr")?;
6813
6814        Ok(snapshot)
6815    }
6816
6817    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
6818    fn phase_accounting_standards(
6819        &mut self,
6820        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6821        journal_entries: &[JournalEntry],
6822        stats: &mut EnhancedGenerationStatistics,
6823    ) -> SynthResult<AccountingStandardsSnapshot> {
6824        if !self.phase_config.generate_accounting_standards {
6825            debug!("Phase 17: Skipped (accounting standards generation disabled)");
6826            return Ok(AccountingStandardsSnapshot::default());
6827        }
6828        info!("Phase 17: Generating Accounting Standards Data");
6829
6830        let seed = self.seed;
6831        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6832            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6833        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6834        let company_code = self
6835            .config
6836            .companies
6837            .first()
6838            .map(|c| c.code.as_str())
6839            .unwrap_or("1000");
6840        let currency = self
6841            .config
6842            .companies
6843            .first()
6844            .map(|c| c.currency.as_str())
6845            .unwrap_or("USD");
6846
6847        // Convert config framework to standards framework.
6848        // If the user explicitly set a framework in the YAML config, use that.
6849        // Otherwise, fall back to the country pack's accounting.framework field,
6850        // and if that is also absent or unrecognised, default to US GAAP.
6851        let framework = match self.config.accounting_standards.framework {
6852            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6853                datasynth_standards::framework::AccountingFramework::UsGaap
6854            }
6855            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6856                datasynth_standards::framework::AccountingFramework::Ifrs
6857            }
6858            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6859                datasynth_standards::framework::AccountingFramework::DualReporting
6860            }
6861            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6862                datasynth_standards::framework::AccountingFramework::FrenchGaap
6863            }
6864            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6865                datasynth_standards::framework::AccountingFramework::GermanGaap
6866            }
6867            None => {
6868                // Derive framework from the primary company's country pack
6869                let pack = self.primary_pack();
6870                let pack_fw = pack.accounting.framework.as_str();
6871                match pack_fw {
6872                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6873                    "dual_reporting" => {
6874                        datasynth_standards::framework::AccountingFramework::DualReporting
6875                    }
6876                    "french_gaap" => {
6877                        datasynth_standards::framework::AccountingFramework::FrenchGaap
6878                    }
6879                    "german_gaap" | "hgb" => {
6880                        datasynth_standards::framework::AccountingFramework::GermanGaap
6881                    }
6882                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
6883                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6884                }
6885            }
6886        };
6887
6888        let mut snapshot = AccountingStandardsSnapshot::default();
6889
6890        // Revenue recognition
6891        if self.config.accounting_standards.revenue_recognition.enabled {
6892            let customer_ids: Vec<String> = self
6893                .master_data
6894                .customers
6895                .iter()
6896                .map(|c| c.customer_id.clone())
6897                .collect();
6898
6899            if !customer_ids.is_empty() {
6900                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6901                let contracts = rev_gen.generate(
6902                    company_code,
6903                    &customer_ids,
6904                    start_date,
6905                    end_date,
6906                    currency,
6907                    &self.config.accounting_standards.revenue_recognition,
6908                    framework,
6909                );
6910                snapshot.revenue_contract_count = contracts.len();
6911                snapshot.contracts = contracts;
6912            }
6913        }
6914
6915        // Impairment testing
6916        if self.config.accounting_standards.impairment.enabled {
6917            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6918                .master_data
6919                .assets
6920                .iter()
6921                .map(|a| {
6922                    (
6923                        a.asset_id.clone(),
6924                        a.description.clone(),
6925                        a.acquisition_cost,
6926                    )
6927                })
6928                .collect();
6929
6930            if !asset_data.is_empty() {
6931                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6932                let tests = imp_gen.generate(
6933                    company_code,
6934                    &asset_data,
6935                    end_date,
6936                    &self.config.accounting_standards.impairment,
6937                    framework,
6938                );
6939                snapshot.impairment_test_count = tests.len();
6940                snapshot.impairment_tests = tests;
6941            }
6942        }
6943
6944        // Business combinations (IFRS 3 / ASC 805)
6945        if self
6946            .config
6947            .accounting_standards
6948            .business_combinations
6949            .enabled
6950        {
6951            let bc_config = &self.config.accounting_standards.business_combinations;
6952            let framework_str = match framework {
6953                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6954                _ => "US_GAAP",
6955            };
6956            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6957            let bc_snap = bc_gen.generate(
6958                company_code,
6959                currency,
6960                start_date,
6961                end_date,
6962                bc_config.acquisition_count,
6963                framework_str,
6964            );
6965            snapshot.business_combination_count = bc_snap.combinations.len();
6966            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6967            snapshot.business_combinations = bc_snap.combinations;
6968        }
6969
6970        // Expected Credit Loss (IFRS 9 / ASC 326)
6971        if self
6972            .config
6973            .accounting_standards
6974            .expected_credit_loss
6975            .enabled
6976        {
6977            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6978            let framework_str = match framework {
6979                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6980                _ => "ASC_326",
6981            };
6982
6983            // Use AR aging data from the subledger snapshot if available;
6984            // otherwise generate synthetic bucket exposures.
6985            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6986
6987            let mut ecl_gen = EclGenerator::new(seed + 43);
6988
6989            // Collect combined bucket totals across all company AR aging reports.
6990            let bucket_exposures: Vec<(
6991                datasynth_core::models::subledger::ar::AgingBucket,
6992                rust_decimal::Decimal,
6993            )> = if ar_aging_reports.is_empty() {
6994                // No AR aging data — synthesise plausible bucket exposures.
6995                use datasynth_core::models::subledger::ar::AgingBucket;
6996                vec![
6997                    (
6998                        AgingBucket::Current,
6999                        rust_decimal::Decimal::from(500_000_u32),
7000                    ),
7001                    (
7002                        AgingBucket::Days1To30,
7003                        rust_decimal::Decimal::from(120_000_u32),
7004                    ),
7005                    (
7006                        AgingBucket::Days31To60,
7007                        rust_decimal::Decimal::from(45_000_u32),
7008                    ),
7009                    (
7010                        AgingBucket::Days61To90,
7011                        rust_decimal::Decimal::from(15_000_u32),
7012                    ),
7013                    (
7014                        AgingBucket::Over90Days,
7015                        rust_decimal::Decimal::from(8_000_u32),
7016                    ),
7017                ]
7018            } else {
7019                use datasynth_core::models::subledger::ar::AgingBucket;
7020                // Sum bucket totals from all reports.
7021                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7022                    std::collections::HashMap::new();
7023                for report in ar_aging_reports {
7024                    for (bucket, amount) in &report.bucket_totals {
7025                        *totals.entry(*bucket).or_default() += amount;
7026                    }
7027                }
7028                AgingBucket::all()
7029                    .into_iter()
7030                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7031                    .collect()
7032            };
7033
7034            let ecl_snap = ecl_gen.generate(
7035                company_code,
7036                end_date,
7037                &bucket_exposures,
7038                ecl_config,
7039                &period_label,
7040                framework_str,
7041            );
7042
7043            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7044            snapshot.ecl_models = ecl_snap.ecl_models;
7045            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7046            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7047        }
7048
7049        // Provisions and contingencies (IAS 37 / ASC 450)
7050        {
7051            let framework_str = match framework {
7052                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7053                _ => "US_GAAP",
7054            };
7055
7056            // Compute actual revenue from the journal entries generated so far.
7057            // The `journal_entries` slice passed to this phase contains all GL entries
7058            // up to and including Period Close. Fall back to a minimum of 100_000 to
7059            // avoid degenerate zero-based provision amounts on first-period datasets.
7060            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7061                .max(rust_decimal::Decimal::from(100_000_u32));
7062
7063            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7064
7065            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7066            let prov_snap = prov_gen.generate(
7067                company_code,
7068                currency,
7069                revenue_proxy,
7070                end_date,
7071                &period_label,
7072                framework_str,
7073                None, // prior_opening: no carry-forward data in single-period runs
7074            );
7075
7076            snapshot.provision_count = prov_snap.provisions.len();
7077            snapshot.provisions = prov_snap.provisions;
7078            snapshot.provision_movements = prov_snap.movements;
7079            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7080            snapshot.provision_journal_entries = prov_snap.journal_entries;
7081        }
7082
7083        // IAS 21 Functional Currency Translation
7084        // For each company whose functional currency differs from the presentation
7085        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7086        {
7087            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7088
7089            let presentation_currency = self
7090                .config
7091                .global
7092                .presentation_currency
7093                .clone()
7094                .unwrap_or_else(|| self.config.global.group_currency.clone());
7095
7096            // Build a minimal rate table populated with approximate rates from
7097            // the FX model base rates (USD-based) so we can do the translation.
7098            let mut rate_table = FxRateTable::new(&presentation_currency);
7099
7100            // Populate with base rates against USD; if presentation_currency is
7101            // not USD we do a best-effort two-step conversion using the table's
7102            // triangulation support.
7103            let base_rates = base_rates_usd();
7104            for (ccy, rate) in &base_rates {
7105                rate_table.add_rate(FxRate::new(
7106                    ccy,
7107                    "USD",
7108                    RateType::Closing,
7109                    end_date,
7110                    *rate,
7111                    "SYNTHETIC",
7112                ));
7113                // Average rate = 98% of closing (approximation).
7114                // 0.98 = 98/100 = Decimal::new(98, 2)
7115                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7116                rate_table.add_rate(FxRate::new(
7117                    ccy,
7118                    "USD",
7119                    RateType::Average,
7120                    end_date,
7121                    avg,
7122                    "SYNTHETIC",
7123                ));
7124            }
7125
7126            let mut translation_results = Vec::new();
7127            for company in &self.config.companies {
7128                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7129                // to ensure the translation produces non-trivial CTA amounts.
7130                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7131                    .max(rust_decimal::Decimal::from(100_000_u32));
7132
7133                let func_ccy = company
7134                    .functional_currency
7135                    .clone()
7136                    .unwrap_or_else(|| company.currency.clone());
7137
7138                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7139                    &company.code,
7140                    &func_ccy,
7141                    &presentation_currency,
7142                    &ias21_period_label,
7143                    end_date,
7144                    company_revenue,
7145                    &rate_table,
7146                );
7147                translation_results.push(result);
7148            }
7149
7150            snapshot.currency_translation_count = translation_results.len();
7151            snapshot.currency_translation_results = translation_results;
7152        }
7153
7154        stats.revenue_contract_count = snapshot.revenue_contract_count;
7155        stats.impairment_test_count = snapshot.impairment_test_count;
7156        stats.business_combination_count = snapshot.business_combination_count;
7157        stats.ecl_model_count = snapshot.ecl_model_count;
7158        stats.provision_count = snapshot.provision_count;
7159
7160        info!(
7161            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
7162            snapshot.revenue_contract_count,
7163            snapshot.impairment_test_count,
7164            snapshot.business_combination_count,
7165            snapshot.ecl_model_count,
7166            snapshot.provision_count,
7167            snapshot.currency_translation_count
7168        );
7169        self.check_resources_with_log("post-accounting-standards")?;
7170
7171        Ok(snapshot)
7172    }
7173
7174    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
7175    fn phase_manufacturing(
7176        &mut self,
7177        stats: &mut EnhancedGenerationStatistics,
7178    ) -> SynthResult<ManufacturingSnapshot> {
7179        if !self.phase_config.generate_manufacturing {
7180            debug!("Phase 18: Skipped (manufacturing generation disabled)");
7181            return Ok(ManufacturingSnapshot::default());
7182        }
7183        info!("Phase 18: Generating Manufacturing Data");
7184
7185        let seed = self.seed;
7186        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7187            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7188        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7189        let company_code = self
7190            .config
7191            .companies
7192            .first()
7193            .map(|c| c.code.as_str())
7194            .unwrap_or("1000");
7195
7196        let material_data: Vec<(String, String)> = self
7197            .master_data
7198            .materials
7199            .iter()
7200            .map(|m| (m.material_id.clone(), m.description.clone()))
7201            .collect();
7202
7203        if material_data.is_empty() {
7204            debug!("Phase 18: Skipped (no materials available)");
7205            return Ok(ManufacturingSnapshot::default());
7206        }
7207
7208        let mut snapshot = ManufacturingSnapshot::default();
7209
7210        // Generate production orders
7211        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
7212        let production_orders = prod_gen.generate(
7213            company_code,
7214            &material_data,
7215            start_date,
7216            end_date,
7217            &self.config.manufacturing.production_orders,
7218            &self.config.manufacturing.costing,
7219            &self.config.manufacturing.routing,
7220        );
7221        snapshot.production_order_count = production_orders.len();
7222
7223        // Generate quality inspections from production orders
7224        let inspection_data: Vec<(String, String, String)> = production_orders
7225            .iter()
7226            .map(|po| {
7227                (
7228                    po.order_id.clone(),
7229                    po.material_id.clone(),
7230                    po.material_description.clone(),
7231                )
7232            })
7233            .collect();
7234
7235        snapshot.production_orders = production_orders;
7236
7237        if !inspection_data.is_empty() {
7238            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
7239            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
7240            snapshot.quality_inspection_count = inspections.len();
7241            snapshot.quality_inspections = inspections;
7242        }
7243
7244        // Generate cycle counts (one per month)
7245        let storage_locations: Vec<(String, String)> = material_data
7246            .iter()
7247            .enumerate()
7248            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
7249            .collect();
7250
7251        let employee_ids: Vec<String> = self
7252            .master_data
7253            .employees
7254            .iter()
7255            .map(|e| e.employee_id.clone())
7256            .collect();
7257        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
7258            .with_employee_pool(employee_ids);
7259        let mut cycle_count_total = 0usize;
7260        for month in 0..self.config.global.period_months {
7261            let count_date = start_date + chrono::Months::new(month);
7262            let items_per_count = storage_locations.len().clamp(10, 50);
7263            let cc = cc_gen.generate(
7264                company_code,
7265                &storage_locations,
7266                count_date,
7267                items_per_count,
7268            );
7269            snapshot.cycle_counts.push(cc);
7270            cycle_count_total += 1;
7271        }
7272        snapshot.cycle_count_count = cycle_count_total;
7273
7274        // Generate BOM components
7275        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
7276        let bom_components = bom_gen.generate(company_code, &material_data);
7277        snapshot.bom_component_count = bom_components.len();
7278        snapshot.bom_components = bom_components;
7279
7280        // Generate inventory movements — link GoodsIssue movements to real production order IDs
7281        let currency = self
7282            .config
7283            .companies
7284            .first()
7285            .map(|c| c.currency.as_str())
7286            .unwrap_or("USD");
7287        let production_order_ids: Vec<String> = snapshot
7288            .production_orders
7289            .iter()
7290            .map(|po| po.order_id.clone())
7291            .collect();
7292        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
7293        let inventory_movements = inv_mov_gen.generate_with_production_orders(
7294            company_code,
7295            &material_data,
7296            start_date,
7297            end_date,
7298            2,
7299            currency,
7300            &production_order_ids,
7301        );
7302        snapshot.inventory_movement_count = inventory_movements.len();
7303        snapshot.inventory_movements = inventory_movements;
7304
7305        stats.production_order_count = snapshot.production_order_count;
7306        stats.quality_inspection_count = snapshot.quality_inspection_count;
7307        stats.cycle_count_count = snapshot.cycle_count_count;
7308        stats.bom_component_count = snapshot.bom_component_count;
7309        stats.inventory_movement_count = snapshot.inventory_movement_count;
7310
7311        info!(
7312            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
7313            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
7314            snapshot.bom_component_count, snapshot.inventory_movement_count
7315        );
7316        self.check_resources_with_log("post-manufacturing")?;
7317
7318        Ok(snapshot)
7319    }
7320
7321    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
7322    fn phase_sales_kpi_budgets(
7323        &mut self,
7324        coa: &Arc<ChartOfAccounts>,
7325        financial_reporting: &FinancialReportingSnapshot,
7326        stats: &mut EnhancedGenerationStatistics,
7327    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
7328        if !self.phase_config.generate_sales_kpi_budgets {
7329            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
7330            return Ok(SalesKpiBudgetsSnapshot::default());
7331        }
7332        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
7333
7334        let seed = self.seed;
7335        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7336            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7337        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7338        let company_code = self
7339            .config
7340            .companies
7341            .first()
7342            .map(|c| c.code.as_str())
7343            .unwrap_or("1000");
7344
7345        let mut snapshot = SalesKpiBudgetsSnapshot::default();
7346
7347        // Sales Quotes
7348        if self.config.sales_quotes.enabled {
7349            let customer_data: Vec<(String, String)> = self
7350                .master_data
7351                .customers
7352                .iter()
7353                .map(|c| (c.customer_id.clone(), c.name.clone()))
7354                .collect();
7355            let material_data: Vec<(String, String)> = self
7356                .master_data
7357                .materials
7358                .iter()
7359                .map(|m| (m.material_id.clone(), m.description.clone()))
7360                .collect();
7361
7362            if !customer_data.is_empty() && !material_data.is_empty() {
7363                let employee_ids: Vec<String> = self
7364                    .master_data
7365                    .employees
7366                    .iter()
7367                    .map(|e| e.employee_id.clone())
7368                    .collect();
7369                let customer_ids: Vec<String> = self
7370                    .master_data
7371                    .customers
7372                    .iter()
7373                    .map(|c| c.customer_id.clone())
7374                    .collect();
7375                let company_currency = self
7376                    .config
7377                    .companies
7378                    .first()
7379                    .map(|c| c.currency.as_str())
7380                    .unwrap_or("USD");
7381
7382                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7383                    .with_pools(employee_ids, customer_ids);
7384                let quotes = quote_gen.generate_with_currency(
7385                    company_code,
7386                    &customer_data,
7387                    &material_data,
7388                    start_date,
7389                    end_date,
7390                    &self.config.sales_quotes,
7391                    company_currency,
7392                );
7393                snapshot.sales_quote_count = quotes.len();
7394                snapshot.sales_quotes = quotes;
7395            }
7396        }
7397
7398        // Management KPIs
7399        if self.config.financial_reporting.management_kpis.enabled {
7400            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7401            let mut kpis = kpi_gen.generate(
7402                company_code,
7403                start_date,
7404                end_date,
7405                &self.config.financial_reporting.management_kpis,
7406            );
7407
7408            // Override financial KPIs with actual data from financial statements
7409            {
7410                use rust_decimal::Decimal;
7411
7412                if let Some(income_stmt) =
7413                    financial_reporting.financial_statements.iter().find(|fs| {
7414                        fs.statement_type == StatementType::IncomeStatement
7415                            && fs.company_code == company_code
7416                    })
7417                {
7418                    // Extract revenue and COGS from income statement line items
7419                    let total_revenue: Decimal = income_stmt
7420                        .line_items
7421                        .iter()
7422                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
7423                        .map(|li| li.amount)
7424                        .sum();
7425                    let total_cogs: Decimal = income_stmt
7426                        .line_items
7427                        .iter()
7428                        .filter(|li| {
7429                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7430                                && !li.is_total
7431                        })
7432                        .map(|li| li.amount.abs())
7433                        .sum();
7434                    let total_opex: Decimal = income_stmt
7435                        .line_items
7436                        .iter()
7437                        .filter(|li| {
7438                            li.section.contains("Expense")
7439                                && !li.is_total
7440                                && !li.section.contains("Cost")
7441                        })
7442                        .map(|li| li.amount.abs())
7443                        .sum();
7444
7445                    if total_revenue > Decimal::ZERO {
7446                        let hundred = Decimal::from(100);
7447                        let gross_margin_pct =
7448                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7449                        let operating_income = total_revenue - total_cogs - total_opex;
7450                        let op_margin_pct =
7451                            (operating_income * hundred / total_revenue).round_dp(2);
7452
7453                        // Override gross margin and operating margin KPIs
7454                        for kpi in &mut kpis {
7455                            if kpi.name == "Gross Margin" {
7456                                kpi.value = gross_margin_pct;
7457                            } else if kpi.name == "Operating Margin" {
7458                                kpi.value = op_margin_pct;
7459                            }
7460                        }
7461                    }
7462                }
7463
7464                // Override Current Ratio from balance sheet
7465                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7466                    fs.statement_type == StatementType::BalanceSheet
7467                        && fs.company_code == company_code
7468                }) {
7469                    let current_assets: Decimal = bs
7470                        .line_items
7471                        .iter()
7472                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7473                        .map(|li| li.amount)
7474                        .sum();
7475                    let current_liabilities: Decimal = bs
7476                        .line_items
7477                        .iter()
7478                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7479                        .map(|li| li.amount.abs())
7480                        .sum();
7481
7482                    if current_liabilities > Decimal::ZERO {
7483                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
7484                        for kpi in &mut kpis {
7485                            if kpi.name == "Current Ratio" {
7486                                kpi.value = current_ratio;
7487                            }
7488                        }
7489                    }
7490                }
7491            }
7492
7493            snapshot.kpi_count = kpis.len();
7494            snapshot.kpis = kpis;
7495        }
7496
7497        // Budgets
7498        if self.config.financial_reporting.budgets.enabled {
7499            let account_data: Vec<(String, String)> = coa
7500                .accounts
7501                .iter()
7502                .map(|a| (a.account_number.clone(), a.short_description.clone()))
7503                .collect();
7504
7505            if !account_data.is_empty() {
7506                let fiscal_year = start_date.year() as u32;
7507                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7508                let budget = budget_gen.generate(
7509                    company_code,
7510                    fiscal_year,
7511                    &account_data,
7512                    &self.config.financial_reporting.budgets,
7513                );
7514                snapshot.budget_line_count = budget.line_items.len();
7515                snapshot.budgets.push(budget);
7516            }
7517        }
7518
7519        stats.sales_quote_count = snapshot.sales_quote_count;
7520        stats.kpi_count = snapshot.kpi_count;
7521        stats.budget_line_count = snapshot.budget_line_count;
7522
7523        info!(
7524            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7525            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7526        );
7527        self.check_resources_with_log("post-sales-kpi-budgets")?;
7528
7529        Ok(snapshot)
7530    }
7531
7532    /// Compute pre-tax income for a single company from actual journal entries.
7533    ///
7534    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
7535    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
7536    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
7537    /// and the period-close engine so that all three use a consistent definition.
7538    fn compute_pre_tax_income(
7539        company_code: &str,
7540        journal_entries: &[JournalEntry],
7541    ) -> rust_decimal::Decimal {
7542        use datasynth_core::accounts::AccountCategory;
7543        use rust_decimal::Decimal;
7544
7545        let mut total_revenue = Decimal::ZERO;
7546        let mut total_expenses = Decimal::ZERO;
7547
7548        for je in journal_entries {
7549            if je.header.company_code != company_code {
7550                continue;
7551            }
7552            for line in &je.lines {
7553                let cat = AccountCategory::from_account(&line.gl_account);
7554                match cat {
7555                    AccountCategory::Revenue => {
7556                        total_revenue += line.credit_amount - line.debit_amount;
7557                    }
7558                    AccountCategory::Cogs
7559                    | AccountCategory::OperatingExpense
7560                    | AccountCategory::OtherIncomeExpense => {
7561                        total_expenses += line.debit_amount - line.credit_amount;
7562                    }
7563                    _ => {}
7564                }
7565            }
7566        }
7567
7568        let pti = (total_revenue - total_expenses).round_dp(2);
7569        if pti == rust_decimal::Decimal::ZERO {
7570            // No income statement activity yet — fall back to a synthetic value so the
7571            // tax provision generator can still produce meaningful output.
7572            rust_decimal::Decimal::from(1_000_000u32)
7573        } else {
7574            pti
7575        }
7576    }
7577
7578    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7579    fn phase_tax_generation(
7580        &mut self,
7581        document_flows: &DocumentFlowSnapshot,
7582        journal_entries: &[JournalEntry],
7583        stats: &mut EnhancedGenerationStatistics,
7584    ) -> SynthResult<TaxSnapshot> {
7585        if !self.phase_config.generate_tax {
7586            debug!("Phase 20: Skipped (tax generation disabled)");
7587            return Ok(TaxSnapshot::default());
7588        }
7589        info!("Phase 20: Generating Tax Data");
7590
7591        let seed = self.seed;
7592        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7593            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7594        let fiscal_year = start_date.year();
7595        let company_code = self
7596            .config
7597            .companies
7598            .first()
7599            .map(|c| c.code.as_str())
7600            .unwrap_or("1000");
7601
7602        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7603            seed + 370,
7604            self.config.tax.clone(),
7605        );
7606
7607        let pack = self.primary_pack().clone();
7608        let (jurisdictions, codes) =
7609            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7610
7611        // Generate tax provisions for each company
7612        let mut provisions = Vec::new();
7613        if self.config.tax.provisions.enabled {
7614            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7615            for company in &self.config.companies {
7616                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7617                let statutory_rate = rust_decimal::Decimal::new(
7618                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7619                    2,
7620                );
7621                let provision = provision_gen.generate(
7622                    &company.code,
7623                    start_date,
7624                    pre_tax_income,
7625                    statutory_rate,
7626                );
7627                provisions.push(provision);
7628            }
7629        }
7630
7631        // Generate tax lines from document invoices
7632        let mut tax_lines = Vec::new();
7633        if !codes.is_empty() {
7634            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7635                datasynth_generators::TaxLineGeneratorConfig::default(),
7636                codes.clone(),
7637                seed + 372,
7638            );
7639
7640            // Tax lines from vendor invoices (input tax)
7641            // Use the first company's country as buyer country
7642            let buyer_country = self
7643                .config
7644                .companies
7645                .first()
7646                .map(|c| c.country.as_str())
7647                .unwrap_or("US");
7648            for vi in &document_flows.vendor_invoices {
7649                let lines = tax_line_gen.generate_for_document(
7650                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
7651                    &vi.header.document_id,
7652                    buyer_country, // seller approx same country
7653                    buyer_country,
7654                    vi.payable_amount,
7655                    vi.header.document_date,
7656                    None,
7657                );
7658                tax_lines.extend(lines);
7659            }
7660
7661            // Tax lines from customer invoices (output tax)
7662            for ci in &document_flows.customer_invoices {
7663                let lines = tax_line_gen.generate_for_document(
7664                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7665                    &ci.header.document_id,
7666                    buyer_country, // seller is the company
7667                    buyer_country,
7668                    ci.total_gross_amount,
7669                    ci.header.document_date,
7670                    None,
7671                );
7672                tax_lines.extend(lines);
7673            }
7674        }
7675
7676        // Generate deferred tax data (IAS 12 / ASC 740) for each company
7677        let deferred_tax = {
7678            let companies: Vec<(&str, &str)> = self
7679                .config
7680                .companies
7681                .iter()
7682                .map(|c| (c.code.as_str(), c.country.as_str()))
7683                .collect();
7684            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7685            deferred_gen.generate(&companies, start_date, journal_entries)
7686        };
7687
7688        // Build a document_id → posting_date map so each tax JE uses its
7689        // source document's date rather than a blanket period-end date.
7690        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7691            std::collections::HashMap::new();
7692        for vi in &document_flows.vendor_invoices {
7693            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7694        }
7695        for ci in &document_flows.customer_invoices {
7696            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7697        }
7698
7699        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
7700        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7701        let tax_posting_journal_entries = if !tax_lines.is_empty() {
7702            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7703                &tax_lines,
7704                company_code,
7705                &doc_dates,
7706                end_date,
7707            );
7708            debug!("Generated {} tax posting JEs", jes.len());
7709            jes
7710        } else {
7711            Vec::new()
7712        };
7713
7714        let snapshot = TaxSnapshot {
7715            jurisdiction_count: jurisdictions.len(),
7716            code_count: codes.len(),
7717            jurisdictions,
7718            codes,
7719            tax_provisions: provisions,
7720            tax_lines,
7721            tax_returns: Vec::new(),
7722            withholding_records: Vec::new(),
7723            tax_anomaly_labels: Vec::new(),
7724            deferred_tax,
7725            tax_posting_journal_entries,
7726        };
7727
7728        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7729        stats.tax_code_count = snapshot.code_count;
7730        stats.tax_provision_count = snapshot.tax_provisions.len();
7731        stats.tax_line_count = snapshot.tax_lines.len();
7732
7733        info!(
7734            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7735            snapshot.jurisdiction_count,
7736            snapshot.code_count,
7737            snapshot.tax_provisions.len(),
7738            snapshot.deferred_tax.temporary_differences.len(),
7739            snapshot.deferred_tax.journal_entries.len(),
7740            snapshot.tax_posting_journal_entries.len(),
7741        );
7742        self.check_resources_with_log("post-tax")?;
7743
7744        Ok(snapshot)
7745    }
7746
7747    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
7748    fn phase_esg_generation(
7749        &mut self,
7750        document_flows: &DocumentFlowSnapshot,
7751        manufacturing: &ManufacturingSnapshot,
7752        stats: &mut EnhancedGenerationStatistics,
7753    ) -> SynthResult<EsgSnapshot> {
7754        if !self.phase_config.generate_esg {
7755            debug!("Phase 21: Skipped (ESG generation disabled)");
7756            return Ok(EsgSnapshot::default());
7757        }
7758        let degradation = self.check_resources()?;
7759        if degradation >= DegradationLevel::Reduced {
7760            debug!(
7761                "Phase skipped due to resource pressure (degradation: {:?})",
7762                degradation
7763            );
7764            return Ok(EsgSnapshot::default());
7765        }
7766        info!("Phase 21: Generating ESG Data");
7767
7768        let seed = self.seed;
7769        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7770            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7771        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7772        let entity_id = self
7773            .config
7774            .companies
7775            .first()
7776            .map(|c| c.code.as_str())
7777            .unwrap_or("1000");
7778
7779        let esg_cfg = &self.config.esg;
7780        let mut snapshot = EsgSnapshot::default();
7781
7782        // Energy consumption (feeds into scope 1 & 2 emissions)
7783        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7784            esg_cfg.environmental.energy.clone(),
7785            seed + 80,
7786        );
7787        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7788
7789        // Water usage
7790        let facility_count = esg_cfg.environmental.energy.facility_count;
7791        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7792        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7793
7794        // Waste
7795        let mut waste_gen = datasynth_generators::WasteGenerator::new(
7796            seed + 82,
7797            esg_cfg.environmental.waste.diversion_target,
7798            facility_count,
7799        );
7800        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7801
7802        // Emissions (scope 1, 2, 3)
7803        let mut emission_gen =
7804            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7805
7806        // Build EnergyInput from energy_records
7807        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7808            .iter()
7809            .map(|e| datasynth_generators::EnergyInput {
7810                facility_id: e.facility_id.clone(),
7811                energy_type: match e.energy_source {
7812                    EnergySourceType::NaturalGas => {
7813                        datasynth_generators::EnergyInputType::NaturalGas
7814                    }
7815                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7816                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7817                    _ => datasynth_generators::EnergyInputType::Electricity,
7818                },
7819                consumption_kwh: e.consumption_kwh,
7820                period: e.period,
7821            })
7822            .collect();
7823
7824        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
7825        if !manufacturing.production_orders.is_empty() {
7826            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7827                &manufacturing.production_orders,
7828                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
7829                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
7830            );
7831            if !mfg_energy.is_empty() {
7832                info!(
7833                    "ESG: {} energy inputs derived from {} production orders",
7834                    mfg_energy.len(),
7835                    manufacturing.production_orders.len(),
7836                );
7837                energy_inputs.extend(mfg_energy);
7838            }
7839        }
7840
7841        let mut emissions = Vec::new();
7842        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7843        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7844
7845        // Scope 3: use vendor spend data from actual payments
7846        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7847            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7848            for payment in &document_flows.payments {
7849                if payment.is_vendor {
7850                    *totals
7851                        .entry(payment.business_partner_id.clone())
7852                        .or_default() += payment.amount;
7853                }
7854            }
7855            totals
7856        };
7857        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7858            .master_data
7859            .vendors
7860            .iter()
7861            .map(|v| {
7862                let spend = vendor_payment_totals
7863                    .get(&v.vendor_id)
7864                    .copied()
7865                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7866                datasynth_generators::VendorSpendInput {
7867                    vendor_id: v.vendor_id.clone(),
7868                    category: format!("{:?}", v.vendor_type).to_lowercase(),
7869                    spend,
7870                    country: v.country.clone(),
7871                }
7872            })
7873            .collect();
7874        if !vendor_spend.is_empty() {
7875            emissions.extend(emission_gen.generate_scope3_purchased_goods(
7876                entity_id,
7877                &vendor_spend,
7878                start_date,
7879                end_date,
7880            ));
7881        }
7882
7883        // Business travel & commuting (scope 3)
7884        let headcount = self.master_data.employees.len() as u32;
7885        if headcount > 0 {
7886            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7887            emissions.extend(emission_gen.generate_scope3_business_travel(
7888                entity_id,
7889                travel_spend,
7890                start_date,
7891            ));
7892            emissions
7893                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7894        }
7895
7896        snapshot.emission_count = emissions.len();
7897        snapshot.emissions = emissions;
7898        snapshot.energy = energy_records;
7899
7900        // Social: Workforce diversity, pay equity, safety
7901        let mut workforce_gen =
7902            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7903        let total_headcount = headcount.max(100);
7904        snapshot.diversity =
7905            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7906        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7907
7908        // v2.4: Derive additional workforce diversity metrics from actual employee data
7909        if !self.master_data.employees.is_empty() {
7910            let hr_diversity = workforce_gen.generate_diversity_from_employees(
7911                entity_id,
7912                &self.master_data.employees,
7913                end_date,
7914            );
7915            if !hr_diversity.is_empty() {
7916                info!(
7917                    "ESG: {} diversity metrics derived from {} actual employees",
7918                    hr_diversity.len(),
7919                    self.master_data.employees.len(),
7920                );
7921                snapshot.diversity.extend(hr_diversity);
7922            }
7923        }
7924
7925        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7926            entity_id,
7927            facility_count,
7928            start_date,
7929            end_date,
7930        );
7931
7932        // Compute safety metrics
7933        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
7934        let safety_metric = workforce_gen.compute_safety_metrics(
7935            entity_id,
7936            &snapshot.safety_incidents,
7937            total_hours,
7938            start_date,
7939        );
7940        snapshot.safety_metrics = vec![safety_metric];
7941
7942        // Governance
7943        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7944            seed + 85,
7945            esg_cfg.governance.board_size,
7946            esg_cfg.governance.independence_target,
7947        );
7948        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7949
7950        // Supplier ESG assessments
7951        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7952            esg_cfg.supply_chain_esg.clone(),
7953            seed + 86,
7954        );
7955        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7956            .master_data
7957            .vendors
7958            .iter()
7959            .map(|v| datasynth_generators::VendorInput {
7960                vendor_id: v.vendor_id.clone(),
7961                country: v.country.clone(),
7962                industry: format!("{:?}", v.vendor_type).to_lowercase(),
7963                quality_score: None,
7964            })
7965            .collect();
7966        snapshot.supplier_assessments =
7967            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7968
7969        // Disclosures
7970        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7971            seed + 87,
7972            esg_cfg.reporting.clone(),
7973            esg_cfg.climate_scenarios.clone(),
7974        );
7975        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7976        snapshot.disclosures = disclosure_gen.generate_disclosures(
7977            entity_id,
7978            &snapshot.materiality,
7979            start_date,
7980            end_date,
7981        );
7982        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7983        snapshot.disclosure_count = snapshot.disclosures.len();
7984
7985        // Anomaly injection
7986        if esg_cfg.anomaly_rate > 0.0 {
7987            let mut anomaly_injector =
7988                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7989            let mut labels = Vec::new();
7990            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7991            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7992            labels.extend(
7993                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7994            );
7995            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7996            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7997            snapshot.anomaly_labels = labels;
7998        }
7999
8000        stats.esg_emission_count = snapshot.emission_count;
8001        stats.esg_disclosure_count = snapshot.disclosure_count;
8002
8003        info!(
8004            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8005            snapshot.emission_count,
8006            snapshot.disclosure_count,
8007            snapshot.supplier_assessments.len()
8008        );
8009        self.check_resources_with_log("post-esg")?;
8010
8011        Ok(snapshot)
8012    }
8013
8014    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
8015    fn phase_treasury_data(
8016        &mut self,
8017        document_flows: &DocumentFlowSnapshot,
8018        subledger: &SubledgerSnapshot,
8019        intercompany: &IntercompanySnapshot,
8020        stats: &mut EnhancedGenerationStatistics,
8021    ) -> SynthResult<TreasurySnapshot> {
8022        if !self.phase_config.generate_treasury {
8023            debug!("Phase 22: Skipped (treasury generation disabled)");
8024            return Ok(TreasurySnapshot::default());
8025        }
8026        let degradation = self.check_resources()?;
8027        if degradation >= DegradationLevel::Reduced {
8028            debug!(
8029                "Phase skipped due to resource pressure (degradation: {:?})",
8030                degradation
8031            );
8032            return Ok(TreasurySnapshot::default());
8033        }
8034        info!("Phase 22: Generating Treasury Data");
8035
8036        let seed = self.seed;
8037        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8038            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8039        let currency = self
8040            .config
8041            .companies
8042            .first()
8043            .map(|c| c.currency.as_str())
8044            .unwrap_or("USD");
8045        let entity_id = self
8046            .config
8047            .companies
8048            .first()
8049            .map(|c| c.code.as_str())
8050            .unwrap_or("1000");
8051
8052        let mut snapshot = TreasurySnapshot::default();
8053
8054        // Generate debt instruments
8055        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8056            self.config.treasury.debt.clone(),
8057            seed + 90,
8058        );
8059        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8060
8061        // Generate hedging instruments (IR swaps for floating-rate debt)
8062        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8063            self.config.treasury.hedging.clone(),
8064            seed + 91,
8065        );
8066        for debt in &snapshot.debt_instruments {
8067            if debt.rate_type == InterestRateType::Variable {
8068                let swap = hedge_gen.generate_ir_swap(
8069                    currency,
8070                    debt.principal,
8071                    debt.origination_date,
8072                    debt.maturity_date,
8073                );
8074                snapshot.hedging_instruments.push(swap);
8075            }
8076        }
8077
8078        // Build FX exposures from foreign-currency payments and generate
8079        // FX forwards + hedge relationship designations via generate() API.
8080        {
8081            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8082            for payment in &document_flows.payments {
8083                if payment.currency != currency {
8084                    let entry = fx_map
8085                        .entry(payment.currency.clone())
8086                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8087                    entry.0 += payment.amount;
8088                    // Use the latest settlement date among grouped payments
8089                    if payment.header.document_date > entry.1 {
8090                        entry.1 = payment.header.document_date;
8091                    }
8092                }
8093            }
8094            if !fx_map.is_empty() {
8095                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
8096                    .into_iter()
8097                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
8098                        datasynth_generators::treasury::FxExposure {
8099                            currency_pair: format!("{foreign_ccy}/{currency}"),
8100                            foreign_currency: foreign_ccy,
8101                            net_amount,
8102                            settlement_date,
8103                            description: "AP payment FX exposure".to_string(),
8104                        }
8105                    })
8106                    .collect();
8107                let (fx_instruments, fx_relationships) =
8108                    hedge_gen.generate(start_date, &fx_exposures);
8109                snapshot.hedging_instruments.extend(fx_instruments);
8110                snapshot.hedge_relationships.extend(fx_relationships);
8111            }
8112        }
8113
8114        // Inject anomalies if configured
8115        if self.config.treasury.anomaly_rate > 0.0 {
8116            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
8117                seed + 92,
8118                self.config.treasury.anomaly_rate,
8119            );
8120            let mut labels = Vec::new();
8121            labels.extend(
8122                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
8123            );
8124            snapshot.treasury_anomaly_labels = labels;
8125        }
8126
8127        // Generate cash positions from payment flows
8128        if self.config.treasury.cash_positioning.enabled {
8129            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
8130
8131            // AP payments as outflows
8132            for payment in &document_flows.payments {
8133                cash_flows.push(datasynth_generators::treasury::CashFlow {
8134                    date: payment.header.document_date,
8135                    account_id: format!("{entity_id}-MAIN"),
8136                    amount: payment.amount,
8137                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
8138                });
8139            }
8140
8141            // Customer receipts (from O2C chains) as inflows
8142            for chain in &document_flows.o2c_chains {
8143                if let Some(ref receipt) = chain.customer_receipt {
8144                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8145                        date: receipt.header.document_date,
8146                        account_id: format!("{entity_id}-MAIN"),
8147                        amount: receipt.amount,
8148                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8149                    });
8150                }
8151                // Remainder receipts (follow-up to partial payments)
8152                for receipt in &chain.remainder_receipts {
8153                    cash_flows.push(datasynth_generators::treasury::CashFlow {
8154                        date: receipt.header.document_date,
8155                        account_id: format!("{entity_id}-MAIN"),
8156                        amount: receipt.amount,
8157                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
8158                    });
8159                }
8160            }
8161
8162            if !cash_flows.is_empty() {
8163                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
8164                    self.config.treasury.cash_positioning.clone(),
8165                    seed + 93,
8166                );
8167                let account_id = format!("{entity_id}-MAIN");
8168                snapshot.cash_positions = cash_gen.generate(
8169                    entity_id,
8170                    &account_id,
8171                    currency,
8172                    &cash_flows,
8173                    start_date,
8174                    start_date + chrono::Months::new(self.config.global.period_months),
8175                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
8176                );
8177            }
8178        }
8179
8180        // Generate cash forecasts from AR/AP aging
8181        if self.config.treasury.cash_forecasting.enabled {
8182            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8183
8184            // Build AR aging items from subledger AR invoices
8185            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
8186                .ar_invoices
8187                .iter()
8188                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8189                .map(|inv| {
8190                    let days_past_due = if inv.due_date < end_date {
8191                        (end_date - inv.due_date).num_days().max(0) as u32
8192                    } else {
8193                        0
8194                    };
8195                    datasynth_generators::treasury::ArAgingItem {
8196                        expected_date: inv.due_date,
8197                        amount: inv.amount_remaining,
8198                        days_past_due,
8199                        document_id: inv.invoice_number.clone(),
8200                    }
8201                })
8202                .collect();
8203
8204            // Build AP aging items from subledger AP invoices
8205            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
8206                .ap_invoices
8207                .iter()
8208                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
8209                .map(|inv| datasynth_generators::treasury::ApAgingItem {
8210                    payment_date: inv.due_date,
8211                    amount: inv.amount_remaining,
8212                    document_id: inv.invoice_number.clone(),
8213                })
8214                .collect();
8215
8216            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
8217                self.config.treasury.cash_forecasting.clone(),
8218                seed + 94,
8219            );
8220            let forecast = forecast_gen.generate(
8221                entity_id,
8222                currency,
8223                end_date,
8224                &ar_items,
8225                &ap_items,
8226                &[], // scheduled disbursements - empty for now
8227            );
8228            snapshot.cash_forecasts.push(forecast);
8229        }
8230
8231        // Generate cash pools and sweeps
8232        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
8233            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8234            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
8235                self.config.treasury.cash_pooling.clone(),
8236                seed + 95,
8237            );
8238
8239            // Create a pool from available accounts
8240            let account_ids: Vec<String> = snapshot
8241                .cash_positions
8242                .iter()
8243                .map(|cp| cp.bank_account_id.clone())
8244                .collect::<std::collections::HashSet<_>>()
8245                .into_iter()
8246                .collect();
8247
8248            if let Some(pool) =
8249                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
8250            {
8251                // Generate sweeps - build participant balances from last cash position per account
8252                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8253                for cp in &snapshot.cash_positions {
8254                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
8255                }
8256
8257                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
8258                    latest_balances
8259                        .into_iter()
8260                        .filter(|(id, _)| pool.participant_accounts.contains(id))
8261                        .map(
8262                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
8263                                account_id: id,
8264                                balance,
8265                            },
8266                        )
8267                        .collect();
8268
8269                let sweeps =
8270                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
8271                snapshot.cash_pool_sweeps = sweeps;
8272                snapshot.cash_pools.push(pool);
8273            }
8274        }
8275
8276        // Generate bank guarantees
8277        if self.config.treasury.bank_guarantees.enabled {
8278            let vendor_names: Vec<String> = self
8279                .master_data
8280                .vendors
8281                .iter()
8282                .map(|v| v.name.clone())
8283                .collect();
8284            if !vendor_names.is_empty() {
8285                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
8286                    self.config.treasury.bank_guarantees.clone(),
8287                    seed + 96,
8288                );
8289                snapshot.bank_guarantees =
8290                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
8291            }
8292        }
8293
8294        // Generate netting runs from intercompany matched pairs
8295        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
8296            let entity_ids: Vec<String> = self
8297                .config
8298                .companies
8299                .iter()
8300                .map(|c| c.code.clone())
8301                .collect();
8302            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
8303                .matched_pairs
8304                .iter()
8305                .map(|mp| {
8306                    (
8307                        mp.seller_company.clone(),
8308                        mp.buyer_company.clone(),
8309                        mp.amount,
8310                    )
8311                })
8312                .collect();
8313            if entity_ids.len() >= 2 {
8314                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
8315                    self.config.treasury.netting.clone(),
8316                    seed + 97,
8317                );
8318                snapshot.netting_runs = netting_gen.generate(
8319                    &entity_ids,
8320                    currency,
8321                    start_date,
8322                    self.config.global.period_months,
8323                    &ic_amounts,
8324                );
8325            }
8326        }
8327
8328        // Generate treasury journal entries from the instruments we just created.
8329        {
8330            use datasynth_generators::treasury::TreasuryAccounting;
8331
8332            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8333            let mut treasury_jes = Vec::new();
8334
8335            // Debt interest accrual JEs
8336            if !snapshot.debt_instruments.is_empty() {
8337                let debt_jes =
8338                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
8339                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
8340                treasury_jes.extend(debt_jes);
8341            }
8342
8343            // Hedge mark-to-market JEs
8344            if !snapshot.hedging_instruments.is_empty() {
8345                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8346                    &snapshot.hedging_instruments,
8347                    &snapshot.hedge_relationships,
8348                    end_date,
8349                    entity_id,
8350                );
8351                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8352                treasury_jes.extend(hedge_jes);
8353            }
8354
8355            // Cash pool sweep JEs
8356            if !snapshot.cash_pool_sweeps.is_empty() {
8357                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8358                    &snapshot.cash_pool_sweeps,
8359                    entity_id,
8360                );
8361                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8362                treasury_jes.extend(sweep_jes);
8363            }
8364
8365            if !treasury_jes.is_empty() {
8366                debug!("Total treasury journal entries: {}", treasury_jes.len());
8367            }
8368            snapshot.journal_entries = treasury_jes;
8369        }
8370
8371        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8372        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8373        stats.cash_position_count = snapshot.cash_positions.len();
8374        stats.cash_forecast_count = snapshot.cash_forecasts.len();
8375        stats.cash_pool_count = snapshot.cash_pools.len();
8376
8377        info!(
8378            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8379            snapshot.debt_instruments.len(),
8380            snapshot.hedging_instruments.len(),
8381            snapshot.cash_positions.len(),
8382            snapshot.cash_forecasts.len(),
8383            snapshot.cash_pools.len(),
8384            snapshot.bank_guarantees.len(),
8385            snapshot.netting_runs.len(),
8386            snapshot.journal_entries.len(),
8387        );
8388        self.check_resources_with_log("post-treasury")?;
8389
8390        Ok(snapshot)
8391    }
8392
8393    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
8394    fn phase_project_accounting(
8395        &mut self,
8396        document_flows: &DocumentFlowSnapshot,
8397        hr: &HrSnapshot,
8398        stats: &mut EnhancedGenerationStatistics,
8399    ) -> SynthResult<ProjectAccountingSnapshot> {
8400        if !self.phase_config.generate_project_accounting {
8401            debug!("Phase 23: Skipped (project accounting disabled)");
8402            return Ok(ProjectAccountingSnapshot::default());
8403        }
8404        let degradation = self.check_resources()?;
8405        if degradation >= DegradationLevel::Reduced {
8406            debug!(
8407                "Phase skipped due to resource pressure (degradation: {:?})",
8408                degradation
8409            );
8410            return Ok(ProjectAccountingSnapshot::default());
8411        }
8412        info!("Phase 23: Generating Project Accounting Data");
8413
8414        let seed = self.seed;
8415        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8416            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8417        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8418        let company_code = self
8419            .config
8420            .companies
8421            .first()
8422            .map(|c| c.code.as_str())
8423            .unwrap_or("1000");
8424
8425        let mut snapshot = ProjectAccountingSnapshot::default();
8426
8427        // Generate projects with WBS hierarchies
8428        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8429            self.config.project_accounting.clone(),
8430            seed + 95,
8431        );
8432        let pool = project_gen.generate(company_code, start_date, end_date);
8433        snapshot.projects = pool.projects.clone();
8434
8435        // Link source documents to projects for cost allocation
8436        {
8437            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8438                Vec::new();
8439
8440            // Time entries
8441            for te in &hr.time_entries {
8442                let total_hours = te.hours_regular + te.hours_overtime;
8443                if total_hours > 0.0 {
8444                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8445                        id: te.entry_id.clone(),
8446                        entity_id: company_code.to_string(),
8447                        date: te.date,
8448                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8449                            .unwrap_or(rust_decimal::Decimal::ZERO),
8450                        source_type: CostSourceType::TimeEntry,
8451                        hours: Some(
8452                            rust_decimal::Decimal::from_f64_retain(total_hours)
8453                                .unwrap_or(rust_decimal::Decimal::ZERO),
8454                        ),
8455                    });
8456                }
8457            }
8458
8459            // Expense reports
8460            for er in &hr.expense_reports {
8461                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8462                    id: er.report_id.clone(),
8463                    entity_id: company_code.to_string(),
8464                    date: er.submission_date,
8465                    amount: er.total_amount,
8466                    source_type: CostSourceType::ExpenseReport,
8467                    hours: None,
8468                });
8469            }
8470
8471            // Purchase orders
8472            for po in &document_flows.purchase_orders {
8473                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8474                    id: po.header.document_id.clone(),
8475                    entity_id: company_code.to_string(),
8476                    date: po.header.document_date,
8477                    amount: po.total_net_amount,
8478                    source_type: CostSourceType::PurchaseOrder,
8479                    hours: None,
8480                });
8481            }
8482
8483            // Vendor invoices
8484            for vi in &document_flows.vendor_invoices {
8485                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8486                    id: vi.header.document_id.clone(),
8487                    entity_id: company_code.to_string(),
8488                    date: vi.header.document_date,
8489                    amount: vi.payable_amount,
8490                    source_type: CostSourceType::VendorInvoice,
8491                    hours: None,
8492                });
8493            }
8494
8495            if !source_docs.is_empty() && !pool.projects.is_empty() {
8496                let mut cost_gen =
8497                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
8498                        self.config.project_accounting.cost_allocation.clone(),
8499                        seed + 99,
8500                    );
8501                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8502            }
8503        }
8504
8505        // Generate change orders
8506        if self.config.project_accounting.change_orders.enabled {
8507            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8508                self.config.project_accounting.change_orders.clone(),
8509                seed + 96,
8510            );
8511            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8512        }
8513
8514        // Generate milestones
8515        if self.config.project_accounting.milestones.enabled {
8516            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8517                self.config.project_accounting.milestones.clone(),
8518                seed + 97,
8519            );
8520            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8521        }
8522
8523        // Generate earned value metrics (needs cost lines, so only if we have projects)
8524        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8525            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8526                self.config.project_accounting.earned_value.clone(),
8527                seed + 98,
8528            );
8529            snapshot.earned_value_metrics =
8530                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8531        }
8532
8533        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
8534        if self.config.project_accounting.revenue_recognition.enabled
8535            && !snapshot.projects.is_empty()
8536            && !snapshot.cost_lines.is_empty()
8537        {
8538            use datasynth_generators::project_accounting::RevenueGenerator;
8539            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8540            let avg_contract_value =
8541                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8542                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8543
8544            // Build contract value tuples: only customer-type projects get revenue recognition.
8545            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8546            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8547                snapshot
8548                    .projects
8549                    .iter()
8550                    .filter(|p| {
8551                        matches!(
8552                            p.project_type,
8553                            datasynth_core::models::ProjectType::Customer
8554                        )
8555                    })
8556                    .map(|p| {
8557                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8558                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8559                        // budget × 1.25 → contract value
8560                        } else {
8561                            avg_contract_value
8562                        };
8563                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8564                        (p.project_id.clone(), cv, etc)
8565                    })
8566                    .collect();
8567
8568            if !contract_values.is_empty() {
8569                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8570                snapshot.revenue_records = rev_gen.generate(
8571                    &snapshot.projects,
8572                    &snapshot.cost_lines,
8573                    &contract_values,
8574                    start_date,
8575                    end_date,
8576                );
8577                debug!(
8578                    "Generated {} revenue recognition records for {} customer projects",
8579                    snapshot.revenue_records.len(),
8580                    contract_values.len()
8581                );
8582            }
8583        }
8584
8585        stats.project_count = snapshot.projects.len();
8586        stats.project_change_order_count = snapshot.change_orders.len();
8587        stats.project_cost_line_count = snapshot.cost_lines.len();
8588
8589        info!(
8590            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8591            snapshot.projects.len(),
8592            snapshot.change_orders.len(),
8593            snapshot.milestones.len(),
8594            snapshot.earned_value_metrics.len()
8595        );
8596        self.check_resources_with_log("post-project-accounting")?;
8597
8598        Ok(snapshot)
8599    }
8600
8601    /// Phase 24: Generate process evolution and organizational events.
8602    fn phase_evolution_events(
8603        &mut self,
8604        stats: &mut EnhancedGenerationStatistics,
8605    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8606        if !self.phase_config.generate_evolution_events {
8607            debug!("Phase 24: Skipped (evolution events disabled)");
8608            return Ok((Vec::new(), Vec::new()));
8609        }
8610        info!("Phase 24: Generating Process Evolution + Organizational Events");
8611
8612        let seed = self.seed;
8613        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8614            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8615        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8616
8617        // Process evolution events
8618        let mut proc_gen =
8619            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8620                seed + 100,
8621            );
8622        let process_events = proc_gen.generate_events(start_date, end_date);
8623
8624        // Organizational events
8625        let company_codes: Vec<String> = self
8626            .config
8627            .companies
8628            .iter()
8629            .map(|c| c.code.clone())
8630            .collect();
8631        let mut org_gen =
8632            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8633                seed + 101,
8634            );
8635        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8636
8637        stats.process_evolution_event_count = process_events.len();
8638        stats.organizational_event_count = org_events.len();
8639
8640        info!(
8641            "Evolution events generated: {} process evolution, {} organizational",
8642            process_events.len(),
8643            org_events.len()
8644        );
8645        self.check_resources_with_log("post-evolution-events")?;
8646
8647        Ok((process_events, org_events))
8648    }
8649
8650    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
8651    /// data recovery, and regulatory changes).
8652    fn phase_disruption_events(
8653        &self,
8654        stats: &mut EnhancedGenerationStatistics,
8655    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8656        if !self.config.organizational_events.enabled {
8657            debug!("Phase 24b: Skipped (organizational events disabled)");
8658            return Ok(Vec::new());
8659        }
8660        info!("Phase 24b: Generating Disruption Events");
8661
8662        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8663            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8664        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8665
8666        let company_codes: Vec<String> = self
8667            .config
8668            .companies
8669            .iter()
8670            .map(|c| c.code.clone())
8671            .collect();
8672
8673        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8674        let events = gen.generate(start_date, end_date, &company_codes);
8675
8676        stats.disruption_event_count = events.len();
8677        info!("Disruption events generated: {} events", events.len());
8678        self.check_resources_with_log("post-disruption-events")?;
8679
8680        Ok(events)
8681    }
8682
8683    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
8684    ///
8685    /// Produces paired examples where each pair contains the original clean JE
8686    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
8687    /// split transaction). Useful for training anomaly detection models with
8688    /// known ground truth.
8689    fn phase_counterfactuals(
8690        &self,
8691        journal_entries: &[JournalEntry],
8692        stats: &mut EnhancedGenerationStatistics,
8693    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8694        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8695            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8696            return Ok(Vec::new());
8697        }
8698        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8699
8700        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8701
8702        let mut gen = CounterfactualGenerator::new(self.seed + 110);
8703
8704        // Rotating set of specs to produce diverse mutation types
8705        let specs = [
8706            CounterfactualSpec::ScaleAmount { factor: 2.5 },
8707            CounterfactualSpec::ShiftDate { days: -14 },
8708            CounterfactualSpec::SelfApprove,
8709            CounterfactualSpec::SplitTransaction { split_count: 3 },
8710        ];
8711
8712        let pairs: Vec<_> = journal_entries
8713            .iter()
8714            .enumerate()
8715            .map(|(i, je)| {
8716                let spec = &specs[i % specs.len()];
8717                gen.generate(je, spec)
8718            })
8719            .collect();
8720
8721        stats.counterfactual_pair_count = pairs.len();
8722        info!(
8723            "Counterfactual pairs generated: {} pairs from {} journal entries",
8724            pairs.len(),
8725            journal_entries.len()
8726        );
8727        self.check_resources_with_log("post-counterfactuals")?;
8728
8729        Ok(pairs)
8730    }
8731
8732    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
8733    ///
8734    /// Uses the anomaly labels (from Phase 8) to determine which documents are
8735    /// fraudulent, then generates probabilistic red flags on all chain documents.
8736    /// Non-fraud documents also receive red flags at a lower rate (false positives)
8737    /// to produce realistic ML training data.
8738    fn phase_red_flags(
8739        &self,
8740        anomaly_labels: &AnomalyLabels,
8741        document_flows: &DocumentFlowSnapshot,
8742        stats: &mut EnhancedGenerationStatistics,
8743    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8744        if !self.config.fraud.enabled {
8745            debug!("Phase 26: Skipped (fraud generation disabled)");
8746            return Ok(Vec::new());
8747        }
8748        info!("Phase 26: Generating Fraud Red-Flag Indicators");
8749
8750        use datasynth_generators::fraud::RedFlagGenerator;
8751
8752        let generator = RedFlagGenerator::new();
8753        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8754
8755        // Build a set of document IDs that are known-fraudulent from anomaly labels.
8756        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8757            .labels
8758            .iter()
8759            .filter(|label| label.anomaly_type.is_intentional())
8760            .map(|label| label.document_id.as_str())
8761            .collect();
8762
8763        let mut flags = Vec::new();
8764
8765        // Iterate P2P chains: use the purchase order document ID as the chain key.
8766        for chain in &document_flows.p2p_chains {
8767            let doc_id = &chain.purchase_order.header.document_id;
8768            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8769            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8770        }
8771
8772        // Iterate O2C chains: use the sales order document ID as the chain key.
8773        for chain in &document_flows.o2c_chains {
8774            let doc_id = &chain.sales_order.header.document_id;
8775            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8776            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8777        }
8778
8779        stats.red_flag_count = flags.len();
8780        info!(
8781            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8782            flags.len(),
8783            document_flows.p2p_chains.len(),
8784            document_flows.o2c_chains.len(),
8785            fraud_doc_ids.len()
8786        );
8787        self.check_resources_with_log("post-red-flags")?;
8788
8789        Ok(flags)
8790    }
8791
8792    /// Phase 26b: Generate collusion rings from employee/vendor pools.
8793    ///
8794    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
8795    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
8796    /// advance them over the simulation period.
8797    fn phase_collusion_rings(
8798        &mut self,
8799        stats: &mut EnhancedGenerationStatistics,
8800    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8801        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8802            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8803            return Ok(Vec::new());
8804        }
8805        info!("Phase 26b: Generating Collusion Rings");
8806
8807        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8808            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8809        let months = self.config.global.period_months;
8810
8811        let employee_ids: Vec<String> = self
8812            .master_data
8813            .employees
8814            .iter()
8815            .map(|e| e.employee_id.clone())
8816            .collect();
8817        let vendor_ids: Vec<String> = self
8818            .master_data
8819            .vendors
8820            .iter()
8821            .map(|v| v.vendor_id.clone())
8822            .collect();
8823
8824        let mut generator =
8825            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8826        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8827
8828        stats.collusion_ring_count = rings.len();
8829        info!(
8830            "Collusion rings generated: {} rings, total members: {}",
8831            rings.len(),
8832            rings
8833                .iter()
8834                .map(datasynth_generators::fraud::CollusionRing::size)
8835                .sum::<usize>()
8836        );
8837        self.check_resources_with_log("post-collusion-rings")?;
8838
8839        Ok(rings)
8840    }
8841
8842    /// Phase 27: Generate bi-temporal version chains for vendor entities.
8843    ///
8844    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
8845    /// master data changes over time, supporting bi-temporal audit queries.
8846    fn phase_temporal_attributes(
8847        &mut self,
8848        stats: &mut EnhancedGenerationStatistics,
8849    ) -> SynthResult<
8850        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8851    > {
8852        if !self.config.temporal_attributes.enabled {
8853            debug!("Phase 27: Skipped (temporal attributes disabled)");
8854            return Ok(Vec::new());
8855        }
8856        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8857
8858        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8859            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8860
8861        // Build a TemporalAttributeConfig from the user's config.
8862        // Since Phase 27 is already gated on temporal_attributes.enabled,
8863        // default to enabling version chains so users get actual mutations.
8864        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8865            || self.config.temporal_attributes.enabled;
8866        let temporal_config = {
8867            let ta = &self.config.temporal_attributes;
8868            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8869                .enabled(ta.enabled)
8870                .closed_probability(ta.valid_time.closed_probability)
8871                .avg_validity_days(ta.valid_time.avg_validity_days)
8872                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8873                .with_version_chains(if generate_version_chains {
8874                    ta.avg_versions_per_entity
8875                } else {
8876                    1.0
8877                })
8878                .build()
8879        };
8880        // Apply backdating settings if configured
8881        let temporal_config = if self
8882            .config
8883            .temporal_attributes
8884            .transaction_time
8885            .allow_backdating
8886        {
8887            let mut c = temporal_config;
8888            c.transaction_time.allow_backdating = true;
8889            c.transaction_time.backdating_probability = self
8890                .config
8891                .temporal_attributes
8892                .transaction_time
8893                .backdating_probability;
8894            c.transaction_time.max_backdate_days = self
8895                .config
8896                .temporal_attributes
8897                .transaction_time
8898                .max_backdate_days;
8899            c
8900        } else {
8901            temporal_config
8902        };
8903        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8904            temporal_config,
8905            self.seed + 130,
8906            start_date,
8907        );
8908
8909        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8910            self.seed + 130,
8911            datasynth_core::GeneratorType::Vendor,
8912        );
8913
8914        let chains: Vec<_> = self
8915            .master_data
8916            .vendors
8917            .iter()
8918            .map(|vendor| {
8919                let id = uuid_factory.next();
8920                gen.generate_version_chain(vendor.clone(), id)
8921            })
8922            .collect();
8923
8924        stats.temporal_version_chain_count = chains.len();
8925        info!("Temporal version chains generated: {} chains", chains.len());
8926        self.check_resources_with_log("post-temporal-attributes")?;
8927
8928        Ok(chains)
8929    }
8930
8931    /// Phase 28: Build entity relationship graph and cross-process links.
8932    ///
8933    /// Part 1 (gated on `relationship_strength.enabled`): builds an
8934    /// `EntityGraph` from master-data vendor/customer entities and
8935    /// journal-entry-derived transaction summaries.
8936    ///
8937    /// Part 2 (gated on `cross_process_links.enabled`): extracts
8938    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
8939    /// generates inventory-movement cross-process links.
8940    fn phase_entity_relationships(
8941        &self,
8942        journal_entries: &[JournalEntry],
8943        document_flows: &DocumentFlowSnapshot,
8944        stats: &mut EnhancedGenerationStatistics,
8945    ) -> SynthResult<(
8946        Option<datasynth_core::models::EntityGraph>,
8947        Vec<datasynth_core::models::CrossProcessLink>,
8948    )> {
8949        use datasynth_generators::relationships::{
8950            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8951            TransactionSummary,
8952        };
8953
8954        let rs_enabled = self.config.relationship_strength.enabled;
8955        let cpl_enabled = self.config.cross_process_links.enabled
8956            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8957
8958        if !rs_enabled && !cpl_enabled {
8959            debug!(
8960                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8961            );
8962            return Ok((None, Vec::new()));
8963        }
8964
8965        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8966
8967        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8968            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8969
8970        let company_code = self
8971            .config
8972            .companies
8973            .first()
8974            .map(|c| c.code.as_str())
8975            .unwrap_or("1000");
8976
8977        // Build the generator with matching config flags
8978        let gen_config = EntityGraphConfig {
8979            enabled: rs_enabled,
8980            cross_process: datasynth_generators::relationships::CrossProcessConfig {
8981                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8982                enable_return_flows: false,
8983                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8984                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8985                // Use higher link rate for small datasets to avoid probabilistic empty results
8986                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8987                    1.0
8988                } else {
8989                    0.30
8990                },
8991                ..Default::default()
8992            },
8993            strength_config: datasynth_generators::relationships::StrengthConfig {
8994                transaction_volume_weight: self
8995                    .config
8996                    .relationship_strength
8997                    .calculation
8998                    .transaction_volume_weight,
8999                transaction_count_weight: self
9000                    .config
9001                    .relationship_strength
9002                    .calculation
9003                    .transaction_count_weight,
9004                duration_weight: self
9005                    .config
9006                    .relationship_strength
9007                    .calculation
9008                    .relationship_duration_weight,
9009                recency_weight: self.config.relationship_strength.calculation.recency_weight,
9010                mutual_connections_weight: self
9011                    .config
9012                    .relationship_strength
9013                    .calculation
9014                    .mutual_connections_weight,
9015                recency_half_life_days: self
9016                    .config
9017                    .relationship_strength
9018                    .calculation
9019                    .recency_half_life_days,
9020            },
9021            ..Default::default()
9022        };
9023
9024        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9025
9026        // --- Part 1: Entity Relationship Graph ---
9027        let entity_graph = if rs_enabled {
9028            // Build EntitySummary lists from master data
9029            let vendor_summaries: Vec<EntitySummary> = self
9030                .master_data
9031                .vendors
9032                .iter()
9033                .map(|v| {
9034                    EntitySummary::new(
9035                        &v.vendor_id,
9036                        &v.name,
9037                        datasynth_core::models::GraphEntityType::Vendor,
9038                        start_date,
9039                    )
9040                })
9041                .collect();
9042
9043            let customer_summaries: Vec<EntitySummary> = self
9044                .master_data
9045                .customers
9046                .iter()
9047                .map(|c| {
9048                    EntitySummary::new(
9049                        &c.customer_id,
9050                        &c.name,
9051                        datasynth_core::models::GraphEntityType::Customer,
9052                        start_date,
9053                    )
9054                })
9055                .collect();
9056
9057            // Build transaction summaries from journal entries.
9058            // Key = (company_code, trading_partner) for entries that have a
9059            // trading partner.  This captures intercompany flows and any JE
9060            // whose line items carry a trading_partner reference.
9061            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9062                std::collections::HashMap::new();
9063
9064            for je in journal_entries {
9065                let cc = je.header.company_code.clone();
9066                let posting_date = je.header.posting_date;
9067                for line in &je.lines {
9068                    if let Some(ref tp) = line.trading_partner {
9069                        let amount = if line.debit_amount > line.credit_amount {
9070                            line.debit_amount
9071                        } else {
9072                            line.credit_amount
9073                        };
9074                        let entry = txn_summaries
9075                            .entry((cc.clone(), tp.clone()))
9076                            .or_insert_with(|| TransactionSummary {
9077                                total_volume: rust_decimal::Decimal::ZERO,
9078                                transaction_count: 0,
9079                                first_transaction_date: posting_date,
9080                                last_transaction_date: posting_date,
9081                                related_entities: std::collections::HashSet::new(),
9082                            });
9083                        entry.total_volume += amount;
9084                        entry.transaction_count += 1;
9085                        if posting_date < entry.first_transaction_date {
9086                            entry.first_transaction_date = posting_date;
9087                        }
9088                        if posting_date > entry.last_transaction_date {
9089                            entry.last_transaction_date = posting_date;
9090                        }
9091                        entry.related_entities.insert(cc.clone());
9092                    }
9093                }
9094            }
9095
9096            // Also extract transaction relationships from document flow chains.
9097            // P2P chains: Company → Vendor relationships
9098            for chain in &document_flows.p2p_chains {
9099                let cc = chain.purchase_order.header.company_code.clone();
9100                let vendor_id = chain.purchase_order.vendor_id.clone();
9101                let po_date = chain.purchase_order.header.document_date;
9102                let amount = chain.purchase_order.total_net_amount;
9103
9104                let entry = txn_summaries
9105                    .entry((cc.clone(), vendor_id))
9106                    .or_insert_with(|| TransactionSummary {
9107                        total_volume: rust_decimal::Decimal::ZERO,
9108                        transaction_count: 0,
9109                        first_transaction_date: po_date,
9110                        last_transaction_date: po_date,
9111                        related_entities: std::collections::HashSet::new(),
9112                    });
9113                entry.total_volume += amount;
9114                entry.transaction_count += 1;
9115                if po_date < entry.first_transaction_date {
9116                    entry.first_transaction_date = po_date;
9117                }
9118                if po_date > entry.last_transaction_date {
9119                    entry.last_transaction_date = po_date;
9120                }
9121                entry.related_entities.insert(cc);
9122            }
9123
9124            // O2C chains: Company → Customer relationships
9125            for chain in &document_flows.o2c_chains {
9126                let cc = chain.sales_order.header.company_code.clone();
9127                let customer_id = chain.sales_order.customer_id.clone();
9128                let so_date = chain.sales_order.header.document_date;
9129                let amount = chain.sales_order.total_net_amount;
9130
9131                let entry = txn_summaries
9132                    .entry((cc.clone(), customer_id))
9133                    .or_insert_with(|| TransactionSummary {
9134                        total_volume: rust_decimal::Decimal::ZERO,
9135                        transaction_count: 0,
9136                        first_transaction_date: so_date,
9137                        last_transaction_date: so_date,
9138                        related_entities: std::collections::HashSet::new(),
9139                    });
9140                entry.total_volume += amount;
9141                entry.transaction_count += 1;
9142                if so_date < entry.first_transaction_date {
9143                    entry.first_transaction_date = so_date;
9144                }
9145                if so_date > entry.last_transaction_date {
9146                    entry.last_transaction_date = so_date;
9147                }
9148                entry.related_entities.insert(cc);
9149            }
9150
9151            let as_of_date = journal_entries
9152                .last()
9153                .map(|je| je.header.posting_date)
9154                .unwrap_or(start_date);
9155
9156            let graph = gen.generate_entity_graph(
9157                company_code,
9158                as_of_date,
9159                &vendor_summaries,
9160                &customer_summaries,
9161                &txn_summaries,
9162            );
9163
9164            info!(
9165                "Entity relationship graph: {} nodes, {} edges",
9166                graph.nodes.len(),
9167                graph.edges.len()
9168            );
9169            stats.entity_relationship_node_count = graph.nodes.len();
9170            stats.entity_relationship_edge_count = graph.edges.len();
9171            Some(graph)
9172        } else {
9173            None
9174        };
9175
9176        // --- Part 2: Cross-Process Links ---
9177        let cross_process_links = if cpl_enabled {
9178            // Build GoodsReceiptRef from P2P chains
9179            let gr_refs: Vec<GoodsReceiptRef> = document_flows
9180                .p2p_chains
9181                .iter()
9182                .flat_map(|chain| {
9183                    let vendor_id = chain.purchase_order.vendor_id.clone();
9184                    let cc = chain.purchase_order.header.company_code.clone();
9185                    chain.goods_receipts.iter().flat_map(move |gr| {
9186                        gr.items.iter().filter_map({
9187                            let doc_id = gr.header.document_id.clone();
9188                            let v_id = vendor_id.clone();
9189                            let company = cc.clone();
9190                            let receipt_date = gr.header.document_date;
9191                            move |item| {
9192                                item.base
9193                                    .material_id
9194                                    .as_ref()
9195                                    .map(|mat_id| GoodsReceiptRef {
9196                                        document_id: doc_id.clone(),
9197                                        material_id: mat_id.clone(),
9198                                        quantity: item.base.quantity,
9199                                        receipt_date,
9200                                        vendor_id: v_id.clone(),
9201                                        company_code: company.clone(),
9202                                    })
9203                            }
9204                        })
9205                    })
9206                })
9207                .collect();
9208
9209            // Build DeliveryRef from O2C chains
9210            let del_refs: Vec<DeliveryRef> = document_flows
9211                .o2c_chains
9212                .iter()
9213                .flat_map(|chain| {
9214                    let customer_id = chain.sales_order.customer_id.clone();
9215                    let cc = chain.sales_order.header.company_code.clone();
9216                    chain.deliveries.iter().flat_map(move |del| {
9217                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
9218                        del.items.iter().filter_map({
9219                            let doc_id = del.header.document_id.clone();
9220                            let c_id = customer_id.clone();
9221                            let company = cc.clone();
9222                            move |item| {
9223                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
9224                                    document_id: doc_id.clone(),
9225                                    material_id: mat_id.clone(),
9226                                    quantity: item.base.quantity,
9227                                    delivery_date,
9228                                    customer_id: c_id.clone(),
9229                                    company_code: company.clone(),
9230                                })
9231                            }
9232                        })
9233                    })
9234                })
9235                .collect();
9236
9237            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
9238            info!("Cross-process links generated: {} links", links.len());
9239            stats.cross_process_link_count = links.len();
9240            links
9241        } else {
9242            Vec::new()
9243        };
9244
9245        self.check_resources_with_log("post-entity-relationships")?;
9246        Ok((entity_graph, cross_process_links))
9247    }
9248
9249    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
9250    fn phase_industry_data(
9251        &self,
9252        stats: &mut EnhancedGenerationStatistics,
9253    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
9254        if !self.config.industry_specific.enabled {
9255            return None;
9256        }
9257        info!("Phase 29: Generating industry-specific data");
9258        let output = datasynth_generators::industry::factory::generate_industry_output(
9259            self.config.global.industry,
9260        );
9261        stats.industry_gl_account_count = output.gl_accounts.len();
9262        info!(
9263            "Industry data generated: {} GL accounts for {:?}",
9264            output.gl_accounts.len(),
9265            self.config.global.industry
9266        );
9267        Some(output)
9268    }
9269
9270    /// Phase 3b: Generate opening balances for each company.
9271    fn phase_opening_balances(
9272        &mut self,
9273        coa: &Arc<ChartOfAccounts>,
9274        stats: &mut EnhancedGenerationStatistics,
9275    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
9276        if !self.config.balance.generate_opening_balances {
9277            debug!("Phase 3b: Skipped (opening balance generation disabled)");
9278            return Ok(Vec::new());
9279        }
9280        info!("Phase 3b: Generating Opening Balances");
9281
9282        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9283            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9284        let fiscal_year = start_date.year();
9285
9286        let industry = match self.config.global.industry {
9287            IndustrySector::Manufacturing => IndustryType::Manufacturing,
9288            IndustrySector::Retail => IndustryType::Retail,
9289            IndustrySector::FinancialServices => IndustryType::Financial,
9290            IndustrySector::Healthcare => IndustryType::Healthcare,
9291            IndustrySector::Technology => IndustryType::Technology,
9292            _ => IndustryType::Manufacturing,
9293        };
9294
9295        let config = datasynth_generators::OpeningBalanceConfig {
9296            industry,
9297            ..Default::default()
9298        };
9299        let mut gen =
9300            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
9301
9302        let mut results = Vec::new();
9303        for company in &self.config.companies {
9304            let spec = OpeningBalanceSpec::new(
9305                company.code.clone(),
9306                start_date,
9307                fiscal_year,
9308                company.currency.clone(),
9309                rust_decimal::Decimal::new(10_000_000, 0),
9310                industry,
9311            );
9312            let ob = gen.generate(&spec, coa, start_date, &company.code);
9313            results.push(ob);
9314        }
9315
9316        stats.opening_balance_count = results.len();
9317        info!("Opening balances generated: {} companies", results.len());
9318        self.check_resources_with_log("post-opening-balances")?;
9319
9320        Ok(results)
9321    }
9322
9323    /// Phase 9b: Reconcile GL control accounts to subledger balances.
9324    fn phase_subledger_reconciliation(
9325        &mut self,
9326        subledger: &SubledgerSnapshot,
9327        entries: &[JournalEntry],
9328        stats: &mut EnhancedGenerationStatistics,
9329    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
9330        if !self.config.balance.reconcile_subledgers {
9331            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
9332            return Ok(Vec::new());
9333        }
9334        info!("Phase 9b: Reconciling GL to subledger balances");
9335
9336        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9337            .map(|d| d + chrono::Months::new(self.config.global.period_months))
9338            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9339
9340        // Build GL balance map from journal entries using a balance tracker
9341        let tracker_config = BalanceTrackerConfig {
9342            validate_on_each_entry: false,
9343            track_history: false,
9344            fail_on_validation_error: false,
9345            ..Default::default()
9346        };
9347        let recon_currency = self
9348            .config
9349            .companies
9350            .first()
9351            .map(|c| c.currency.clone())
9352            .unwrap_or_else(|| "USD".to_string());
9353        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9354        let validation_errors = tracker.apply_entries(entries);
9355        if !validation_errors.is_empty() {
9356            warn!(
9357                error_count = validation_errors.len(),
9358                "Balance tracker encountered validation errors during subledger reconciliation"
9359            );
9360            for err in &validation_errors {
9361                debug!("Balance validation error: {:?}", err);
9362            }
9363        }
9364
9365        let mut engine = datasynth_generators::ReconciliationEngine::new(
9366            datasynth_generators::ReconciliationConfig::default(),
9367        );
9368
9369        let mut results = Vec::new();
9370        let company_code = self
9371            .config
9372            .companies
9373            .first()
9374            .map(|c| c.code.as_str())
9375            .unwrap_or("1000");
9376
9377        // Reconcile AR
9378        if !subledger.ar_invoices.is_empty() {
9379            let gl_balance = tracker
9380                .get_account_balance(
9381                    company_code,
9382                    datasynth_core::accounts::control_accounts::AR_CONTROL,
9383                )
9384                .map(|b| b.closing_balance)
9385                .unwrap_or_default();
9386            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9387            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9388        }
9389
9390        // Reconcile AP
9391        if !subledger.ap_invoices.is_empty() {
9392            let gl_balance = tracker
9393                .get_account_balance(
9394                    company_code,
9395                    datasynth_core::accounts::control_accounts::AP_CONTROL,
9396                )
9397                .map(|b| b.closing_balance)
9398                .unwrap_or_default();
9399            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9400            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9401        }
9402
9403        // Reconcile FA
9404        if !subledger.fa_records.is_empty() {
9405            let gl_asset_balance = tracker
9406                .get_account_balance(
9407                    company_code,
9408                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9409                )
9410                .map(|b| b.closing_balance)
9411                .unwrap_or_default();
9412            let gl_accum_depr_balance = tracker
9413                .get_account_balance(
9414                    company_code,
9415                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9416                )
9417                .map(|b| b.closing_balance)
9418                .unwrap_or_default();
9419            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9420                subledger.fa_records.iter().collect();
9421            let (asset_recon, depr_recon) = engine.reconcile_fa(
9422                company_code,
9423                end_date,
9424                gl_asset_balance,
9425                gl_accum_depr_balance,
9426                &fa_refs,
9427            );
9428            results.push(asset_recon);
9429            results.push(depr_recon);
9430        }
9431
9432        // Reconcile Inventory
9433        if !subledger.inventory_positions.is_empty() {
9434            let gl_balance = tracker
9435                .get_account_balance(
9436                    company_code,
9437                    datasynth_core::accounts::control_accounts::INVENTORY,
9438                )
9439                .map(|b| b.closing_balance)
9440                .unwrap_or_default();
9441            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9442                subledger.inventory_positions.iter().collect();
9443            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9444        }
9445
9446        stats.subledger_reconciliation_count = results.len();
9447        let passed = results.iter().filter(|r| r.is_balanced()).count();
9448        let failed = results.len() - passed;
9449        info!(
9450            "Subledger reconciliation: {} checks, {} passed, {} failed",
9451            results.len(),
9452            passed,
9453            failed
9454        );
9455        self.check_resources_with_log("post-subledger-reconciliation")?;
9456
9457        Ok(results)
9458    }
9459
9460    /// Generate the chart of accounts.
9461    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9462        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9463
9464        let coa_framework = self.resolve_coa_framework();
9465
9466        let mut gen = ChartOfAccountsGenerator::new(
9467            self.config.chart_of_accounts.complexity,
9468            self.config.global.industry,
9469            self.seed,
9470        )
9471        .with_coa_framework(coa_framework);
9472
9473        let coa = Arc::new(gen.generate());
9474        self.coa = Some(Arc::clone(&coa));
9475
9476        if let Some(pb) = pb {
9477            pb.finish_with_message("Chart of Accounts complete");
9478        }
9479
9480        Ok(coa)
9481    }
9482
9483    /// Generate master data entities.
9484    fn generate_master_data(&mut self) -> SynthResult<()> {
9485        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9486            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9487        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9488
9489        let total = self.config.companies.len() as u64 * 5; // 5 entity types
9490        let pb = self.create_progress_bar(total, "Generating Master Data");
9491
9492        // Resolve country pack once for all companies (uses primary company's country)
9493        let pack = self.primary_pack().clone();
9494
9495        // Capture config values needed inside the parallel closure
9496        let vendors_per_company = self.phase_config.vendors_per_company;
9497        let customers_per_company = self.phase_config.customers_per_company;
9498        let materials_per_company = self.phase_config.materials_per_company;
9499        let assets_per_company = self.phase_config.assets_per_company;
9500        let coa_framework = self.resolve_coa_framework();
9501
9502        // Generate all master data in parallel across companies.
9503        // Each company's data is independent, making this embarrassingly parallel.
9504        let per_company_results: Vec<_> = self
9505            .config
9506            .companies
9507            .par_iter()
9508            .enumerate()
9509            .map(|(i, company)| {
9510                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9511                let pack = pack.clone();
9512
9513                // Generate vendors (offset counter so IDs are globally unique across companies)
9514                let mut vendor_gen = VendorGenerator::new(company_seed);
9515                vendor_gen.set_country_pack(pack.clone());
9516                vendor_gen.set_coa_framework(coa_framework);
9517                vendor_gen.set_counter_offset(i * vendors_per_company);
9518                // Wire vendor network config when enabled
9519                if self.config.vendor_network.enabled {
9520                    let vn = &self.config.vendor_network;
9521                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9522                        enabled: true,
9523                        depth: vn.depth,
9524                        tier1_count: datasynth_generators::TierCountConfig::new(
9525                            vn.tier1.min,
9526                            vn.tier1.max,
9527                        ),
9528                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
9529                            vn.tier2_per_parent.min,
9530                            vn.tier2_per_parent.max,
9531                        ),
9532                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
9533                            vn.tier3_per_parent.min,
9534                            vn.tier3_per_parent.max,
9535                        ),
9536                        cluster_distribution: datasynth_generators::ClusterDistribution {
9537                            reliable_strategic: vn.clusters.reliable_strategic,
9538                            standard_operational: vn.clusters.standard_operational,
9539                            transactional: vn.clusters.transactional,
9540                            problematic: vn.clusters.problematic,
9541                        },
9542                        concentration_limits: datasynth_generators::ConcentrationLimits {
9543                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9544                            max_top5: vn.dependencies.top_5_concentration,
9545                        },
9546                        ..datasynth_generators::VendorNetworkConfig::default()
9547                    });
9548                }
9549                let vendor_pool =
9550                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9551
9552                // Generate customers (offset counter so IDs are globally unique across companies)
9553                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9554                customer_gen.set_country_pack(pack.clone());
9555                customer_gen.set_coa_framework(coa_framework);
9556                customer_gen.set_counter_offset(i * customers_per_company);
9557                // Wire customer segmentation config when enabled
9558                if self.config.customer_segmentation.enabled {
9559                    let cs = &self.config.customer_segmentation;
9560                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9561                        enabled: true,
9562                        segment_distribution: datasynth_generators::SegmentDistribution {
9563                            enterprise: cs.value_segments.enterprise.customer_share,
9564                            mid_market: cs.value_segments.mid_market.customer_share,
9565                            smb: cs.value_segments.smb.customer_share,
9566                            consumer: cs.value_segments.consumer.customer_share,
9567                        },
9568                        referral_config: datasynth_generators::ReferralConfig {
9569                            enabled: cs.networks.referrals.enabled,
9570                            referral_rate: cs.networks.referrals.referral_rate,
9571                            ..Default::default()
9572                        },
9573                        hierarchy_config: datasynth_generators::HierarchyConfig {
9574                            enabled: cs.networks.corporate_hierarchies.enabled,
9575                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9576                            ..Default::default()
9577                        },
9578                        ..Default::default()
9579                    };
9580                    customer_gen.set_segmentation_config(seg_cfg);
9581                }
9582                let customer_pool = customer_gen.generate_customer_pool(
9583                    customers_per_company,
9584                    &company.code,
9585                    start_date,
9586                );
9587
9588                // Generate materials (offset counter so IDs are globally unique across companies)
9589                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9590                material_gen.set_country_pack(pack.clone());
9591                material_gen.set_counter_offset(i * materials_per_company);
9592                let material_pool = material_gen.generate_material_pool(
9593                    materials_per_company,
9594                    &company.code,
9595                    start_date,
9596                );
9597
9598                // Generate fixed assets
9599                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9600                let asset_pool = asset_gen.generate_asset_pool(
9601                    assets_per_company,
9602                    &company.code,
9603                    (start_date, end_date),
9604                );
9605
9606                // Generate employees
9607                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9608                employee_gen.set_country_pack(pack);
9609                let employee_pool =
9610                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9611
9612                // Generate employee change history (2-5 events per employee)
9613                let employee_change_history =
9614                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9615
9616                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9617                let employee_ids: Vec<String> = employee_pool
9618                    .employees
9619                    .iter()
9620                    .map(|e| e.employee_id.clone())
9621                    .collect();
9622                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9623                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9624
9625                (
9626                    vendor_pool.vendors,
9627                    customer_pool.customers,
9628                    material_pool.materials,
9629                    asset_pool.assets,
9630                    employee_pool.employees,
9631                    employee_change_history,
9632                    cost_centers,
9633                )
9634            })
9635            .collect();
9636
9637        // Aggregate results from all companies
9638        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9639            per_company_results
9640        {
9641            self.master_data.vendors.extend(vendors);
9642            self.master_data.customers.extend(customers);
9643            self.master_data.materials.extend(materials);
9644            self.master_data.assets.extend(assets);
9645            self.master_data.employees.extend(employees);
9646            self.master_data.cost_centers.extend(cost_centers);
9647            self.master_data
9648                .employee_change_history
9649                .extend(change_history);
9650        }
9651
9652        if let Some(pb) = &pb {
9653            pb.inc(total);
9654        }
9655        if let Some(pb) = pb {
9656            pb.finish_with_message("Master data generation complete");
9657        }
9658
9659        Ok(())
9660    }
9661
9662    /// Generate document flows (P2P and O2C).
9663    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9664        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9665            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9666
9667        // Generate P2P chains
9668        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
9669        let months = (self.config.global.period_months as usize).max(1);
9670        let p2p_count = self
9671            .phase_config
9672            .p2p_chains
9673            .min(self.master_data.vendors.len() * 2 * months);
9674        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9675
9676        // Convert P2P config from schema to generator config
9677        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9678        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9679        p2p_gen.set_country_pack(self.primary_pack().clone());
9680
9681        for i in 0..p2p_count {
9682            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9683            let materials: Vec<&Material> = self
9684                .master_data
9685                .materials
9686                .iter()
9687                .skip(i % self.master_data.materials.len().max(1))
9688                .take(2.min(self.master_data.materials.len()))
9689                .collect();
9690
9691            if materials.is_empty() {
9692                continue;
9693            }
9694
9695            let company = &self.config.companies[i % self.config.companies.len()];
9696            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9697            let fiscal_period = po_date.month() as u8;
9698            let created_by = if self.master_data.employees.is_empty() {
9699                "SYSTEM"
9700            } else {
9701                self.master_data.employees[i % self.master_data.employees.len()]
9702                    .user_id
9703                    .as_str()
9704            };
9705
9706            let chain = p2p_gen.generate_chain(
9707                &company.code,
9708                vendor,
9709                &materials,
9710                po_date,
9711                start_date.year() as u16,
9712                fiscal_period,
9713                created_by,
9714            );
9715
9716            // Flatten documents
9717            flows.purchase_orders.push(chain.purchase_order.clone());
9718            flows.goods_receipts.extend(chain.goods_receipts.clone());
9719            if let Some(vi) = &chain.vendor_invoice {
9720                flows.vendor_invoices.push(vi.clone());
9721            }
9722            if let Some(payment) = &chain.payment {
9723                flows.payments.push(payment.clone());
9724            }
9725            for remainder in &chain.remainder_payments {
9726                flows.payments.push(remainder.clone());
9727            }
9728            flows.p2p_chains.push(chain);
9729
9730            if let Some(pb) = &pb {
9731                pb.inc(1);
9732            }
9733        }
9734
9735        if let Some(pb) = pb {
9736            pb.finish_with_message("P2P document flows complete");
9737        }
9738
9739        // Generate O2C chains
9740        // Cap at ~2 SOs per customer per month to keep order volume realistic
9741        let o2c_count = self
9742            .phase_config
9743            .o2c_chains
9744            .min(self.master_data.customers.len() * 2 * months);
9745        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9746
9747        // Convert O2C config from schema to generator config
9748        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9749        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9750        o2c_gen.set_country_pack(self.primary_pack().clone());
9751
9752        for i in 0..o2c_count {
9753            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9754            let materials: Vec<&Material> = self
9755                .master_data
9756                .materials
9757                .iter()
9758                .skip(i % self.master_data.materials.len().max(1))
9759                .take(2.min(self.master_data.materials.len()))
9760                .collect();
9761
9762            if materials.is_empty() {
9763                continue;
9764            }
9765
9766            let company = &self.config.companies[i % self.config.companies.len()];
9767            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9768            let fiscal_period = so_date.month() as u8;
9769            let created_by = if self.master_data.employees.is_empty() {
9770                "SYSTEM"
9771            } else {
9772                self.master_data.employees[i % self.master_data.employees.len()]
9773                    .user_id
9774                    .as_str()
9775            };
9776
9777            let chain = o2c_gen.generate_chain(
9778                &company.code,
9779                customer,
9780                &materials,
9781                so_date,
9782                start_date.year() as u16,
9783                fiscal_period,
9784                created_by,
9785            );
9786
9787            // Flatten documents
9788            flows.sales_orders.push(chain.sales_order.clone());
9789            flows.deliveries.extend(chain.deliveries.clone());
9790            if let Some(ci) = &chain.customer_invoice {
9791                flows.customer_invoices.push(ci.clone());
9792            }
9793            if let Some(receipt) = &chain.customer_receipt {
9794                flows.payments.push(receipt.clone());
9795            }
9796            // Extract remainder receipts (follow-up to partial payments)
9797            for receipt in &chain.remainder_receipts {
9798                flows.payments.push(receipt.clone());
9799            }
9800            flows.o2c_chains.push(chain);
9801
9802            if let Some(pb) = &pb {
9803                pb.inc(1);
9804            }
9805        }
9806
9807        if let Some(pb) = pb {
9808            pb.finish_with_message("O2C document flows complete");
9809        }
9810
9811        // Collect all document cross-references from document headers.
9812        // Each document embeds references to its predecessor(s) via add_reference(); here we
9813        // denormalise them into a flat list for the document_references.json output file.
9814        {
9815            let mut refs = Vec::new();
9816            for doc in &flows.purchase_orders {
9817                refs.extend(doc.header.document_references.iter().cloned());
9818            }
9819            for doc in &flows.goods_receipts {
9820                refs.extend(doc.header.document_references.iter().cloned());
9821            }
9822            for doc in &flows.vendor_invoices {
9823                refs.extend(doc.header.document_references.iter().cloned());
9824            }
9825            for doc in &flows.sales_orders {
9826                refs.extend(doc.header.document_references.iter().cloned());
9827            }
9828            for doc in &flows.deliveries {
9829                refs.extend(doc.header.document_references.iter().cloned());
9830            }
9831            for doc in &flows.customer_invoices {
9832                refs.extend(doc.header.document_references.iter().cloned());
9833            }
9834            for doc in &flows.payments {
9835                refs.extend(doc.header.document_references.iter().cloned());
9836            }
9837            debug!(
9838                "Collected {} document cross-references from document headers",
9839                refs.len()
9840            );
9841            flows.document_references = refs;
9842        }
9843
9844        Ok(())
9845    }
9846
9847    /// Generate journal entries using parallel generation across multiple cores.
9848    fn generate_journal_entries(
9849        &mut self,
9850        coa: &Arc<ChartOfAccounts>,
9851    ) -> SynthResult<Vec<JournalEntry>> {
9852        use datasynth_core::traits::ParallelGenerator;
9853
9854        let total = self.calculate_total_transactions();
9855        let pb = self.create_progress_bar(total, "Generating Journal Entries");
9856
9857        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9858            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9859        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9860
9861        let company_codes: Vec<String> = self
9862            .config
9863            .companies
9864            .iter()
9865            .map(|c| c.code.clone())
9866            .collect();
9867
9868        let mut generator = JournalEntryGenerator::new_with_params(
9869            self.config.transactions.clone(),
9870            Arc::clone(coa),
9871            company_codes,
9872            start_date,
9873            end_date,
9874            self.seed,
9875        );
9876        // Wire the `business_processes.*_weight` config through (phantom knob
9877        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
9878        let bp = &self.config.business_processes;
9879        generator.set_business_process_weights(
9880            bp.o2c_weight,
9881            bp.p2p_weight,
9882            bp.r2r_weight,
9883            bp.h2r_weight,
9884            bp.a2r_weight,
9885        );
9886        let generator = generator;
9887
9888        // Connect generated master data to ensure JEs reference real entities
9889        // Enable persona-based error injection for realistic human behavior
9890        // Pass fraud configuration for fraud injection
9891        let je_pack = self.primary_pack();
9892
9893        let mut generator = generator
9894            .with_master_data(
9895                &self.master_data.vendors,
9896                &self.master_data.customers,
9897                &self.master_data.materials,
9898            )
9899            .with_country_pack_names(je_pack)
9900            .with_country_pack_temporal(
9901                self.config.temporal_patterns.clone(),
9902                self.seed + 200,
9903                je_pack,
9904            )
9905            .with_persona_errors(true)
9906            .with_fraud_config(self.config.fraud.clone());
9907
9908        // Apply temporal drift if configured
9909        if self.config.temporal.enabled {
9910            let drift_config = self.config.temporal.to_core_config();
9911            generator = generator.with_drift_config(drift_config, self.seed + 100);
9912        }
9913
9914        // Check memory limit at start
9915        self.check_memory_limit()?;
9916
9917        // Determine parallelism: use available cores, but cap at total entries
9918        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9919
9920        // Use parallel generation for datasets with 10K+ entries.
9921        // Below this threshold, the statistical properties of a single-seeded
9922        // generator (e.g. Benford compliance) are better preserved.
9923        let entries = if total >= 10_000 && num_threads > 1 {
9924            // Parallel path: split the generator across cores and generate in parallel.
9925            // Each sub-generator gets a unique seed for deterministic, independent generation.
9926            let sub_generators = generator.split(num_threads);
9927            let entries_per_thread = total as usize / num_threads;
9928            let remainder = total as usize % num_threads;
9929
9930            let batches: Vec<Vec<JournalEntry>> = sub_generators
9931                .into_par_iter()
9932                .enumerate()
9933                .map(|(i, mut gen)| {
9934                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9935                    gen.generate_batch(count)
9936                })
9937                .collect();
9938
9939            // Merge all batches into a single Vec
9940            let entries = JournalEntryGenerator::merge_results(batches);
9941
9942            if let Some(pb) = &pb {
9943                pb.inc(total);
9944            }
9945            entries
9946        } else {
9947            // Sequential path for small datasets (< 1000 entries)
9948            let mut entries = Vec::with_capacity(total as usize);
9949            for _ in 0..total {
9950                let entry = generator.generate();
9951                entries.push(entry);
9952                if let Some(pb) = &pb {
9953                    pb.inc(1);
9954                }
9955            }
9956            entries
9957        };
9958
9959        if let Some(pb) = pb {
9960            pb.finish_with_message("Journal entries complete");
9961        }
9962
9963        Ok(entries)
9964    }
9965
9966    /// Generate journal entries from document flows.
9967    ///
9968    /// This creates proper GL entries for each document in the P2P and O2C flows,
9969    /// ensuring that document activity is reflected in the general ledger.
9970    fn generate_jes_from_document_flows(
9971        &mut self,
9972        flows: &DocumentFlowSnapshot,
9973    ) -> SynthResult<Vec<JournalEntry>> {
9974        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9975        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9976
9977        let je_config = match self.resolve_coa_framework() {
9978            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9979            CoAFramework::GermanSkr04 => {
9980                let fa = datasynth_core::FrameworkAccounts::german_gaap();
9981                DocumentFlowJeConfig::from(&fa)
9982            }
9983            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9984        };
9985
9986        let populate_fec = je_config.populate_fec_fields;
9987        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9988
9989        // Build auxiliary account lookup from vendor/customer master data so that
9990        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
9991        // PCG "4010001") instead of raw partner IDs.
9992        if populate_fec {
9993            let mut aux_lookup = std::collections::HashMap::new();
9994            for vendor in &self.master_data.vendors {
9995                if let Some(ref aux) = vendor.auxiliary_gl_account {
9996                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9997                }
9998            }
9999            for customer in &self.master_data.customers {
10000                if let Some(ref aux) = customer.auxiliary_gl_account {
10001                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
10002                }
10003            }
10004            if !aux_lookup.is_empty() {
10005                generator.set_auxiliary_account_lookup(aux_lookup);
10006            }
10007        }
10008
10009        let mut entries = Vec::new();
10010
10011        // Generate JEs from P2P chains
10012        for chain in &flows.p2p_chains {
10013            let chain_entries = generator.generate_from_p2p_chain(chain);
10014            entries.extend(chain_entries);
10015            if let Some(pb) = &pb {
10016                pb.inc(1);
10017            }
10018        }
10019
10020        // Generate JEs from O2C chains
10021        for chain in &flows.o2c_chains {
10022            let chain_entries = generator.generate_from_o2c_chain(chain);
10023            entries.extend(chain_entries);
10024            if let Some(pb) = &pb {
10025                pb.inc(1);
10026            }
10027        }
10028
10029        if let Some(pb) = pb {
10030            pb.finish_with_message(format!(
10031                "Generated {} JEs from document flows",
10032                entries.len()
10033            ));
10034        }
10035
10036        Ok(entries)
10037    }
10038
10039    /// Generate journal entries from payroll runs.
10040    ///
10041    /// Creates one JE per payroll run:
10042    /// - DR Salaries & Wages (6100) for gross pay
10043    /// - CR Payroll Clearing (9100) for gross pay
10044    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
10045        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
10046
10047        let mut jes = Vec::with_capacity(payroll_runs.len());
10048
10049        for run in payroll_runs {
10050            let mut je = JournalEntry::new_simple(
10051                format!("JE-PAYROLL-{}", run.payroll_id),
10052                run.company_code.clone(),
10053                run.run_date,
10054                format!("Payroll {}", run.payroll_id),
10055            );
10056
10057            // Debit Salaries & Wages for gross pay
10058            je.add_line(JournalEntryLine {
10059                line_number: 1,
10060                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
10061                debit_amount: run.total_gross,
10062                reference: Some(run.payroll_id.clone()),
10063                text: Some(format!(
10064                    "Payroll {} ({} employees)",
10065                    run.payroll_id, run.employee_count
10066                )),
10067                ..Default::default()
10068            });
10069
10070            // Credit Payroll Clearing for gross pay
10071            je.add_line(JournalEntryLine {
10072                line_number: 2,
10073                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
10074                credit_amount: run.total_gross,
10075                reference: Some(run.payroll_id.clone()),
10076                ..Default::default()
10077            });
10078
10079            jes.push(je);
10080        }
10081
10082        jes
10083    }
10084
10085    /// Link document flows to subledger records.
10086    ///
10087    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
10088    /// ensuring subledger data is coherent with document flow data.
10089    fn link_document_flows_to_subledgers(
10090        &mut self,
10091        flows: &DocumentFlowSnapshot,
10092    ) -> SynthResult<SubledgerSnapshot> {
10093        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
10094        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
10095
10096        // Build vendor/customer name maps from master data for realistic subledger names
10097        let vendor_names: std::collections::HashMap<String, String> = self
10098            .master_data
10099            .vendors
10100            .iter()
10101            .map(|v| (v.vendor_id.clone(), v.name.clone()))
10102            .collect();
10103        let customer_names: std::collections::HashMap<String, String> = self
10104            .master_data
10105            .customers
10106            .iter()
10107            .map(|c| (c.customer_id.clone(), c.name.clone()))
10108            .collect();
10109
10110        let mut linker = DocumentFlowLinker::new()
10111            .with_vendor_names(vendor_names)
10112            .with_customer_names(customer_names);
10113
10114        // Convert vendor invoices to AP invoices
10115        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
10116        if let Some(pb) = &pb {
10117            pb.inc(flows.vendor_invoices.len() as u64);
10118        }
10119
10120        // Convert customer invoices to AR invoices
10121        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
10122        if let Some(pb) = &pb {
10123            pb.inc(flows.customer_invoices.len() as u64);
10124        }
10125
10126        if let Some(pb) = pb {
10127            pb.finish_with_message(format!(
10128                "Linked {} AP and {} AR invoices",
10129                ap_invoices.len(),
10130                ar_invoices.len()
10131            ));
10132        }
10133
10134        Ok(SubledgerSnapshot {
10135            ap_invoices,
10136            ar_invoices,
10137            fa_records: Vec::new(),
10138            inventory_positions: Vec::new(),
10139            inventory_movements: Vec::new(),
10140            // Aging reports are computed after payment settlement in phase_document_flows.
10141            ar_aging_reports: Vec::new(),
10142            ap_aging_reports: Vec::new(),
10143            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
10144            depreciation_runs: Vec::new(),
10145            inventory_valuations: Vec::new(),
10146            // Dunning runs and letters are populated in phase_document_flows after AR aging.
10147            dunning_runs: Vec::new(),
10148            dunning_letters: Vec::new(),
10149        })
10150    }
10151
10152    /// Generate OCPM events from document flows.
10153    ///
10154    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
10155    /// capturing the object-centric process perspective.
10156    #[allow(clippy::too_many_arguments)]
10157    fn generate_ocpm_events(
10158        &mut self,
10159        flows: &DocumentFlowSnapshot,
10160        sourcing: &SourcingSnapshot,
10161        hr: &HrSnapshot,
10162        manufacturing: &ManufacturingSnapshot,
10163        banking: &BankingSnapshot,
10164        audit: &AuditSnapshot,
10165        financial_reporting: &FinancialReportingSnapshot,
10166    ) -> SynthResult<OcpmSnapshot> {
10167        let total_chains = flows.p2p_chains.len()
10168            + flows.o2c_chains.len()
10169            + sourcing.sourcing_projects.len()
10170            + hr.payroll_runs.len()
10171            + manufacturing.production_orders.len()
10172            + banking.customers.len()
10173            + audit.engagements.len()
10174            + financial_reporting.bank_reconciliations.len();
10175        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
10176
10177        // Create OCPM event log with standard types
10178        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
10179        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
10180
10181        // Configure the OCPM generator
10182        let ocpm_config = OcpmGeneratorConfig {
10183            generate_p2p: true,
10184            generate_o2c: true,
10185            generate_s2c: !sourcing.sourcing_projects.is_empty(),
10186            generate_h2r: !hr.payroll_runs.is_empty(),
10187            generate_mfg: !manufacturing.production_orders.is_empty(),
10188            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
10189            generate_bank: !banking.customers.is_empty(),
10190            generate_audit: !audit.engagements.is_empty(),
10191            happy_path_rate: 0.75,
10192            exception_path_rate: 0.20,
10193            error_path_rate: 0.05,
10194            add_duration_variability: true,
10195            duration_std_dev_factor: 0.3,
10196        };
10197        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
10198        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
10199
10200        // Get available users for resource assignment
10201        let available_users: Vec<String> = self
10202            .master_data
10203            .employees
10204            .iter()
10205            .take(20)
10206            .map(|e| e.user_id.clone())
10207            .collect();
10208
10209        // Deterministic base date from config (avoids Utc::now() non-determinism)
10210        let fallback_date =
10211            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
10212        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10213            .unwrap_or(fallback_date);
10214        let base_midnight = base_date
10215            .and_hms_opt(0, 0, 0)
10216            .expect("midnight is always valid");
10217        let base_datetime =
10218            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
10219
10220        // Helper closure to add case results to event log
10221        let add_result = |event_log: &mut OcpmEventLog,
10222                          result: datasynth_ocpm::CaseGenerationResult| {
10223            for event in result.events {
10224                event_log.add_event(event);
10225            }
10226            for object in result.objects {
10227                event_log.add_object(object);
10228            }
10229            for relationship in result.relationships {
10230                event_log.add_relationship(relationship);
10231            }
10232            for corr in result.correlation_events {
10233                event_log.add_correlation_event(corr);
10234            }
10235            event_log.add_case(result.case_trace);
10236        };
10237
10238        // Generate events from P2P chains
10239        for chain in &flows.p2p_chains {
10240            let po = &chain.purchase_order;
10241            let documents = P2pDocuments::new(
10242                &po.header.document_id,
10243                &po.vendor_id,
10244                &po.header.company_code,
10245                po.total_net_amount,
10246                &po.header.currency,
10247                &ocpm_uuid_factory,
10248            )
10249            .with_goods_receipt(
10250                chain
10251                    .goods_receipts
10252                    .first()
10253                    .map(|gr| gr.header.document_id.as_str())
10254                    .unwrap_or(""),
10255                &ocpm_uuid_factory,
10256            )
10257            .with_invoice(
10258                chain
10259                    .vendor_invoice
10260                    .as_ref()
10261                    .map(|vi| vi.header.document_id.as_str())
10262                    .unwrap_or(""),
10263                &ocpm_uuid_factory,
10264            )
10265            .with_payment(
10266                chain
10267                    .payment
10268                    .as_ref()
10269                    .map(|p| p.header.document_id.as_str())
10270                    .unwrap_or(""),
10271                &ocpm_uuid_factory,
10272            );
10273
10274            let start_time =
10275                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
10276            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
10277            add_result(&mut event_log, result);
10278
10279            if let Some(pb) = &pb {
10280                pb.inc(1);
10281            }
10282        }
10283
10284        // Generate events from O2C chains
10285        for chain in &flows.o2c_chains {
10286            let so = &chain.sales_order;
10287            let documents = O2cDocuments::new(
10288                &so.header.document_id,
10289                &so.customer_id,
10290                &so.header.company_code,
10291                so.total_net_amount,
10292                &so.header.currency,
10293                &ocpm_uuid_factory,
10294            )
10295            .with_delivery(
10296                chain
10297                    .deliveries
10298                    .first()
10299                    .map(|d| d.header.document_id.as_str())
10300                    .unwrap_or(""),
10301                &ocpm_uuid_factory,
10302            )
10303            .with_invoice(
10304                chain
10305                    .customer_invoice
10306                    .as_ref()
10307                    .map(|ci| ci.header.document_id.as_str())
10308                    .unwrap_or(""),
10309                &ocpm_uuid_factory,
10310            )
10311            .with_receipt(
10312                chain
10313                    .customer_receipt
10314                    .as_ref()
10315                    .map(|r| r.header.document_id.as_str())
10316                    .unwrap_or(""),
10317                &ocpm_uuid_factory,
10318            );
10319
10320            let start_time =
10321                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
10322            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
10323            add_result(&mut event_log, result);
10324
10325            if let Some(pb) = &pb {
10326                pb.inc(1);
10327            }
10328        }
10329
10330        // Generate events from S2C sourcing projects
10331        for project in &sourcing.sourcing_projects {
10332            // Find vendor from contracts or qualifications
10333            let vendor_id = sourcing
10334                .contracts
10335                .iter()
10336                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10337                .map(|c| c.vendor_id.clone())
10338                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
10339                .or_else(|| {
10340                    self.master_data
10341                        .vendors
10342                        .first()
10343                        .map(|v| v.vendor_id.clone())
10344                })
10345                .unwrap_or_else(|| "V000".to_string());
10346            let mut docs = S2cDocuments::new(
10347                &project.project_id,
10348                &vendor_id,
10349                &project.company_code,
10350                project.estimated_annual_spend,
10351                &ocpm_uuid_factory,
10352            );
10353            // Link RFx if available
10354            if let Some(rfx) = sourcing
10355                .rfx_events
10356                .iter()
10357                .find(|r| r.sourcing_project_id == project.project_id)
10358            {
10359                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10360                // Link winning bid (status == Accepted)
10361                if let Some(bid) = sourcing.bids.iter().find(|b| {
10362                    b.rfx_id == rfx.rfx_id
10363                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10364                }) {
10365                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10366                }
10367            }
10368            // Link contract
10369            if let Some(contract) = sourcing
10370                .contracts
10371                .iter()
10372                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10373            {
10374                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10375            }
10376            let start_time = base_datetime - chrono::Duration::days(90);
10377            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10378            add_result(&mut event_log, result);
10379
10380            if let Some(pb) = &pb {
10381                pb.inc(1);
10382            }
10383        }
10384
10385        // Generate events from H2R payroll runs
10386        for run in &hr.payroll_runs {
10387            // Use first matching payroll line item's employee, or fallback
10388            let employee_id = hr
10389                .payroll_line_items
10390                .iter()
10391                .find(|li| li.payroll_id == run.payroll_id)
10392                .map(|li| li.employee_id.as_str())
10393                .unwrap_or("EMP000");
10394            let docs = H2rDocuments::new(
10395                &run.payroll_id,
10396                employee_id,
10397                &run.company_code,
10398                run.total_gross,
10399                &ocpm_uuid_factory,
10400            )
10401            .with_time_entries(
10402                hr.time_entries
10403                    .iter()
10404                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10405                    .take(5)
10406                    .map(|t| t.entry_id.as_str())
10407                    .collect(),
10408            );
10409            let start_time = base_datetime - chrono::Duration::days(30);
10410            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10411            add_result(&mut event_log, result);
10412
10413            if let Some(pb) = &pb {
10414                pb.inc(1);
10415            }
10416        }
10417
10418        // Generate events from MFG production orders
10419        for order in &manufacturing.production_orders {
10420            let mut docs = MfgDocuments::new(
10421                &order.order_id,
10422                &order.material_id,
10423                &order.company_code,
10424                order.planned_quantity,
10425                &ocpm_uuid_factory,
10426            )
10427            .with_operations(
10428                order
10429                    .operations
10430                    .iter()
10431                    .map(|o| format!("OP-{:04}", o.operation_number))
10432                    .collect::<Vec<_>>()
10433                    .iter()
10434                    .map(std::string::String::as_str)
10435                    .collect(),
10436            );
10437            // Link quality inspection if available (via reference_id matching order_id)
10438            if let Some(insp) = manufacturing
10439                .quality_inspections
10440                .iter()
10441                .find(|i| i.reference_id == order.order_id)
10442            {
10443                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10444            }
10445            // Link cycle count if available (match by material_id in items)
10446            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10447                cc.items
10448                    .iter()
10449                    .any(|item| item.material_id == order.material_id)
10450            }) {
10451                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10452            }
10453            let start_time = base_datetime - chrono::Duration::days(60);
10454            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10455            add_result(&mut event_log, result);
10456
10457            if let Some(pb) = &pb {
10458                pb.inc(1);
10459            }
10460        }
10461
10462        // Generate events from Banking customers
10463        for customer in &banking.customers {
10464            let customer_id_str = customer.customer_id.to_string();
10465            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10466            // Link accounts (primary_owner_id matches customer_id)
10467            if let Some(account) = banking
10468                .accounts
10469                .iter()
10470                .find(|a| a.primary_owner_id == customer.customer_id)
10471            {
10472                let account_id_str = account.account_id.to_string();
10473                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10474                // Link transactions for this account
10475                let txn_strs: Vec<String> = banking
10476                    .transactions
10477                    .iter()
10478                    .filter(|t| t.account_id == account.account_id)
10479                    .take(10)
10480                    .map(|t| t.transaction_id.to_string())
10481                    .collect();
10482                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10483                let txn_amounts: Vec<rust_decimal::Decimal> = banking
10484                    .transactions
10485                    .iter()
10486                    .filter(|t| t.account_id == account.account_id)
10487                    .take(10)
10488                    .map(|t| t.amount)
10489                    .collect();
10490                if !txn_ids.is_empty() {
10491                    docs = docs.with_transactions(txn_ids, txn_amounts);
10492                }
10493            }
10494            let start_time = base_datetime - chrono::Duration::days(180);
10495            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10496            add_result(&mut event_log, result);
10497
10498            if let Some(pb) = &pb {
10499                pb.inc(1);
10500            }
10501        }
10502
10503        // Generate events from Audit engagements
10504        for engagement in &audit.engagements {
10505            let engagement_id_str = engagement.engagement_id.to_string();
10506            let docs = AuditDocuments::new(
10507                &engagement_id_str,
10508                &engagement.client_entity_id,
10509                &ocpm_uuid_factory,
10510            )
10511            .with_workpapers(
10512                audit
10513                    .workpapers
10514                    .iter()
10515                    .filter(|w| w.engagement_id == engagement.engagement_id)
10516                    .take(10)
10517                    .map(|w| w.workpaper_id.to_string())
10518                    .collect::<Vec<_>>()
10519                    .iter()
10520                    .map(std::string::String::as_str)
10521                    .collect(),
10522            )
10523            .with_evidence(
10524                audit
10525                    .evidence
10526                    .iter()
10527                    .filter(|e| e.engagement_id == engagement.engagement_id)
10528                    .take(10)
10529                    .map(|e| e.evidence_id.to_string())
10530                    .collect::<Vec<_>>()
10531                    .iter()
10532                    .map(std::string::String::as_str)
10533                    .collect(),
10534            )
10535            .with_risks(
10536                audit
10537                    .risk_assessments
10538                    .iter()
10539                    .filter(|r| r.engagement_id == engagement.engagement_id)
10540                    .take(5)
10541                    .map(|r| r.risk_id.to_string())
10542                    .collect::<Vec<_>>()
10543                    .iter()
10544                    .map(std::string::String::as_str)
10545                    .collect(),
10546            )
10547            .with_findings(
10548                audit
10549                    .findings
10550                    .iter()
10551                    .filter(|f| f.engagement_id == engagement.engagement_id)
10552                    .take(5)
10553                    .map(|f| f.finding_id.to_string())
10554                    .collect::<Vec<_>>()
10555                    .iter()
10556                    .map(std::string::String::as_str)
10557                    .collect(),
10558            )
10559            .with_judgments(
10560                audit
10561                    .judgments
10562                    .iter()
10563                    .filter(|j| j.engagement_id == engagement.engagement_id)
10564                    .take(5)
10565                    .map(|j| j.judgment_id.to_string())
10566                    .collect::<Vec<_>>()
10567                    .iter()
10568                    .map(std::string::String::as_str)
10569                    .collect(),
10570            );
10571            let start_time = base_datetime - chrono::Duration::days(120);
10572            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10573            add_result(&mut event_log, result);
10574
10575            if let Some(pb) = &pb {
10576                pb.inc(1);
10577            }
10578        }
10579
10580        // Generate events from Bank Reconciliations
10581        for recon in &financial_reporting.bank_reconciliations {
10582            let docs = BankReconDocuments::new(
10583                &recon.reconciliation_id,
10584                &recon.bank_account_id,
10585                &recon.company_code,
10586                recon.bank_ending_balance,
10587                &ocpm_uuid_factory,
10588            )
10589            .with_statement_lines(
10590                recon
10591                    .statement_lines
10592                    .iter()
10593                    .take(20)
10594                    .map(|l| l.line_id.as_str())
10595                    .collect(),
10596            )
10597            .with_reconciling_items(
10598                recon
10599                    .reconciling_items
10600                    .iter()
10601                    .take(10)
10602                    .map(|i| i.item_id.as_str())
10603                    .collect(),
10604            );
10605            let start_time = base_datetime - chrono::Duration::days(30);
10606            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10607            add_result(&mut event_log, result);
10608
10609            if let Some(pb) = &pb {
10610                pb.inc(1);
10611            }
10612        }
10613
10614        // Compute process variants
10615        event_log.compute_variants();
10616
10617        let summary = event_log.summary();
10618
10619        if let Some(pb) = pb {
10620            pb.finish_with_message(format!(
10621                "Generated {} OCPM events, {} objects",
10622                summary.event_count, summary.object_count
10623            ));
10624        }
10625
10626        Ok(OcpmSnapshot {
10627            event_count: summary.event_count,
10628            object_count: summary.object_count,
10629            case_count: summary.case_count,
10630            event_log: Some(event_log),
10631        })
10632    }
10633
10634    /// Inject anomalies into journal entries.
10635    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10636        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10637
10638        // Read anomaly rates from config instead of using hardcoded values.
10639        // Priority: anomaly_injection config > fraud config > default 0.02
10640        let total_rate = if self.config.anomaly_injection.enabled {
10641            self.config.anomaly_injection.rates.total_rate
10642        } else if self.config.fraud.enabled {
10643            self.config.fraud.fraud_rate
10644        } else {
10645            0.02
10646        };
10647
10648        let fraud_rate = if self.config.anomaly_injection.enabled {
10649            self.config.anomaly_injection.rates.fraud_rate
10650        } else {
10651            AnomalyRateConfig::default().fraud_rate
10652        };
10653
10654        let error_rate = if self.config.anomaly_injection.enabled {
10655            self.config.anomaly_injection.rates.error_rate
10656        } else {
10657            AnomalyRateConfig::default().error_rate
10658        };
10659
10660        let process_issue_rate = if self.config.anomaly_injection.enabled {
10661            self.config.anomaly_injection.rates.process_rate
10662        } else {
10663            AnomalyRateConfig::default().process_issue_rate
10664        };
10665
10666        let anomaly_config = AnomalyInjectorConfig {
10667            rates: AnomalyRateConfig {
10668                total_rate,
10669                fraud_rate,
10670                error_rate,
10671                process_issue_rate,
10672                ..Default::default()
10673            },
10674            seed: self.seed + 5000,
10675            ..Default::default()
10676        };
10677
10678        let mut injector = AnomalyInjector::new(anomaly_config);
10679        let result = injector.process_entries(entries);
10680
10681        if let Some(pb) = &pb {
10682            pb.inc(entries.len() as u64);
10683            pb.finish_with_message("Anomaly injection complete");
10684        }
10685
10686        let mut by_type = HashMap::new();
10687        for label in &result.labels {
10688            *by_type
10689                .entry(format!("{:?}", label.anomaly_type))
10690                .or_insert(0) += 1;
10691        }
10692
10693        Ok(AnomalyLabels {
10694            labels: result.labels,
10695            summary: Some(result.summary),
10696            by_type,
10697        })
10698    }
10699
10700    /// Validate journal entries using running balance tracker.
10701    ///
10702    /// Applies all entries to the balance tracker and validates:
10703    /// - Each entry is internally balanced (debits = credits)
10704    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
10705    ///
10706    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
10707    /// excluded from balance validation as they may be intentionally unbalanced.
10708    fn validate_journal_entries(
10709        &mut self,
10710        entries: &[JournalEntry],
10711    ) -> SynthResult<BalanceValidationResult> {
10712        // Filter out entries with human errors as they may be intentionally unbalanced
10713        let clean_entries: Vec<&JournalEntry> = entries
10714            .iter()
10715            .filter(|e| {
10716                e.header
10717                    .header_text
10718                    .as_ref()
10719                    .map(|t| !t.contains("[HUMAN_ERROR:"))
10720                    .unwrap_or(true)
10721            })
10722            .collect();
10723
10724        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10725
10726        // Configure tracker to not fail on errors (collect them instead)
10727        let config = BalanceTrackerConfig {
10728            validate_on_each_entry: false,   // We'll validate at the end
10729            track_history: false,            // Skip history for performance
10730            fail_on_validation_error: false, // Collect errors, don't fail
10731            ..Default::default()
10732        };
10733        let validation_currency = self
10734            .config
10735            .companies
10736            .first()
10737            .map(|c| c.currency.clone())
10738            .unwrap_or_else(|| "USD".to_string());
10739
10740        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10741
10742        // Apply clean entries (without human errors)
10743        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10744        let errors = tracker.apply_entries(&clean_refs);
10745
10746        if let Some(pb) = &pb {
10747            pb.inc(entries.len() as u64);
10748        }
10749
10750        // Check if any entries were unbalanced
10751        // Note: When fail_on_validation_error is false, errors are stored in tracker
10752        let has_unbalanced = tracker
10753            .get_validation_errors()
10754            .iter()
10755            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10756
10757        // Validate balance sheet for each company
10758        // Include both returned errors and collected validation errors
10759        let mut all_errors = errors;
10760        all_errors.extend(tracker.get_validation_errors().iter().cloned());
10761        let company_codes: Vec<String> = self
10762            .config
10763            .companies
10764            .iter()
10765            .map(|c| c.code.clone())
10766            .collect();
10767
10768        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10769            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10770            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10771
10772        for company_code in &company_codes {
10773            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10774                all_errors.push(e);
10775            }
10776        }
10777
10778        // Get statistics after all mutable operations are done
10779        let stats = tracker.get_statistics();
10780
10781        // Determine if balanced overall
10782        let is_balanced = all_errors.is_empty();
10783
10784        if let Some(pb) = pb {
10785            let msg = if is_balanced {
10786                "Balance validation passed"
10787            } else {
10788                "Balance validation completed with errors"
10789            };
10790            pb.finish_with_message(msg);
10791        }
10792
10793        Ok(BalanceValidationResult {
10794            validated: true,
10795            is_balanced,
10796            entries_processed: stats.entries_processed,
10797            total_debits: stats.total_debits,
10798            total_credits: stats.total_credits,
10799            accounts_tracked: stats.accounts_tracked,
10800            companies_tracked: stats.companies_tracked,
10801            validation_errors: all_errors,
10802            has_unbalanced_entries: has_unbalanced,
10803        })
10804    }
10805
10806    /// Inject data quality variations into journal entries.
10807    ///
10808    /// Applies typos, missing values, and format variations to make
10809    /// the synthetic data more realistic for testing data cleaning pipelines.
10810    fn inject_data_quality(
10811        &mut self,
10812        entries: &mut [JournalEntry],
10813    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10814        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10815
10816        // Build config from user-specified schema settings when data_quality is enabled;
10817        // otherwise fall back to the low-rate minimal() preset.
10818        let config = if self.config.data_quality.enabled {
10819            let dq = &self.config.data_quality;
10820            DataQualityConfig {
10821                enable_missing_values: dq.missing_values.enabled,
10822                missing_values: datasynth_generators::MissingValueConfig {
10823                    global_rate: dq.effective_missing_rate(),
10824                    ..Default::default()
10825                },
10826                enable_format_variations: dq.format_variations.enabled,
10827                format_variations: datasynth_generators::FormatVariationConfig {
10828                    date_variation_rate: dq.format_variations.dates.rate,
10829                    amount_variation_rate: dq.format_variations.amounts.rate,
10830                    identifier_variation_rate: dq.format_variations.identifiers.rate,
10831                    ..Default::default()
10832                },
10833                enable_duplicates: dq.duplicates.enabled,
10834                duplicates: datasynth_generators::DuplicateConfig {
10835                    duplicate_rate: dq.effective_duplicate_rate(),
10836                    ..Default::default()
10837                },
10838                enable_typos: dq.typos.enabled,
10839                typos: datasynth_generators::TypoConfig {
10840                    char_error_rate: dq.effective_typo_rate(),
10841                    ..Default::default()
10842                },
10843                enable_encoding_issues: dq.encoding_issues.enabled,
10844                encoding_issue_rate: dq.encoding_issues.rate,
10845                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
10846                track_statistics: true,
10847            }
10848        } else {
10849            DataQualityConfig::minimal()
10850        };
10851        let mut injector = DataQualityInjector::new(config);
10852
10853        // Wire country pack for locale-aware format baselines
10854        injector.set_country_pack(self.primary_pack().clone());
10855
10856        // Build context for missing value decisions
10857        let context = HashMap::new();
10858
10859        for entry in entries.iter_mut() {
10860            // Process header_text field (common target for typos)
10861            if let Some(text) = &entry.header.header_text {
10862                let processed = injector.process_text_field(
10863                    "header_text",
10864                    text,
10865                    &entry.header.document_id.to_string(),
10866                    &context,
10867                );
10868                match processed {
10869                    Some(new_text) if new_text != *text => {
10870                        entry.header.header_text = Some(new_text);
10871                    }
10872                    None => {
10873                        entry.header.header_text = None; // Missing value
10874                    }
10875                    _ => {}
10876                }
10877            }
10878
10879            // Process reference field
10880            if let Some(ref_text) = &entry.header.reference {
10881                let processed = injector.process_text_field(
10882                    "reference",
10883                    ref_text,
10884                    &entry.header.document_id.to_string(),
10885                    &context,
10886                );
10887                match processed {
10888                    Some(new_text) if new_text != *ref_text => {
10889                        entry.header.reference = Some(new_text);
10890                    }
10891                    None => {
10892                        entry.header.reference = None;
10893                    }
10894                    _ => {}
10895                }
10896            }
10897
10898            // Process user_persona field (potential for typos in user IDs)
10899            let user_persona = entry.header.user_persona.clone();
10900            if let Some(processed) = injector.process_text_field(
10901                "user_persona",
10902                &user_persona,
10903                &entry.header.document_id.to_string(),
10904                &context,
10905            ) {
10906                if processed != user_persona {
10907                    entry.header.user_persona = processed;
10908                }
10909            }
10910
10911            // Process line items
10912            for line in &mut entry.lines {
10913                // Process line description if present
10914                if let Some(ref text) = line.line_text {
10915                    let processed = injector.process_text_field(
10916                        "line_text",
10917                        text,
10918                        &entry.header.document_id.to_string(),
10919                        &context,
10920                    );
10921                    match processed {
10922                        Some(new_text) if new_text != *text => {
10923                            line.line_text = Some(new_text);
10924                        }
10925                        None => {
10926                            line.line_text = None;
10927                        }
10928                        _ => {}
10929                    }
10930                }
10931
10932                // Process cost_center if present
10933                if let Some(cc) = &line.cost_center {
10934                    let processed = injector.process_text_field(
10935                        "cost_center",
10936                        cc,
10937                        &entry.header.document_id.to_string(),
10938                        &context,
10939                    );
10940                    match processed {
10941                        Some(new_cc) if new_cc != *cc => {
10942                            line.cost_center = Some(new_cc);
10943                        }
10944                        None => {
10945                            line.cost_center = None;
10946                        }
10947                        _ => {}
10948                    }
10949                }
10950            }
10951
10952            if let Some(pb) = &pb {
10953                pb.inc(1);
10954            }
10955        }
10956
10957        if let Some(pb) = pb {
10958            pb.finish_with_message("Data quality injection complete");
10959        }
10960
10961        let quality_issues = injector.issues().to_vec();
10962        Ok((injector.stats().clone(), quality_issues))
10963    }
10964
10965    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
10966    ///
10967    /// Creates complete audit documentation for each company in the configuration,
10968    /// following ISA standards:
10969    /// - ISA 210/220: Engagement acceptance and terms
10970    /// - ISA 230: Audit documentation (workpapers)
10971    /// - ISA 265: Control deficiencies (findings)
10972    /// - ISA 315/330: Risk assessment and response
10973    /// - ISA 500: Audit evidence
10974    /// - ISA 200: Professional judgment
10975    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10976        // Check if FSM-driven audit generation is enabled
10977        let use_fsm = self
10978            .config
10979            .audit
10980            .fsm
10981            .as_ref()
10982            .map(|f| f.enabled)
10983            .unwrap_or(false);
10984
10985        if use_fsm {
10986            return self.generate_audit_data_with_fsm(entries);
10987        }
10988
10989        // --- Legacy (non-FSM) audit generation follows ---
10990        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10991            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10992        let fiscal_year = start_date.year() as u16;
10993        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10994
10995        // Calculate rough total revenue from entries for materiality
10996        let total_revenue: rust_decimal::Decimal = entries
10997            .iter()
10998            .flat_map(|e| e.lines.iter())
10999            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
11000            .map(|l| l.credit_amount)
11001            .sum();
11002
11003        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
11004        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
11005
11006        let mut snapshot = AuditSnapshot::default();
11007
11008        // Initialize generators
11009        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
11010        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
11011        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
11012        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
11013        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
11014        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
11015        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
11016        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
11017        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
11018        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
11019        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
11020        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
11021
11022        // Get list of accounts from CoA for risk assessment
11023        let accounts: Vec<String> = self
11024            .coa
11025            .as_ref()
11026            .map(|coa| {
11027                coa.get_postable_accounts()
11028                    .iter()
11029                    .map(|acc| acc.account_code().to_string())
11030                    .collect()
11031            })
11032            .unwrap_or_default();
11033
11034        // Generate engagements for each company
11035        for (i, company) in self.config.companies.iter().enumerate() {
11036            // Calculate company-specific revenue (proportional to volume weight)
11037            let company_revenue = total_revenue
11038                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
11039
11040            // Generate engagements for this company
11041            let engagements_for_company =
11042                self.phase_config.audit_engagements / self.config.companies.len().max(1);
11043            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
11044                1
11045            } else {
11046                0
11047            };
11048
11049            for _eng_idx in 0..(engagements_for_company + extra) {
11050                // Generate the engagement
11051                let mut engagement = engagement_gen.generate_engagement(
11052                    &company.code,
11053                    &company.name,
11054                    fiscal_year,
11055                    period_end,
11056                    company_revenue,
11057                    None, // Use default engagement type
11058                );
11059
11060                // Replace synthetic team IDs with real employee IDs from master data
11061                if !self.master_data.employees.is_empty() {
11062                    let emp_count = self.master_data.employees.len();
11063                    // Use employee IDs deterministically based on engagement index
11064                    let base = (i * 10 + _eng_idx) % emp_count;
11065                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
11066                        .employee_id
11067                        .clone();
11068                    engagement.engagement_manager_id = self.master_data.employees
11069                        [(base + 1) % emp_count]
11070                        .employee_id
11071                        .clone();
11072                    let real_team: Vec<String> = engagement
11073                        .team_member_ids
11074                        .iter()
11075                        .enumerate()
11076                        .map(|(j, _)| {
11077                            self.master_data.employees[(base + 2 + j) % emp_count]
11078                                .employee_id
11079                                .clone()
11080                        })
11081                        .collect();
11082                    engagement.team_member_ids = real_team;
11083                }
11084
11085                if let Some(pb) = &pb {
11086                    pb.inc(1);
11087                }
11088
11089                // Get team members from the engagement
11090                let team_members: Vec<String> = engagement.team_member_ids.clone();
11091
11092                // Generate workpapers for the engagement
11093                let workpapers =
11094                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
11095
11096                for wp in &workpapers {
11097                    if let Some(pb) = &pb {
11098                        pb.inc(1);
11099                    }
11100
11101                    // Generate evidence for each workpaper
11102                    let evidence = evidence_gen.generate_evidence_for_workpaper(
11103                        wp,
11104                        &team_members,
11105                        wp.preparer_date,
11106                    );
11107
11108                    for _ in &evidence {
11109                        if let Some(pb) = &pb {
11110                            pb.inc(1);
11111                        }
11112                    }
11113
11114                    snapshot.evidence.extend(evidence);
11115                }
11116
11117                // Generate risk assessments for the engagement
11118                let risks =
11119                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
11120
11121                for _ in &risks {
11122                    if let Some(pb) = &pb {
11123                        pb.inc(1);
11124                    }
11125                }
11126                snapshot.risk_assessments.extend(risks);
11127
11128                // Generate findings for the engagement
11129                let findings = finding_gen.generate_findings_for_engagement(
11130                    &engagement,
11131                    &workpapers,
11132                    &team_members,
11133                );
11134
11135                for _ in &findings {
11136                    if let Some(pb) = &pb {
11137                        pb.inc(1);
11138                    }
11139                }
11140                snapshot.findings.extend(findings);
11141
11142                // Generate professional judgments for the engagement
11143                let judgments =
11144                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
11145
11146                for _ in &judgments {
11147                    if let Some(pb) = &pb {
11148                        pb.inc(1);
11149                    }
11150                }
11151                snapshot.judgments.extend(judgments);
11152
11153                // ISA 505: External confirmations and responses
11154                let (confs, resps) =
11155                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
11156                snapshot.confirmations.extend(confs);
11157                snapshot.confirmation_responses.extend(resps);
11158
11159                // ISA 330: Procedure steps per workpaper
11160                let team_pairs: Vec<(String, String)> = team_members
11161                    .iter()
11162                    .map(|id| {
11163                        let name = self
11164                            .master_data
11165                            .employees
11166                            .iter()
11167                            .find(|e| e.employee_id == *id)
11168                            .map(|e| e.display_name.clone())
11169                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
11170                        (id.clone(), name)
11171                    })
11172                    .collect();
11173                for wp in &workpapers {
11174                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
11175                    snapshot.procedure_steps.extend(steps);
11176                }
11177
11178                // ISA 530: Samples per workpaper
11179                for wp in &workpapers {
11180                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
11181                        snapshot.samples.push(sample);
11182                    }
11183                }
11184
11185                // ISA 520: Analytical procedures
11186                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
11187                snapshot.analytical_results.extend(analytical);
11188
11189                // ISA 610: Internal audit function and reports
11190                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
11191                snapshot.ia_functions.push(ia_func);
11192                snapshot.ia_reports.extend(ia_reports);
11193
11194                // ISA 550: Related parties and transactions
11195                let vendor_names: Vec<String> = self
11196                    .master_data
11197                    .vendors
11198                    .iter()
11199                    .map(|v| v.name.clone())
11200                    .collect();
11201                let customer_names: Vec<String> = self
11202                    .master_data
11203                    .customers
11204                    .iter()
11205                    .map(|c| c.name.clone())
11206                    .collect();
11207                let (parties, rp_txns) =
11208                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
11209                snapshot.related_parties.extend(parties);
11210                snapshot.related_party_transactions.extend(rp_txns);
11211
11212                // Add workpapers after findings since findings need them
11213                snapshot.workpapers.extend(workpapers);
11214
11215                // Generate audit scope record for this engagement (one per engagement)
11216                {
11217                    let scope_id = format!(
11218                        "SCOPE-{}-{}",
11219                        engagement.engagement_id.simple(),
11220                        &engagement.client_entity_id
11221                    );
11222                    let scope = datasynth_core::models::audit::AuditScope::new(
11223                        scope_id.clone(),
11224                        engagement.engagement_id.to_string(),
11225                        engagement.client_entity_id.clone(),
11226                        engagement.materiality,
11227                    );
11228                    // Wire scope_id back to engagement
11229                    let mut eng = engagement;
11230                    eng.scope_id = Some(scope_id);
11231                    snapshot.audit_scopes.push(scope);
11232                    snapshot.engagements.push(eng);
11233                }
11234            }
11235        }
11236
11237        // ----------------------------------------------------------------
11238        // ISA 600: Group audit — component auditors, plan, instructions, reports
11239        // ----------------------------------------------------------------
11240        if self.config.companies.len() > 1 {
11241            // Use materiality from the first engagement if available, otherwise
11242            // derive a reasonable figure from total revenue.
11243            let group_materiality = snapshot
11244                .engagements
11245                .first()
11246                .map(|e| e.materiality)
11247                .unwrap_or_else(|| {
11248                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
11249                    total_revenue * pct
11250                });
11251
11252            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
11253            let group_engagement_id = snapshot
11254                .engagements
11255                .first()
11256                .map(|e| e.engagement_id.to_string())
11257                .unwrap_or_else(|| "GROUP-ENG".to_string());
11258
11259            let component_snapshot = component_gen.generate(
11260                &self.config.companies,
11261                group_materiality,
11262                &group_engagement_id,
11263                period_end,
11264            );
11265
11266            snapshot.component_auditors = component_snapshot.component_auditors;
11267            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
11268            snapshot.component_instructions = component_snapshot.component_instructions;
11269            snapshot.component_reports = component_snapshot.component_reports;
11270
11271            info!(
11272                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
11273                snapshot.component_auditors.len(),
11274                snapshot.component_instructions.len(),
11275                snapshot.component_reports.len(),
11276            );
11277        }
11278
11279        // ----------------------------------------------------------------
11280        // ISA 210: Engagement letters — one per engagement
11281        // ----------------------------------------------------------------
11282        {
11283            let applicable_framework = self
11284                .config
11285                .accounting_standards
11286                .framework
11287                .as_ref()
11288                .map(|f| format!("{f:?}"))
11289                .unwrap_or_else(|| "IFRS".to_string());
11290
11291            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
11292            let entity_count = self.config.companies.len();
11293
11294            for engagement in &snapshot.engagements {
11295                let company = self
11296                    .config
11297                    .companies
11298                    .iter()
11299                    .find(|c| c.code == engagement.client_entity_id);
11300                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
11301                let letter_date = engagement.planning_start;
11302                let letter = letter_gen.generate(
11303                    &engagement.engagement_id.to_string(),
11304                    &engagement.client_name,
11305                    entity_count,
11306                    engagement.period_end_date,
11307                    currency,
11308                    &applicable_framework,
11309                    letter_date,
11310                );
11311                snapshot.engagement_letters.push(letter);
11312            }
11313
11314            info!(
11315                "ISA 210 engagement letters: {} generated",
11316                snapshot.engagement_letters.len()
11317            );
11318        }
11319
11320        // ----------------------------------------------------------------
11321        // ISA 560 / IAS 10: Subsequent events
11322        // ----------------------------------------------------------------
11323        {
11324            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
11325            let entity_codes: Vec<String> = self
11326                .config
11327                .companies
11328                .iter()
11329                .map(|c| c.code.clone())
11330                .collect();
11331            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
11332            info!(
11333                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
11334                subsequent.len(),
11335                subsequent
11336                    .iter()
11337                    .filter(|e| matches!(
11338                        e.classification,
11339                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
11340                    ))
11341                    .count(),
11342                subsequent
11343                    .iter()
11344                    .filter(|e| matches!(
11345                        e.classification,
11346                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
11347                    ))
11348                    .count(),
11349            );
11350            snapshot.subsequent_events = subsequent;
11351        }
11352
11353        // ----------------------------------------------------------------
11354        // ISA 402: Service organization controls
11355        // ----------------------------------------------------------------
11356        {
11357            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11358            let entity_codes: Vec<String> = self
11359                .config
11360                .companies
11361                .iter()
11362                .map(|c| c.code.clone())
11363                .collect();
11364            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11365            info!(
11366                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11367                soc_snapshot.service_organizations.len(),
11368                soc_snapshot.soc_reports.len(),
11369                soc_snapshot.user_entity_controls.len(),
11370            );
11371            snapshot.service_organizations = soc_snapshot.service_organizations;
11372            snapshot.soc_reports = soc_snapshot.soc_reports;
11373            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11374        }
11375
11376        // ----------------------------------------------------------------
11377        // ISA 570: Going concern assessments
11378        // ----------------------------------------------------------------
11379        {
11380            use datasynth_generators::audit::going_concern_generator::{
11381                GoingConcernGenerator, GoingConcernInput,
11382            };
11383            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11384            let entity_codes: Vec<String> = self
11385                .config
11386                .companies
11387                .iter()
11388                .map(|c| c.code.clone())
11389                .collect();
11390            // Assessment date = period end + 75 days (typical sign-off window).
11391            let assessment_date = period_end + chrono::Duration::days(75);
11392            let period_label = format!("FY{}", period_end.year());
11393
11394            // Build financial inputs from actual journal entries.
11395            //
11396            // We derive approximate P&L, working capital, and operating cash flow
11397            // by aggregating GL account balances from the journal entry population.
11398            // Account ranges used (standard chart):
11399            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
11400            //   Expenses:        6xxx (debit-normal)
11401            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
11402            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
11403            //   Operating CF:    net income adjusted for D&A (rough proxy)
11404            let gc_inputs: Vec<GoingConcernInput> = self
11405                .config
11406                .companies
11407                .iter()
11408                .map(|company| {
11409                    let code = &company.code;
11410                    let mut revenue = rust_decimal::Decimal::ZERO;
11411                    let mut expenses = rust_decimal::Decimal::ZERO;
11412                    let mut current_assets = rust_decimal::Decimal::ZERO;
11413                    let mut current_liabs = rust_decimal::Decimal::ZERO;
11414                    let mut total_debt = rust_decimal::Decimal::ZERO;
11415
11416                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
11417                        for line in &je.lines {
11418                            let acct = line.gl_account.as_str();
11419                            let net = line.debit_amount - line.credit_amount;
11420                            if acct.starts_with('4') {
11421                                // Revenue accounts: credit-normal, so negative net = revenue earned
11422                                revenue -= net;
11423                            } else if acct.starts_with('6') {
11424                                // Expense accounts: debit-normal
11425                                expenses += net;
11426                            }
11427                            // Balance sheet accounts for working capital
11428                            if acct.starts_with('1') {
11429                                // Current asset accounts (1000–1499)
11430                                if let Ok(n) = acct.parse::<u32>() {
11431                                    if (1000..=1499).contains(&n) {
11432                                        current_assets += net;
11433                                    }
11434                                }
11435                            } else if acct.starts_with('2') {
11436                                if let Ok(n) = acct.parse::<u32>() {
11437                                    if (2000..=2499).contains(&n) {
11438                                        // Current liabilities
11439                                        current_liabs -= net; // credit-normal
11440                                    } else if (2500..=2999).contains(&n) {
11441                                        // Long-term debt
11442                                        total_debt -= net;
11443                                    }
11444                                }
11445                            }
11446                        }
11447                    }
11448
11449                    let net_income = revenue - expenses;
11450                    let working_capital = current_assets - current_liabs;
11451                    // Rough operating CF proxy: net income (full accrual CF calculation
11452                    // is done separately in the cash flow statement generator)
11453                    let operating_cash_flow = net_income;
11454
11455                    GoingConcernInput {
11456                        entity_code: code.clone(),
11457                        net_income,
11458                        working_capital,
11459                        operating_cash_flow,
11460                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11461                        assessment_date,
11462                    }
11463                })
11464                .collect();
11465
11466            let assessments = if gc_inputs.is_empty() {
11467                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11468            } else {
11469                gc_gen.generate_for_entities_with_inputs(
11470                    &entity_codes,
11471                    &gc_inputs,
11472                    assessment_date,
11473                    &period_label,
11474                )
11475            };
11476            info!(
11477                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11478                assessments.len(),
11479                assessments.iter().filter(|a| matches!(
11480                    a.auditor_conclusion,
11481                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11482                )).count(),
11483                assessments.iter().filter(|a| matches!(
11484                    a.auditor_conclusion,
11485                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11486                )).count(),
11487                assessments.iter().filter(|a| matches!(
11488                    a.auditor_conclusion,
11489                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11490                )).count(),
11491            );
11492            snapshot.going_concern_assessments = assessments;
11493        }
11494
11495        // ----------------------------------------------------------------
11496        // ISA 540: Accounting estimates
11497        // ----------------------------------------------------------------
11498        {
11499            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11500            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11501            let entity_codes: Vec<String> = self
11502                .config
11503                .companies
11504                .iter()
11505                .map(|c| c.code.clone())
11506                .collect();
11507            let estimates = est_gen.generate_for_entities(&entity_codes);
11508            info!(
11509                "ISA 540 accounting estimates: {} estimates across {} entities \
11510                 ({} with retrospective reviews, {} with auditor point estimates)",
11511                estimates.len(),
11512                entity_codes.len(),
11513                estimates
11514                    .iter()
11515                    .filter(|e| e.retrospective_review.is_some())
11516                    .count(),
11517                estimates
11518                    .iter()
11519                    .filter(|e| e.auditor_point_estimate.is_some())
11520                    .count(),
11521            );
11522            snapshot.accounting_estimates = estimates;
11523        }
11524
11525        // ----------------------------------------------------------------
11526        // ISA 700/701/705/706: Audit opinions (one per engagement)
11527        // ----------------------------------------------------------------
11528        {
11529            use datasynth_generators::audit::audit_opinion_generator::{
11530                AuditOpinionGenerator, AuditOpinionInput,
11531            };
11532
11533            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11534
11535            // Build inputs — one per engagement, linking findings and going concern.
11536            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11537                .engagements
11538                .iter()
11539                .map(|eng| {
11540                    // Collect findings for this engagement.
11541                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11542                        .findings
11543                        .iter()
11544                        .filter(|f| f.engagement_id == eng.engagement_id)
11545                        .cloned()
11546                        .collect();
11547
11548                    // Going concern for this entity.
11549                    let gc = snapshot
11550                        .going_concern_assessments
11551                        .iter()
11552                        .find(|g| g.entity_code == eng.client_entity_id)
11553                        .cloned();
11554
11555                    // Component reports relevant to this engagement.
11556                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11557                        snapshot.component_reports.clone();
11558
11559                    let auditor = self
11560                        .master_data
11561                        .employees
11562                        .first()
11563                        .map(|e| e.display_name.clone())
11564                        .unwrap_or_else(|| "Global Audit LLP".into());
11565
11566                    let partner = self
11567                        .master_data
11568                        .employees
11569                        .get(1)
11570                        .map(|e| e.display_name.clone())
11571                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
11572
11573                    AuditOpinionInput {
11574                        entity_code: eng.client_entity_id.clone(),
11575                        entity_name: eng.client_name.clone(),
11576                        engagement_id: eng.engagement_id,
11577                        period_end: eng.period_end_date,
11578                        findings: eng_findings,
11579                        going_concern: gc,
11580                        component_reports: comp_reports,
11581                        // Mark as US-listed when audit standards include PCAOB.
11582                        is_us_listed: {
11583                            let fw = &self.config.audit_standards.isa_compliance.framework;
11584                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11585                        },
11586                        auditor_name: auditor,
11587                        engagement_partner: partner,
11588                    }
11589                })
11590                .collect();
11591
11592            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11593
11594            for go in &generated_opinions {
11595                snapshot
11596                    .key_audit_matters
11597                    .extend(go.key_audit_matters.clone());
11598            }
11599            snapshot.audit_opinions = generated_opinions
11600                .into_iter()
11601                .map(|go| go.opinion)
11602                .collect();
11603
11604            info!(
11605                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11606                snapshot.audit_opinions.len(),
11607                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11608                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11609                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11610                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11611            );
11612        }
11613
11614        // ----------------------------------------------------------------
11615        // SOX 302 / 404 assessments
11616        // ----------------------------------------------------------------
11617        {
11618            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11619
11620            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11621
11622            for (i, company) in self.config.companies.iter().enumerate() {
11623                // Collect findings for this company's engagements.
11624                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11625                    .engagements
11626                    .iter()
11627                    .filter(|e| e.client_entity_id == company.code)
11628                    .map(|e| e.engagement_id)
11629                    .collect();
11630
11631                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11632                    .findings
11633                    .iter()
11634                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11635                    .cloned()
11636                    .collect();
11637
11638                // Derive executive names from employee list.
11639                let emp_count = self.master_data.employees.len();
11640                let ceo_name = if emp_count > 0 {
11641                    self.master_data.employees[i % emp_count]
11642                        .display_name
11643                        .clone()
11644                } else {
11645                    format!("CEO of {}", company.name)
11646                };
11647                let cfo_name = if emp_count > 1 {
11648                    self.master_data.employees[(i + 1) % emp_count]
11649                        .display_name
11650                        .clone()
11651                } else {
11652                    format!("CFO of {}", company.name)
11653                };
11654
11655                // Use engagement materiality if available.
11656                let materiality = snapshot
11657                    .engagements
11658                    .iter()
11659                    .find(|e| e.client_entity_id == company.code)
11660                    .map(|e| e.materiality)
11661                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11662
11663                let input = SoxGeneratorInput {
11664                    company_code: company.code.clone(),
11665                    company_name: company.name.clone(),
11666                    fiscal_year,
11667                    period_end,
11668                    findings: company_findings,
11669                    ceo_name,
11670                    cfo_name,
11671                    materiality_threshold: materiality,
11672                    revenue_percent: rust_decimal::Decimal::from(100),
11673                    assets_percent: rust_decimal::Decimal::from(100),
11674                    significant_accounts: vec![
11675                        "Revenue".into(),
11676                        "Accounts Receivable".into(),
11677                        "Inventory".into(),
11678                        "Fixed Assets".into(),
11679                        "Accounts Payable".into(),
11680                    ],
11681                };
11682
11683                let (certs, assessment) = sox_gen.generate(&input);
11684                snapshot.sox_302_certifications.extend(certs);
11685                snapshot.sox_404_assessments.push(assessment);
11686            }
11687
11688            info!(
11689                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11690                snapshot.sox_302_certifications.len(),
11691                snapshot.sox_404_assessments.len(),
11692                snapshot
11693                    .sox_404_assessments
11694                    .iter()
11695                    .filter(|a| a.icfr_effective)
11696                    .count(),
11697                snapshot
11698                    .sox_404_assessments
11699                    .iter()
11700                    .filter(|a| !a.icfr_effective)
11701                    .count(),
11702            );
11703        }
11704
11705        // ----------------------------------------------------------------
11706        // ISA 320: Materiality calculations (one per entity)
11707        // ----------------------------------------------------------------
11708        {
11709            use datasynth_generators::audit::materiality_generator::{
11710                MaterialityGenerator, MaterialityInput,
11711            };
11712
11713            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11714
11715            // Compute per-company financials from JEs.
11716            // Asset accounts start with '1', revenue with '4',
11717            // expense accounts with '5' or '6'.
11718            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11719
11720            for company in &self.config.companies {
11721                let company_code = company.code.clone();
11722
11723                // Revenue: credit-side entries on 4xxx accounts
11724                let company_revenue: rust_decimal::Decimal = entries
11725                    .iter()
11726                    .filter(|e| e.company_code() == company_code)
11727                    .flat_map(|e| e.lines.iter())
11728                    .filter(|l| l.account_code.starts_with('4'))
11729                    .map(|l| l.credit_amount)
11730                    .sum();
11731
11732                // Total assets: gross debit postings on 1xxx accounts (credits are not netted)
11733                let total_assets: rust_decimal::Decimal = entries
11734                    .iter()
11735                    .filter(|e| e.company_code() == company_code)
11736                    .flat_map(|e| e.lines.iter())
11737                    .filter(|l| l.account_code.starts_with('1'))
11738                    .map(|l| l.debit_amount)
11739                    .sum();
11740
11741                // Expenses: debit-side entries on 5xxx/6xxx accounts
11742                let total_expenses: rust_decimal::Decimal = entries
11743                    .iter()
11744                    .filter(|e| e.company_code() == company_code)
11745                    .flat_map(|e| e.lines.iter())
11746                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11747                    .map(|l| l.debit_amount)
11748                    .sum();
11749
11750                // Equity: gross credit postings on 3xxx accounts (debits are not netted)
11751                let equity: rust_decimal::Decimal = entries
11752                    .iter()
11753                    .filter(|e| e.company_code() == company_code)
11754                    .flat_map(|e| e.lines.iter())
11755                    .filter(|l| l.account_code.starts_with('3'))
11756                    .map(|l| l.credit_amount)
11757                    .sum();
11758
11759                let pretax_income = company_revenue - total_expenses;
11760
11761                // If no company-specific data, fall back to proportional share
11762                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11763                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
11764                        .unwrap_or(rust_decimal::Decimal::ONE);
11765                    (
11766                        total_revenue * w,
11767                        total_revenue * w * rust_decimal::Decimal::from(3),
11768                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
11769                        total_revenue * w * rust_decimal::Decimal::from(2),
11770                    )
11771                } else {
11772                    (company_revenue, total_assets, pretax_income, equity)
11773                };
11774
11775                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
11776
11777                materiality_inputs.push(MaterialityInput {
11778                    entity_code: company_code,
11779                    period: format!("FY{}", fiscal_year),
11780                    revenue: rev,
11781                    pretax_income: pti,
11782                    total_assets: assets,
11783                    equity: eq,
11784                    gross_profit,
11785                });
11786            }
11787
11788            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11789
11790            info!(
11791                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11792                 {} total assets, {} equity benchmarks)",
11793                snapshot.materiality_calculations.len(),
11794                snapshot
11795                    .materiality_calculations
11796                    .iter()
11797                    .filter(|m| matches!(
11798                        m.benchmark,
11799                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11800                    ))
11801                    .count(),
11802                snapshot
11803                    .materiality_calculations
11804                    .iter()
11805                    .filter(|m| matches!(
11806                        m.benchmark,
11807                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11808                    ))
11809                    .count(),
11810                snapshot
11811                    .materiality_calculations
11812                    .iter()
11813                    .filter(|m| matches!(
11814                        m.benchmark,
11815                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11816                    ))
11817                    .count(),
11818                snapshot
11819                    .materiality_calculations
11820                    .iter()
11821                    .filter(|m| matches!(
11822                        m.benchmark,
11823                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11824                    ))
11825                    .count(),
11826            );
11827        }
11828
11829        // ----------------------------------------------------------------
11830        // ISA 315: Combined Risk Assessments (per entity, per account area)
11831        // ----------------------------------------------------------------
11832        {
11833            use datasynth_generators::audit::cra_generator::CraGenerator;
11834
11835            let mut cra_gen = CraGenerator::new(self.seed + 8315);
11836
11837            // Build entity → scope_id map from already-generated scopes
11838            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11839                .audit_scopes
11840                .iter()
11841                .map(|s| (s.entity_code.clone(), s.id.clone()))
11842                .collect();
11843
11844            for company in &self.config.companies {
11845                let cras = cra_gen.generate_for_entity(&company.code, None);
11846                let scope_id = entity_scope_map.get(&company.code).cloned();
11847                let cras_with_scope: Vec<_> = cras
11848                    .into_iter()
11849                    .map(|mut cra| {
11850                        cra.scope_id = scope_id.clone();
11851                        cra
11852                    })
11853                    .collect();
11854                snapshot.combined_risk_assessments.extend(cras_with_scope);
11855            }
11856
11857            let significant_count = snapshot
11858                .combined_risk_assessments
11859                .iter()
11860                .filter(|c| c.significant_risk)
11861                .count();
11862            let high_cra_count = snapshot
11863                .combined_risk_assessments
11864                .iter()
11865                .filter(|c| {
11866                    matches!(
11867                        c.combined_risk,
11868                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11869                    )
11870                })
11871                .count();
11872
11873            info!(
11874                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11875                snapshot.combined_risk_assessments.len(),
11876                significant_count,
11877                high_cra_count,
11878            );
11879        }
11880
11881        // ----------------------------------------------------------------
11882        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
11883        // ----------------------------------------------------------------
11884        {
11885            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11886
11887            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11888
11889            // Group CRAs by entity and use per-entity tolerable error from materiality
11890            for company in &self.config.companies {
11891                let entity_code = company.code.clone();
11892
11893                // Find tolerable error for this entity (= performance materiality)
11894                let tolerable_error = snapshot
11895                    .materiality_calculations
11896                    .iter()
11897                    .find(|m| m.entity_code == entity_code)
11898                    .map(|m| m.tolerable_error);
11899
11900                // Collect CRAs for this entity
11901                let entity_cras: Vec<_> = snapshot
11902                    .combined_risk_assessments
11903                    .iter()
11904                    .filter(|c| c.entity_code == entity_code)
11905                    .cloned()
11906                    .collect();
11907
11908                if !entity_cras.is_empty() {
11909                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11910                    snapshot.sampling_plans.extend(plans);
11911                    snapshot.sampled_items.extend(items);
11912                }
11913            }
11914
11915            let misstatement_count = snapshot
11916                .sampled_items
11917                .iter()
11918                .filter(|i| i.misstatement_found)
11919                .count();
11920
11921            info!(
11922                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11923                snapshot.sampling_plans.len(),
11924                snapshot.sampled_items.len(),
11925                misstatement_count,
11926            );
11927        }
11928
11929        // ----------------------------------------------------------------
11930        // ISA 315: Significant Classes of Transactions (SCOTS)
11931        // ----------------------------------------------------------------
11932        {
11933            use datasynth_generators::audit::scots_generator::{
11934                ScotsGenerator, ScotsGeneratorConfig,
11935            };
11936
11937            let ic_enabled = self.config.intercompany.enabled;
11938
11939            let config = ScotsGeneratorConfig {
11940                intercompany_enabled: ic_enabled,
11941                ..ScotsGeneratorConfig::default()
11942            };
11943            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11944
11945            for company in &self.config.companies {
11946                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11947                snapshot
11948                    .significant_transaction_classes
11949                    .extend(entity_scots);
11950            }
11951
11952            let estimation_count = snapshot
11953                .significant_transaction_classes
11954                .iter()
11955                .filter(|s| {
11956                    matches!(
11957                        s.transaction_type,
11958                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11959                    )
11960                })
11961                .count();
11962
11963            info!(
11964                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11965                snapshot.significant_transaction_classes.len(),
11966                estimation_count,
11967            );
11968        }
11969
11970        // ----------------------------------------------------------------
11971        // ISA 520: Unusual Item Markers
11972        // ----------------------------------------------------------------
11973        {
11974            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11975
11976            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11977            let entity_codes: Vec<String> = self
11978                .config
11979                .companies
11980                .iter()
11981                .map(|c| c.code.clone())
11982                .collect();
11983            let unusual_flags =
11984                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11985            info!(
11986                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11987                unusual_flags.len(),
11988                unusual_flags
11989                    .iter()
11990                    .filter(|f| matches!(
11991                        f.severity,
11992                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11993                    ))
11994                    .count(),
11995                unusual_flags
11996                    .iter()
11997                    .filter(|f| matches!(
11998                        f.severity,
11999                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
12000                    ))
12001                    .count(),
12002                unusual_flags
12003                    .iter()
12004                    .filter(|f| matches!(
12005                        f.severity,
12006                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
12007                    ))
12008                    .count(),
12009            );
12010            snapshot.unusual_items = unusual_flags;
12011        }
12012
12013        // ----------------------------------------------------------------
12014        // ISA 520: Analytical Relationships
12015        // ----------------------------------------------------------------
12016        {
12017            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
12018
12019            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
12020            let entity_codes: Vec<String> = self
12021                .config
12022                .companies
12023                .iter()
12024                .map(|c| c.code.clone())
12025                .collect();
12026            let current_period_label = format!("FY{fiscal_year}");
12027            let prior_period_label = format!("FY{}", fiscal_year - 1);
12028            let analytical_rels = ar_gen.generate_for_entities(
12029                &entity_codes,
12030                entries,
12031                &current_period_label,
12032                &prior_period_label,
12033            );
12034            let out_of_range = analytical_rels
12035                .iter()
12036                .filter(|r| !r.within_expected_range)
12037                .count();
12038            info!(
12039                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
12040                analytical_rels.len(),
12041                out_of_range,
12042            );
12043            snapshot.analytical_relationships = analytical_rels;
12044        }
12045
12046        if let Some(pb) = pb {
12047            pb.finish_with_message(format!(
12048                "Audit data: {} engagements, {} workpapers, {} evidence, \
12049                 {} confirmations, {} procedure steps, {} samples, \
12050                 {} analytical, {} IA funcs, {} related parties, \
12051                 {} component auditors, {} letters, {} subsequent events, \
12052                 {} service orgs, {} going concern, {} accounting estimates, \
12053                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
12054                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
12055                 {} unusual items, {} analytical relationships",
12056                snapshot.engagements.len(),
12057                snapshot.workpapers.len(),
12058                snapshot.evidence.len(),
12059                snapshot.confirmations.len(),
12060                snapshot.procedure_steps.len(),
12061                snapshot.samples.len(),
12062                snapshot.analytical_results.len(),
12063                snapshot.ia_functions.len(),
12064                snapshot.related_parties.len(),
12065                snapshot.component_auditors.len(),
12066                snapshot.engagement_letters.len(),
12067                snapshot.subsequent_events.len(),
12068                snapshot.service_organizations.len(),
12069                snapshot.going_concern_assessments.len(),
12070                snapshot.accounting_estimates.len(),
12071                snapshot.audit_opinions.len(),
12072                snapshot.key_audit_matters.len(),
12073                snapshot.sox_302_certifications.len(),
12074                snapshot.sox_404_assessments.len(),
12075                snapshot.materiality_calculations.len(),
12076                snapshot.combined_risk_assessments.len(),
12077                snapshot.sampling_plans.len(),
12078                snapshot.significant_transaction_classes.len(),
12079                snapshot.unusual_items.len(),
12080                snapshot.analytical_relationships.len(),
12081            ));
12082        }
12083
12084        // ----------------------------------------------------------------
12085        // PCAOB-ISA cross-reference mappings
12086        // ----------------------------------------------------------------
12087        // Always include the standard PCAOB-ISA mappings when audit generation is
12088        // enabled. These are static reference data (no randomness required) so we
12089        // call standard_mappings() directly.
12090        {
12091            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12092            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12093            debug!(
12094                "PCAOB-ISA mappings generated: {} mappings",
12095                snapshot.isa_pcaob_mappings.len()
12096            );
12097        }
12098
12099        // ----------------------------------------------------------------
12100        // ISA standard reference entries
12101        // ----------------------------------------------------------------
12102        // Emit flat ISA standard reference data (number, title, series) so
12103        // consumers get a machine-readable listing of all 34 ISA standards in
12104        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
12105        {
12106            use datasynth_standards::audit::isa_reference::IsaStandard;
12107            snapshot.isa_mappings = IsaStandard::standard_entries();
12108            debug!(
12109                "ISA standard entries generated: {} standards",
12110                snapshot.isa_mappings.len()
12111            );
12112        }
12113
12114        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
12115        // For each RPT, find the chronologically closest JE for the engagement's entity.
12116        {
12117            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
12118                .engagements
12119                .iter()
12120                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
12121                .collect();
12122
12123            for rpt in &mut snapshot.related_party_transactions {
12124                if rpt.journal_entry_id.is_some() {
12125                    continue; // already set
12126                }
12127                let entity = engagement_by_id
12128                    .get(&rpt.engagement_id.to_string())
12129                    .copied()
12130                    .unwrap_or("");
12131
12132                // Find closest JE by date in the entity's company
12133                let best_je = entries
12134                    .iter()
12135                    .filter(|je| je.header.company_code == entity)
12136                    .min_by_key(|je| {
12137                        (je.header.posting_date - rpt.transaction_date)
12138                            .num_days()
12139                            .abs()
12140                    });
12141
12142                if let Some(je) = best_je {
12143                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
12144                }
12145            }
12146
12147            let linked = snapshot
12148                .related_party_transactions
12149                .iter()
12150                .filter(|t| t.journal_entry_id.is_some())
12151                .count();
12152            debug!(
12153                "Linked {}/{} related party transactions to journal entries",
12154                linked,
12155                snapshot.related_party_transactions.len()
12156            );
12157        }
12158
12159        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
12160        // One opinion per engagement, derived from that engagement's findings, going-concern
12161        // assessment, and any component-auditor reports. Appends to `audit_opinions` and to a
12162        // flattened `key_audit_matters` for downstream export.
12163        // NOTE(review): the earlier ISA 700 block also populates both vectors; confirm the append.
12164        if !snapshot.engagements.is_empty() {
12165            use datasynth_generators::audit_opinion_generator::{
12166                AuditOpinionGenerator, AuditOpinionInput,
12167            };
12168
12169            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
12170            let inputs: Vec<AuditOpinionInput> = snapshot
12171                .engagements
12172                .iter()
12173                .map(|eng| {
12174                    let findings = snapshot
12175                        .findings
12176                        .iter()
12177                        .filter(|f| f.engagement_id == eng.engagement_id)
12178                        .cloned()
12179                        .collect();
12180                    let going_concern = snapshot
12181                        .going_concern_assessments
12182                        .iter()
12183                        .find(|gc| gc.entity_code == eng.client_entity_id)
12184                        .cloned();
12185                    // ComponentAuditorReport doesn't carry an engagement id, but
12186                    // component scope is keyed by `entity_code`, so filter on that.
12187                    let component_reports = snapshot
12188                        .component_reports
12189                        .iter()
12190                        .filter(|r| r.entity_code == eng.client_entity_id)
12191                        .cloned()
12192                        .collect();
12193
12194                    AuditOpinionInput {
12195                        entity_code: eng.client_entity_id.clone(),
12196                        entity_name: eng.client_name.clone(),
12197                        engagement_id: eng.engagement_id,
12198                        period_end: eng.period_end_date,
12199                        findings,
12200                        going_concern,
12201                        component_reports,
12202                        is_us_listed: matches!(
12203                            eng.engagement_type,
12204                            datasynth_core::audit::EngagementType::IntegratedAudit
12205                                | datasynth_core::audit::EngagementType::Sox404
12206                        ),
12207                        auditor_name: "DataSynth Audit LLP".to_string(),
12208                        engagement_partner: "Engagement Partner".to_string(),
12209                    }
12210                })
12211                .collect();
12212
12213            let generated = opinion_gen.generate_batch(&inputs);
12214            for g in generated {
12215                snapshot.key_audit_matters.extend(g.key_audit_matters);
12216                snapshot.audit_opinions.push(g.opinion);
12217            }
12218            debug!(
12219                "Generated {} audit opinions with {} key audit matters",
12220                snapshot.audit_opinions.len(),
12221                snapshot.key_audit_matters.len()
12222            );
12223        }
12224
12225        Ok(snapshot)
12226    }
12227
12228    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
12229    ///
12230    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
12231    /// from the current orchestrator state, runs the FSM engine, and maps the
12232    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
12233    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
12234    fn generate_audit_data_with_fsm(
12235        &mut self,
12236        entries: &[JournalEntry],
12237    ) -> SynthResult<AuditSnapshot> {
12238        use datasynth_audit_fsm::{
12239            context::EngagementContext,
12240            engine::AuditFsmEngine,
12241            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
12242        };
12243        use rand::SeedableRng;
12244        use rand_chacha::ChaCha8Rng;
12245
12246        info!("Audit FSM: generating audit data via FSM engine");
12247
12248        let fsm_config = self
12249            .config
12250            .audit
12251            .fsm
12252            .as_ref()
12253            .expect("FSM config must be present when FSM is enabled");
12254
12255        // 1. Load blueprint from config string.
12256        let bwp = match fsm_config.blueprint.as_str() {
12257            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
12258            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
12259            _ => {
12260                warn!(
12261                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
12262                    fsm_config.blueprint
12263                );
12264                BlueprintWithPreconditions::load_builtin_fsa()
12265            }
12266        }
12267        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
12268
12269        // 2. Load overlay from config string.
12270        let overlay = match fsm_config.overlay.as_str() {
12271            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
12272            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
12273            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
12274            _ => {
12275                warn!(
12276                    "Unknown FSM overlay '{}', falling back to builtin:default",
12277                    fsm_config.overlay
12278                );
12279                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
12280            }
12281        }
12282        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
12283
12284        // 3. Build EngagementContext from orchestrator state.
12285        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12286            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12287        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12288
12289        // Determine the engagement entity early so we can filter JEs.
12290        let company = self.config.companies.first();
12291        let company_code = company
12292            .map(|c| c.code.clone())
12293            .unwrap_or_else(|| "UNKNOWN".to_string());
12294        let company_name = company
12295            .map(|c| c.name.clone())
12296            .unwrap_or_else(|| "Unknown Company".to_string());
12297        let currency = company
12298            .map(|c| c.currency.clone())
12299            .unwrap_or_else(|| "USD".to_string());
12300
12301        // Filter JEs to the engagement entity for single-company coherence.
12302        let entity_entries: Vec<_> = entries
12303            .iter()
12304            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
12305            .cloned()
12306            .collect();
12307        let entries = &entity_entries; // Shadow the parameter for remaining usage
12308
12309        // Financial aggregates from journal entries.
12310        let total_revenue: rust_decimal::Decimal = entries
12311            .iter()
12312            .flat_map(|e| e.lines.iter())
12313            .filter(|l| l.account_code.starts_with('4'))
12314            .map(|l| l.credit_amount - l.debit_amount)
12315            .sum();
12316
12317        let total_assets: rust_decimal::Decimal = entries
12318            .iter()
12319            .flat_map(|e| e.lines.iter())
12320            .filter(|l| l.account_code.starts_with('1'))
12321            .map(|l| l.debit_amount - l.credit_amount)
12322            .sum();
12323
12324        let total_expenses: rust_decimal::Decimal = entries
12325            .iter()
12326            .flat_map(|e| e.lines.iter())
12327            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
12328            .map(|l| l.debit_amount)
12329            .sum();
12330
12331        let equity: rust_decimal::Decimal = entries
12332            .iter()
12333            .flat_map(|e| e.lines.iter())
12334            .filter(|l| l.account_code.starts_with('3'))
12335            .map(|l| l.credit_amount - l.debit_amount)
12336            .sum();
12337
12338        let total_debt: rust_decimal::Decimal = entries
12339            .iter()
12340            .flat_map(|e| e.lines.iter())
12341            .filter(|l| l.account_code.starts_with('2'))
12342            .map(|l| l.credit_amount - l.debit_amount)
12343            .sum();
12344
12345        let pretax_income = total_revenue - total_expenses;
12346
12347        let cogs: rust_decimal::Decimal = entries
12348            .iter()
12349            .flat_map(|e| e.lines.iter())
12350            .filter(|l| l.account_code.starts_with('5'))
12351            .map(|l| l.debit_amount)
12352            .sum();
12353        let gross_profit = total_revenue - cogs;
12354
12355        let current_assets: rust_decimal::Decimal = entries
12356            .iter()
12357            .flat_map(|e| e.lines.iter())
12358            .filter(|l| {
12359                l.account_code.starts_with("10")
12360                    || l.account_code.starts_with("11")
12361                    || l.account_code.starts_with("12")
12362                    || l.account_code.starts_with("13")
12363            })
12364            .map(|l| l.debit_amount - l.credit_amount)
12365            .sum();
12366        let current_liabilities: rust_decimal::Decimal = entries
12367            .iter()
12368            .flat_map(|e| e.lines.iter())
12369            .filter(|l| {
12370                l.account_code.starts_with("20")
12371                    || l.account_code.starts_with("21")
12372                    || l.account_code.starts_with("22")
12373            })
12374            .map(|l| l.credit_amount - l.debit_amount)
12375            .sum();
12376        let working_capital = current_assets - current_liabilities;
12377
12378        let depreciation: rust_decimal::Decimal = entries
12379            .iter()
12380            .flat_map(|e| e.lines.iter())
12381            .filter(|l| l.account_code.starts_with("60"))
12382            .map(|l| l.debit_amount)
12383            .sum();
12384        let operating_cash_flow = pretax_income + depreciation;
12385
12386        // GL accounts for reference data.
12387        let accounts: Vec<String> = self
12388            .coa
12389            .as_ref()
12390            .map(|coa| {
12391                coa.get_postable_accounts()
12392                    .iter()
12393                    .map(|acc| acc.account_code().to_string())
12394                    .collect()
12395            })
12396            .unwrap_or_default();
12397
12398        // Team member IDs and display names from master data.
12399        let team_member_ids: Vec<String> = self
12400            .master_data
12401            .employees
12402            .iter()
12403            .take(8) // Cap team size
12404            .map(|e| e.employee_id.clone())
12405            .collect();
12406        let team_member_pairs: Vec<(String, String)> = self
12407            .master_data
12408            .employees
12409            .iter()
12410            .take(8)
12411            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12412            .collect();
12413
12414        let vendor_names: Vec<String> = self
12415            .master_data
12416            .vendors
12417            .iter()
12418            .map(|v| v.name.clone())
12419            .collect();
12420        let customer_names: Vec<String> = self
12421            .master_data
12422            .customers
12423            .iter()
12424            .map(|c| c.name.clone())
12425            .collect();
12426
12427        let entity_codes: Vec<String> = self
12428            .config
12429            .companies
12430            .iter()
12431            .map(|c| c.code.clone())
12432            .collect();
12433
12434        // Journal entry IDs for evidence tracing (sample up to 50).
12435        let journal_entry_ids: Vec<String> = entries
12436            .iter()
12437            .take(50)
12438            .map(|e| e.header.document_id.to_string())
12439            .collect();
12440
12441        // Account balances for risk weighting (aggregate debit - credit per account).
12442        let mut account_balances = std::collections::HashMap::<String, f64>::new();
12443        for entry in entries {
12444            for line in &entry.lines {
12445                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12446                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12447                *account_balances
12448                    .entry(line.account_code.clone())
12449                    .or_insert(0.0) += debit_f64 - credit_f64;
12450            }
12451        }
12452
12453        // Internal control IDs and anomaly refs are populated by the
12454        // caller when available; here we default to empty because the
12455        // orchestrator state may not have generated controls/anomalies
12456        // yet at this point in the pipeline.
12457        let control_ids: Vec<String> = Vec::new();
12458        let anomaly_refs: Vec<String> = Vec::new();
12459
12460        let mut context = EngagementContext {
12461            company_code,
12462            company_name,
12463            fiscal_year: start_date.year(),
12464            currency,
12465            total_revenue,
12466            total_assets,
12467            engagement_start: start_date,
12468            report_date: period_end,
12469            pretax_income,
12470            equity,
12471            gross_profit,
12472            working_capital,
12473            operating_cash_flow,
12474            total_debt,
12475            team_member_ids,
12476            team_member_pairs,
12477            accounts,
12478            vendor_names,
12479            customer_names,
12480            journal_entry_ids,
12481            account_balances,
12482            control_ids,
12483            anomaly_refs,
12484            journal_entries: entries.to_vec(),
12485            is_us_listed: false,
12486            entity_codes,
12487            auditor_firm_name: "DataSynth Audit LLP".into(),
12488            accounting_framework: self
12489                .config
12490                .accounting_standards
12491                .framework
12492                .map(|f| match f {
12493                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12494                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12495                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12496                        "French GAAP"
12497                    }
12498                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12499                        "German GAAP"
12500                    }
12501                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12502                        "Dual Reporting"
12503                    }
12504                })
12505                .unwrap_or("IFRS")
12506                .into(),
12507        };
12508
12509        // 4. Create and run the FSM engine.
12510        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12511        let rng = ChaCha8Rng::seed_from_u64(seed);
12512        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12513
12514        let mut result = engine
12515            .run_engagement(&context)
12516            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12517
12518        info!(
12519            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12520             {} phases completed, duration {:.1}h",
12521            result.event_log.len(),
12522            result.artifacts.total_artifacts(),
12523            result.anomalies.len(),
12524            result.phases_completed.len(),
12525            result.total_duration_hours,
12526        );
12527
12528        // 4b. Populate financial data in the artifact bag for downstream consumers.
12529        let tb_entity = context.company_code.clone();
12530        let tb_fy = context.fiscal_year;
12531        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12532        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12533            entries,
12534            &tb_entity,
12535            tb_fy,
12536            self.coa.as_ref().map(|c| c.as_ref()),
12537        );
12538
12539        // 5. Map ArtifactBag fields to AuditSnapshot.
12540        let bag = result.artifacts;
12541        let mut snapshot = AuditSnapshot {
12542            engagements: bag.engagements,
12543            engagement_letters: bag.engagement_letters,
12544            materiality_calculations: bag.materiality_calculations,
12545            risk_assessments: bag.risk_assessments,
12546            combined_risk_assessments: bag.combined_risk_assessments,
12547            workpapers: bag.workpapers,
12548            evidence: bag.evidence,
12549            findings: bag.findings,
12550            judgments: bag.judgments,
12551            sampling_plans: bag.sampling_plans,
12552            sampled_items: bag.sampled_items,
12553            analytical_results: bag.analytical_results,
12554            going_concern_assessments: bag.going_concern_assessments,
12555            subsequent_events: bag.subsequent_events,
12556            audit_opinions: bag.audit_opinions,
12557            key_audit_matters: bag.key_audit_matters,
12558            procedure_steps: bag.procedure_steps,
12559            samples: bag.samples,
12560            confirmations: bag.confirmations,
12561            confirmation_responses: bag.confirmation_responses,
12562            // Store the event trail for downstream export.
12563            fsm_event_trail: Some(result.event_log),
12564            // Fields not produced by the FSM engine remain at their defaults.
12565            ..Default::default()
12566        };
12567
12568        // 6. Add static reference data (same as legacy path).
12569        {
12570            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12571            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12572        }
12573        {
12574            use datasynth_standards::audit::isa_reference::IsaStandard;
12575            snapshot.isa_mappings = IsaStandard::standard_entries();
12576        }
12577
12578        info!(
12579            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12580             {} risk assessments, {} findings, {} materiality calcs",
12581            snapshot.engagements.len(),
12582            snapshot.workpapers.len(),
12583            snapshot.evidence.len(),
12584            snapshot.risk_assessments.len(),
12585            snapshot.findings.len(),
12586            snapshot.materiality_calculations.len(),
12587        );
12588
12589        Ok(snapshot)
12590    }
12591
12592    /// Export journal entries as graph data for ML training and network reconstruction.
12593    ///
12594    /// Builds a transaction graph where:
12595    /// - Nodes are GL accounts
12596    /// - Edges are money flows from credit to debit accounts
12597    /// - Edge attributes include amount, date, business process, anomaly flags
12598    fn export_graphs(
12599        &mut self,
12600        entries: &[JournalEntry],
12601        _coa: &Arc<ChartOfAccounts>,
12602        stats: &mut EnhancedGenerationStatistics,
12603    ) -> SynthResult<GraphExportSnapshot> {
12604        let pb = self.create_progress_bar(100, "Exporting Graphs");
12605
12606        let mut snapshot = GraphExportSnapshot::default();
12607
12608        // Get output directory
12609        let output_dir = self
12610            .output_path
12611            .clone()
12612            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12613        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12614
12615        // Process each graph type configuration
12616        for graph_type in &self.config.graph_export.graph_types {
12617            if let Some(pb) = &pb {
12618                pb.inc(10);
12619            }
12620
12621            // Build transaction graph
12622            let graph_config = TransactionGraphConfig {
12623                include_vendors: false,
12624                include_customers: false,
12625                create_debit_credit_edges: true,
12626                include_document_nodes: graph_type.include_document_nodes,
12627                min_edge_weight: graph_type.min_edge_weight,
12628                aggregate_parallel_edges: graph_type.aggregate_edges,
12629                framework: None,
12630            };
12631
12632            let mut builder = TransactionGraphBuilder::new(graph_config);
12633            builder.add_journal_entries(entries);
12634            let graph = builder.build();
12635
12636            // Update stats
12637            stats.graph_node_count += graph.node_count();
12638            stats.graph_edge_count += graph.edge_count();
12639
12640            if let Some(pb) = &pb {
12641                pb.inc(40);
12642            }
12643
12644            // Export to each configured format
12645            for format in &self.config.graph_export.formats {
12646                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12647
12648                // Create output directory
12649                if let Err(e) = std::fs::create_dir_all(&format_dir) {
12650                    warn!("Failed to create graph output directory: {}", e);
12651                    continue;
12652                }
12653
12654                match format {
12655                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12656                        let pyg_config = PyGExportConfig {
12657                            common: datasynth_graph::CommonExportConfig {
12658                                export_node_features: true,
12659                                export_edge_features: true,
12660                                export_node_labels: true,
12661                                export_edge_labels: true,
12662                                export_masks: true,
12663                                train_ratio: self.config.graph_export.train_ratio,
12664                                val_ratio: self.config.graph_export.validation_ratio,
12665                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12666                            },
12667                            one_hot_categoricals: false,
12668                        };
12669
12670                        let exporter = PyGExporter::new(pyg_config);
12671                        match exporter.export(&graph, &format_dir) {
12672                            Ok(metadata) => {
12673                                snapshot.exports.insert(
12674                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
12675                                    GraphExportInfo {
12676                                        name: graph_type.name.clone(),
12677                                        format: "pytorch_geometric".to_string(),
12678                                        output_path: format_dir.clone(),
12679                                        node_count: metadata.num_nodes,
12680                                        edge_count: metadata.num_edges,
12681                                    },
12682                                );
12683                                snapshot.graph_count += 1;
12684                            }
12685                            Err(e) => {
12686                                warn!("Failed to export PyTorch Geometric graph: {}", e);
12687                            }
12688                        }
12689                    }
12690                    datasynth_config::schema::GraphExportFormat::Neo4j => {
12691                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12692
12693                        let neo4j_config = Neo4jExportConfig {
12694                            export_node_properties: true,
12695                            export_edge_properties: true,
12696                            export_features: true,
12697                            generate_cypher: true,
12698                            generate_admin_import: true,
12699                            database_name: "synth".to_string(),
12700                            cypher_batch_size: 1000,
12701                        };
12702
12703                        let exporter = Neo4jExporter::new(neo4j_config);
12704                        match exporter.export(&graph, &format_dir) {
12705                            Ok(metadata) => {
12706                                snapshot.exports.insert(
12707                                    format!("{}_{}", graph_type.name, "neo4j"),
12708                                    GraphExportInfo {
12709                                        name: graph_type.name.clone(),
12710                                        format: "neo4j".to_string(),
12711                                        output_path: format_dir.clone(),
12712                                        node_count: metadata.num_nodes,
12713                                        edge_count: metadata.num_edges,
12714                                    },
12715                                );
12716                                snapshot.graph_count += 1;
12717                            }
12718                            Err(e) => {
12719                                warn!("Failed to export Neo4j graph: {}", e);
12720                            }
12721                        }
12722                    }
12723                    datasynth_config::schema::GraphExportFormat::Dgl => {
12724                        use datasynth_graph::{DGLExportConfig, DGLExporter};
12725
12726                        let dgl_config = DGLExportConfig {
12727                            common: datasynth_graph::CommonExportConfig {
12728                                export_node_features: true,
12729                                export_edge_features: true,
12730                                export_node_labels: true,
12731                                export_edge_labels: true,
12732                                export_masks: true,
12733                                train_ratio: self.config.graph_export.train_ratio,
12734                                val_ratio: self.config.graph_export.validation_ratio,
12735                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12736                            },
12737                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
12738                            include_pickle_script: true, // DGL ecosystem standard helper
12739                        };
12740
12741                        let exporter = DGLExporter::new(dgl_config);
12742                        match exporter.export(&graph, &format_dir) {
12743                            Ok(metadata) => {
12744                                snapshot.exports.insert(
12745                                    format!("{}_{}", graph_type.name, "dgl"),
12746                                    GraphExportInfo {
12747                                        name: graph_type.name.clone(),
12748                                        format: "dgl".to_string(),
12749                                        output_path: format_dir.clone(),
12750                                        node_count: metadata.common.num_nodes,
12751                                        edge_count: metadata.common.num_edges,
12752                                    },
12753                                );
12754                                snapshot.graph_count += 1;
12755                            }
12756                            Err(e) => {
12757                                warn!("Failed to export DGL graph: {}", e);
12758                            }
12759                        }
12760                    }
12761                    datasynth_config::schema::GraphExportFormat::RustGraph => {
12762                        use datasynth_graph::{
12763                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12764                        };
12765
12766                        let rustgraph_config = RustGraphExportConfig {
12767                            include_features: true,
12768                            include_temporal: true,
12769                            include_labels: true,
12770                            source_name: "datasynth".to_string(),
12771                            batch_id: None,
12772                            output_format: RustGraphOutputFormat::JsonLines,
12773                            export_node_properties: true,
12774                            export_edge_properties: true,
12775                            pretty_print: false,
12776                        };
12777
12778                        let exporter = RustGraphExporter::new(rustgraph_config);
12779                        match exporter.export(&graph, &format_dir) {
12780                            Ok(metadata) => {
12781                                snapshot.exports.insert(
12782                                    format!("{}_{}", graph_type.name, "rustgraph"),
12783                                    GraphExportInfo {
12784                                        name: graph_type.name.clone(),
12785                                        format: "rustgraph".to_string(),
12786                                        output_path: format_dir.clone(),
12787                                        node_count: metadata.num_nodes,
12788                                        edge_count: metadata.num_edges,
12789                                    },
12790                                );
12791                                snapshot.graph_count += 1;
12792                            }
12793                            Err(e) => {
12794                                warn!("Failed to export RustGraph: {}", e);
12795                            }
12796                        }
12797                    }
12798                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12799                        // Hypergraph export is handled separately in Phase 10b
12800                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12801                    }
12802                }
12803            }
12804
12805            if let Some(pb) = &pb {
12806                pb.inc(40);
12807            }
12808        }
12809
12810        stats.graph_export_count = snapshot.graph_count;
12811        snapshot.exported = snapshot.graph_count > 0;
12812
12813        if let Some(pb) = pb {
12814            pb.finish_with_message(format!(
12815                "Graphs exported: {} graphs ({} nodes, {} edges)",
12816                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12817            ));
12818        }
12819
12820        Ok(snapshot)
12821    }
12822
12823    /// Build additional graph types (banking, approval, entity) when relevant data
12824    /// is available. These run as a late phase because the data they need (banking
12825    /// snapshot, intercompany snapshot) is only generated after the main graph
12826    /// export phase.
12827    fn build_additional_graphs(
12828        &self,
12829        banking: &BankingSnapshot,
12830        intercompany: &IntercompanySnapshot,
12831        entries: &[JournalEntry],
12832        stats: &mut EnhancedGenerationStatistics,
12833    ) {
12834        let output_dir = self
12835            .output_path
12836            .clone()
12837            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12838        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12839
12840        // Banking graph: build when banking customers and transactions exist
12841        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12842            info!("Phase 10c: Building banking network graph");
12843            let config = BankingGraphConfig::default();
12844            let mut builder = BankingGraphBuilder::new(config);
12845            builder.add_customers(&banking.customers);
12846            builder.add_accounts(&banking.accounts, &banking.customers);
12847            builder.add_transactions(&banking.transactions);
12848            let graph = builder.build();
12849
12850            let node_count = graph.node_count();
12851            let edge_count = graph.edge_count();
12852            stats.graph_node_count += node_count;
12853            stats.graph_edge_count += edge_count;
12854
12855            // Export as PyG if configured
12856            for format in &self.config.graph_export.formats {
12857                if matches!(
12858                    format,
12859                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12860                ) {
12861                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12862                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12863                        warn!("Failed to create banking graph output dir: {}", e);
12864                        continue;
12865                    }
12866                    let pyg_config = PyGExportConfig::default();
12867                    let exporter = PyGExporter::new(pyg_config);
12868                    if let Err(e) = exporter.export(&graph, &format_dir) {
12869                        warn!("Failed to export banking graph as PyG: {}", e);
12870                    } else {
12871                        info!(
12872                            "Banking network graph exported: {} nodes, {} edges",
12873                            node_count, edge_count
12874                        );
12875                    }
12876                }
12877            }
12878        }
12879
12880        // Approval graph: build from journal entry approval workflows
12881        let approval_entries: Vec<_> = entries
12882            .iter()
12883            .filter(|je| je.header.approval_workflow.is_some())
12884            .collect();
12885
12886        if !approval_entries.is_empty() {
12887            info!(
12888                "Phase 10c: Building approval network graph ({} entries with approvals)",
12889                approval_entries.len()
12890            );
12891            let config = ApprovalGraphConfig::default();
12892            let mut builder = ApprovalGraphBuilder::new(config);
12893
12894            for je in &approval_entries {
12895                if let Some(ref wf) = je.header.approval_workflow {
12896                    for action in &wf.actions {
12897                        let record = datasynth_core::models::ApprovalRecord {
12898                            approval_id: format!(
12899                                "APR-{}-{}",
12900                                je.header.document_id, action.approval_level
12901                            ),
12902                            document_number: je.header.document_id.to_string(),
12903                            document_type: "JE".to_string(),
12904                            company_code: je.company_code().to_string(),
12905                            requester_id: wf.preparer_id.clone(),
12906                            requester_name: Some(wf.preparer_name.clone()),
12907                            approver_id: action.actor_id.clone(),
12908                            approver_name: action.actor_name.clone(),
12909                            approval_date: je.posting_date(),
12910                            action: format!("{:?}", action.action),
12911                            amount: wf.amount,
12912                            approval_limit: None,
12913                            comments: action.comments.clone(),
12914                            delegation_from: None,
12915                            is_auto_approved: false,
12916                        };
12917                        builder.add_approval(&record);
12918                    }
12919                }
12920            }
12921
12922            let graph = builder.build();
12923            let node_count = graph.node_count();
12924            let edge_count = graph.edge_count();
12925            stats.graph_node_count += node_count;
12926            stats.graph_edge_count += edge_count;
12927
12928            // Export as PyG if configured
12929            for format in &self.config.graph_export.formats {
12930                if matches!(
12931                    format,
12932                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12933                ) {
12934                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12935                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12936                        warn!("Failed to create approval graph output dir: {}", e);
12937                        continue;
12938                    }
12939                    let pyg_config = PyGExportConfig::default();
12940                    let exporter = PyGExporter::new(pyg_config);
12941                    if let Err(e) = exporter.export(&graph, &format_dir) {
12942                        warn!("Failed to export approval graph as PyG: {}", e);
12943                    } else {
12944                        info!(
12945                            "Approval network graph exported: {} nodes, {} edges",
12946                            node_count, edge_count
12947                        );
12948                    }
12949                }
12950            }
12951        }
12952
12953        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
12954        if self.config.companies.len() >= 2 {
12955            info!(
12956                "Phase 10c: Building entity relationship graph ({} companies)",
12957                self.config.companies.len()
12958            );
12959
12960            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12961                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12962
12963            // Map CompanyConfig → Company objects
12964            let parent_code = &self.config.companies[0].code;
12965            let mut companies: Vec<datasynth_core::models::Company> =
12966                Vec::with_capacity(self.config.companies.len());
12967
12968            // First company is the parent
12969            let first = &self.config.companies[0];
12970            companies.push(datasynth_core::models::Company::parent(
12971                &first.code,
12972                &first.name,
12973                &first.country,
12974                &first.currency,
12975            ));
12976
12977            // Remaining companies are subsidiaries (100% owned by parent)
12978            for cc in self.config.companies.iter().skip(1) {
12979                companies.push(datasynth_core::models::Company::subsidiary(
12980                    &cc.code,
12981                    &cc.name,
12982                    &cc.country,
12983                    &cc.currency,
12984                    parent_code,
12985                    rust_decimal::Decimal::from(100),
12986                ));
12987            }
12988
12989            // Build IntercompanyRelationship records (same logic as phase_intercompany)
12990            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12991                self.config
12992                    .companies
12993                    .iter()
12994                    .skip(1)
12995                    .enumerate()
12996                    .map(|(i, cc)| {
12997                        let mut rel =
12998                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
12999                                format!("REL{:03}", i + 1),
13000                                parent_code.clone(),
13001                                cc.code.clone(),
13002                                rust_decimal::Decimal::from(100),
13003                                start_date,
13004                            );
13005                        rel.functional_currency = cc.currency.clone();
13006                        rel
13007                    })
13008                    .collect();
13009
13010            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
13011            builder.add_companies(&companies);
13012            builder.add_ownership_relationships(&relationships);
13013
13014            // Thread IC matched-pair transaction edges into the entity graph
13015            for pair in &intercompany.matched_pairs {
13016                builder.add_intercompany_edge(
13017                    &pair.seller_company,
13018                    &pair.buyer_company,
13019                    pair.amount,
13020                    &format!("{:?}", pair.transaction_type),
13021                );
13022            }
13023
13024            let graph = builder.build();
13025            let node_count = graph.node_count();
13026            let edge_count = graph.edge_count();
13027            stats.graph_node_count += node_count;
13028            stats.graph_edge_count += edge_count;
13029
13030            // Export as PyG if configured
13031            for format in &self.config.graph_export.formats {
13032                if matches!(
13033                    format,
13034                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
13035                ) {
13036                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
13037                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
13038                        warn!("Failed to create entity graph output dir: {}", e);
13039                        continue;
13040                    }
13041                    let pyg_config = PyGExportConfig::default();
13042                    let exporter = PyGExporter::new(pyg_config);
13043                    if let Err(e) = exporter.export(&graph, &format_dir) {
13044                        warn!("Failed to export entity graph as PyG: {}", e);
13045                    } else {
13046                        info!(
13047                            "Entity relationship graph exported: {} nodes, {} edges",
13048                            node_count, edge_count
13049                        );
13050                    }
13051                }
13052            }
13053        } else {
13054            debug!(
13055                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
13056                self.config.companies.len()
13057            );
13058        }
13059    }
13060
13061    /// Export a multi-layer hypergraph for RustGraph integration.
13062    ///
13063    /// Builds a 3-layer hypergraph:
13064    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
13065    /// - Layer 2: Process Events (all process family document flows + OCPM events)
13066    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
13067    #[allow(clippy::too_many_arguments)]
13068    fn export_hypergraph(
13069        &self,
13070        coa: &Arc<ChartOfAccounts>,
13071        entries: &[JournalEntry],
13072        document_flows: &DocumentFlowSnapshot,
13073        sourcing: &SourcingSnapshot,
13074        hr: &HrSnapshot,
13075        manufacturing: &ManufacturingSnapshot,
13076        banking: &BankingSnapshot,
13077        audit: &AuditSnapshot,
13078        financial_reporting: &FinancialReportingSnapshot,
13079        ocpm: &OcpmSnapshot,
13080        compliance: &ComplianceRegulationsSnapshot,
13081        stats: &mut EnhancedGenerationStatistics,
13082    ) -> SynthResult<HypergraphExportInfo> {
13083        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
13084        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
13085        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
13086        use datasynth_graph::models::hypergraph::AggregationStrategy;
13087
13088        let hg_settings = &self.config.graph_export.hypergraph;
13089
13090        // Parse aggregation strategy from config string
13091        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
13092            "truncate" => AggregationStrategy::Truncate,
13093            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
13094            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
13095            "importance_sample" => AggregationStrategy::ImportanceSample,
13096            _ => AggregationStrategy::PoolByCounterparty,
13097        };
13098
13099        let builder_config = HypergraphConfig {
13100            max_nodes: hg_settings.max_nodes,
13101            aggregation_strategy,
13102            include_coso: hg_settings.governance_layer.include_coso,
13103            include_controls: hg_settings.governance_layer.include_controls,
13104            include_sox: hg_settings.governance_layer.include_sox,
13105            include_vendors: hg_settings.governance_layer.include_vendors,
13106            include_customers: hg_settings.governance_layer.include_customers,
13107            include_employees: hg_settings.governance_layer.include_employees,
13108            include_p2p: hg_settings.process_layer.include_p2p,
13109            include_o2c: hg_settings.process_layer.include_o2c,
13110            include_s2c: hg_settings.process_layer.include_s2c,
13111            include_h2r: hg_settings.process_layer.include_h2r,
13112            include_mfg: hg_settings.process_layer.include_mfg,
13113            include_bank: hg_settings.process_layer.include_bank,
13114            include_audit: hg_settings.process_layer.include_audit,
13115            include_r2r: hg_settings.process_layer.include_r2r,
13116            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
13117            docs_per_counterparty_threshold: hg_settings
13118                .process_layer
13119                .docs_per_counterparty_threshold,
13120            include_accounts: hg_settings.accounting_layer.include_accounts,
13121            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
13122            include_cross_layer_edges: hg_settings.cross_layer.enabled,
13123            include_compliance: self.config.compliance_regulations.enabled,
13124            include_tax: true,
13125            include_treasury: true,
13126            include_esg: true,
13127            include_project: true,
13128            include_intercompany: true,
13129            include_temporal_events: true,
13130        };
13131
13132        let mut builder = HypergraphBuilder::new(builder_config);
13133
13134        // Layer 1: Governance & Controls
13135        builder.add_coso_framework();
13136
13137        // Add controls if available (generated during JE generation)
13138        // Controls are generated per-company; we use the standard set
13139        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
13140            let controls = InternalControl::standard_controls();
13141            builder.add_controls(&controls);
13142        }
13143
13144        // Add master data
13145        builder.add_vendors(&self.master_data.vendors);
13146        builder.add_customers(&self.master_data.customers);
13147        builder.add_employees(&self.master_data.employees);
13148
13149        // Layer 2: Process Events (all process families)
13150        builder.add_p2p_documents(
13151            &document_flows.purchase_orders,
13152            &document_flows.goods_receipts,
13153            &document_flows.vendor_invoices,
13154            &document_flows.payments,
13155        );
13156        builder.add_o2c_documents(
13157            &document_flows.sales_orders,
13158            &document_flows.deliveries,
13159            &document_flows.customer_invoices,
13160        );
13161        builder.add_s2c_documents(
13162            &sourcing.sourcing_projects,
13163            &sourcing.qualifications,
13164            &sourcing.rfx_events,
13165            &sourcing.bids,
13166            &sourcing.bid_evaluations,
13167            &sourcing.contracts,
13168        );
13169        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
13170        builder.add_mfg_documents(
13171            &manufacturing.production_orders,
13172            &manufacturing.quality_inspections,
13173            &manufacturing.cycle_counts,
13174        );
13175        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
13176        builder.add_audit_documents(
13177            &audit.engagements,
13178            &audit.workpapers,
13179            &audit.findings,
13180            &audit.evidence,
13181            &audit.risk_assessments,
13182            &audit.judgments,
13183            &audit.materiality_calculations,
13184            &audit.audit_opinions,
13185            &audit.going_concern_assessments,
13186        );
13187        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
13188
13189        // OCPM events as hyperedges
13190        if let Some(ref event_log) = ocpm.event_log {
13191            builder.add_ocpm_events(event_log);
13192        }
13193
13194        // Compliance regulations as cross-layer nodes
13195        if self.config.compliance_regulations.enabled
13196            && hg_settings.governance_layer.include_controls
13197        {
13198            // Reconstruct ComplianceStandard objects from the registry
13199            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13200            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
13201                .standard_records
13202                .iter()
13203                .filter_map(|r| {
13204                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
13205                    registry.get(&sid).cloned()
13206                })
13207                .collect();
13208
13209            builder.add_compliance_regulations(
13210                &standards,
13211                &compliance.findings,
13212                &compliance.filings,
13213            );
13214        }
13215
13216        // Layer 3: Accounting Network
13217        builder.add_accounts(coa);
13218        builder.add_journal_entries_as_hyperedges(entries);
13219
13220        // Build the hypergraph
13221        let hypergraph = builder.build();
13222
13223        // Export
13224        let output_dir = self
13225            .output_path
13226            .clone()
13227            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13228        let hg_dir = output_dir
13229            .join(&self.config.graph_export.output_subdirectory)
13230            .join(&hg_settings.output_subdirectory);
13231
13232        // Branch on output format
13233        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
13234            "unified" => {
13235                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13236                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13237                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
13238                })?;
13239                (
13240                    metadata.num_nodes,
13241                    metadata.num_edges,
13242                    metadata.num_hyperedges,
13243                )
13244            }
13245            _ => {
13246                // "native" or any unrecognized format → use existing exporter
13247                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
13248                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
13249                    SynthError::generation(format!("Hypergraph export failed: {e}"))
13250                })?;
13251                (
13252                    metadata.num_nodes,
13253                    metadata.num_edges,
13254                    metadata.num_hyperedges,
13255                )
13256            }
13257        };
13258
13259        // Stream to RustGraph ingest endpoint if configured
13260        #[cfg(feature = "streaming")]
13261        if let Some(ref target_url) = hg_settings.stream_target {
13262            use crate::stream_client::{StreamClient, StreamConfig};
13263            use std::io::Write as _;
13264
13265            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
13266            let stream_config = StreamConfig {
13267                target_url: target_url.clone(),
13268                batch_size: hg_settings.stream_batch_size,
13269                api_key,
13270                ..StreamConfig::default()
13271            };
13272
13273            match StreamClient::new(stream_config) {
13274                Ok(mut client) => {
13275                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
13276                    match exporter.export_to_writer(&hypergraph, &mut client) {
13277                        Ok(_) => {
13278                            if let Err(e) = client.flush() {
13279                                warn!("Failed to flush stream client: {}", e);
13280                            } else {
13281                                info!("Streamed {} records to {}", client.total_sent(), target_url);
13282                            }
13283                        }
13284                        Err(e) => {
13285                            warn!("Streaming export failed: {}", e);
13286                        }
13287                    }
13288                }
13289                Err(e) => {
13290                    warn!("Failed to create stream client: {}", e);
13291                }
13292            }
13293        }
13294
13295        // Update stats
13296        stats.graph_node_count += num_nodes;
13297        stats.graph_edge_count += num_edges;
13298        stats.graph_export_count += 1;
13299
13300        Ok(HypergraphExportInfo {
13301            node_count: num_nodes,
13302            edge_count: num_edges,
13303            hyperedge_count: num_hyperedges,
13304            output_path: hg_dir,
13305        })
13306    }
13307
13308    /// Generate banking KYC/AML data.
13309    ///
13310    /// Creates banking customers, accounts, and transactions with AML typology injection.
13311    /// Uses the BankingOrchestrator from synth-banking crate.
13312    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
13313        let pb = self.create_progress_bar(100, "Generating Banking Data");
13314
13315        // Build the banking orchestrator from config
13316        let orchestrator = BankingOrchestratorBuilder::new()
13317            .config(self.config.banking.clone())
13318            .seed(self.seed + 9000)
13319            .country_pack(self.primary_pack().clone())
13320            .build();
13321
13322        if let Some(pb) = &pb {
13323            pb.inc(10);
13324        }
13325
13326        // Generate the banking data
13327        let result = orchestrator.generate();
13328
13329        if let Some(pb) = &pb {
13330            pb.inc(90);
13331            pb.finish_with_message(format!(
13332                "Banking: {} customers, {} transactions",
13333                result.customers.len(),
13334                result.transactions.len()
13335            ));
13336        }
13337
13338        // Cross-reference banking customers with core master data so that
13339        // banking customer names align with the enterprise customer list.
13340        // We rotate through core customers, overlaying their name and country
13341        // onto the generated banking customers where possible.
13342        let mut banking_customers = result.customers;
13343        let core_customers = &self.master_data.customers;
13344        if !core_customers.is_empty() {
13345            for (i, bc) in banking_customers.iter_mut().enumerate() {
13346                let core = &core_customers[i % core_customers.len()];
13347                bc.name = CustomerName::business(&core.name);
13348                bc.residence_country = core.country.clone();
13349                bc.enterprise_customer_id = Some(core.customer_id.clone());
13350            }
13351            debug!(
13352                "Cross-referenced {} banking customers with {} core customers",
13353                banking_customers.len(),
13354                core_customers.len()
13355            );
13356        }
13357
13358        Ok(BankingSnapshot {
13359            customers: banking_customers,
13360            accounts: result.accounts,
13361            transactions: result.transactions,
13362            transaction_labels: result.transaction_labels,
13363            customer_labels: result.customer_labels,
13364            account_labels: result.account_labels,
13365            relationship_labels: result.relationship_labels,
13366            narratives: result.narratives,
13367            suspicious_count: result.stats.suspicious_count,
13368            scenario_count: result.scenarios.len(),
13369        })
13370    }
13371
13372    /// Calculate total transactions to generate.
13373    fn calculate_total_transactions(&self) -> u64 {
13374        let months = self.config.global.period_months as f64;
13375        self.config
13376            .companies
13377            .iter()
13378            .map(|c| {
13379                let annual = c.annual_transaction_volume.count() as f64;
13380                let weighted = annual * c.volume_weight;
13381                (weighted * months / 12.0) as u64
13382            })
13383            .sum()
13384    }
13385
13386    /// Create a progress bar if progress display is enabled.
13387    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
13388        if !self.phase_config.show_progress {
13389            return None;
13390        }
13391
13392        let pb = if let Some(mp) = &self.multi_progress {
13393            mp.add(ProgressBar::new(total))
13394        } else {
13395            ProgressBar::new(total)
13396        };
13397
13398        pb.set_style(
13399            ProgressStyle::default_bar()
13400                .template(&format!(
13401                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
13402                ))
13403                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
13404                .progress_chars("#>-"),
13405        );
13406
13407        Some(pb)
13408    }
13409
13410    /// Get the generated chart of accounts.
13411    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
13412        self.coa.clone()
13413    }
13414
    /// Get the generated master data.
    ///
    /// Returns a shared borrow of the orchestrator's `master_data` snapshot;
    /// nothing is cloned.
    pub fn get_master_data(&self) -> &MasterDataSnapshot {
        &self.master_data
    }
13419
13420    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
13421    fn phase_compliance_regulations(
13422        &mut self,
13423        _stats: &mut EnhancedGenerationStatistics,
13424    ) -> SynthResult<ComplianceRegulationsSnapshot> {
13425        if !self.phase_config.generate_compliance_regulations {
13426            return Ok(ComplianceRegulationsSnapshot::default());
13427        }
13428
13429        info!("Phase: Generating Compliance Regulations Data");
13430
13431        let cr_config = &self.config.compliance_regulations;
13432
13433        // Determine jurisdictions: from config or inferred from companies
13434        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13435            self.config
13436                .companies
13437                .iter()
13438                .map(|c| c.country.clone())
13439                .collect::<std::collections::HashSet<_>>()
13440                .into_iter()
13441                .collect()
13442        } else {
13443            cr_config.jurisdictions.clone()
13444        };
13445
13446        // Determine reference date
13447        let fallback_date =
13448            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13449        let reference_date = cr_config
13450            .reference_date
13451            .as_ref()
13452            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13453            .unwrap_or_else(|| {
13454                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13455                    .unwrap_or(fallback_date)
13456            });
13457
13458        // Generate standards registry data
13459        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13460        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13461        let cross_reference_records = reg_gen.generate_cross_reference_records();
13462        let jurisdiction_records =
13463            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13464
13465        info!(
13466            "  Standards: {} records, {} cross-references, {} jurisdictions",
13467            standard_records.len(),
13468            cross_reference_records.len(),
13469            jurisdiction_records.len()
13470        );
13471
13472        // Generate audit procedures (if enabled)
13473        let audit_procedures = if cr_config.audit_procedures.enabled {
13474            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13475                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13476                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13477                confidence_level: cr_config.audit_procedures.confidence_level,
13478                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13479            };
13480            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13481                self.seed + 9000,
13482                proc_config,
13483            );
13484            let registry = reg_gen.registry();
13485            let mut all_procs = Vec::new();
13486            for jurisdiction in &jurisdictions {
13487                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13488                all_procs.extend(procs);
13489            }
13490            info!("  Audit procedures: {}", all_procs.len());
13491            all_procs
13492        } else {
13493            Vec::new()
13494        };
13495
13496        // Generate compliance findings (if enabled)
13497        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13498            let finding_config =
13499                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13500                    finding_rate: cr_config.findings.finding_rate,
13501                    material_weakness_rate: cr_config.findings.material_weakness_rate,
13502                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13503                    generate_remediation: cr_config.findings.generate_remediation,
13504                };
13505            let mut finding_gen =
13506                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13507                    self.seed + 9100,
13508                    finding_config,
13509                );
13510            let mut all_findings = Vec::new();
13511            for company in &self.config.companies {
13512                let company_findings =
13513                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13514                all_findings.extend(company_findings);
13515            }
13516            info!("  Compliance findings: {}", all_findings.len());
13517            all_findings
13518        } else {
13519            Vec::new()
13520        };
13521
13522        // Generate regulatory filings (if enabled)
13523        let filings = if cr_config.filings.enabled {
13524            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13525                filing_types: cr_config.filings.filing_types.clone(),
13526                generate_status_progression: cr_config.filings.generate_status_progression,
13527            };
13528            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13529                self.seed + 9200,
13530                filing_config,
13531            );
13532            let company_codes: Vec<String> = self
13533                .config
13534                .companies
13535                .iter()
13536                .map(|c| c.code.clone())
13537                .collect();
13538            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13539                .unwrap_or(fallback_date);
13540            let filings = filing_gen.generate_filings(
13541                &company_codes,
13542                &jurisdictions,
13543                start_date,
13544                self.config.global.period_months,
13545            );
13546            info!("  Regulatory filings: {}", filings.len());
13547            filings
13548        } else {
13549            Vec::new()
13550        };
13551
13552        // Build compliance graph (if enabled)
13553        let compliance_graph = if cr_config.graph.enabled {
13554            let graph_config = datasynth_graph::ComplianceGraphConfig {
13555                include_standard_nodes: cr_config.graph.include_compliance_nodes,
13556                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13557                include_cross_references: cr_config.graph.include_cross_references,
13558                include_supersession_edges: cr_config.graph.include_supersession_edges,
13559                include_account_links: cr_config.graph.include_account_links,
13560                include_control_links: cr_config.graph.include_control_links,
13561                include_company_links: cr_config.graph.include_company_links,
13562            };
13563            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13564
13565            // Add standard nodes
13566            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13567                .iter()
13568                .map(|r| datasynth_graph::StandardNodeInput {
13569                    standard_id: r.standard_id.clone(),
13570                    title: r.title.clone(),
13571                    category: r.category.clone(),
13572                    domain: r.domain.clone(),
13573                    is_active: r.is_active,
13574                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
13575                    applicable_account_types: r.applicable_account_types.clone(),
13576                    applicable_processes: r.applicable_processes.clone(),
13577                })
13578                .collect();
13579            builder.add_standards(&standard_inputs);
13580
13581            // Add jurisdiction nodes
13582            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13583                jurisdiction_records
13584                    .iter()
13585                    .map(|r| datasynth_graph::JurisdictionNodeInput {
13586                        country_code: r.country_code.clone(),
13587                        country_name: r.country_name.clone(),
13588                        framework: r.accounting_framework.clone(),
13589                        standard_count: r.standard_count,
13590                        tax_rate: r.statutory_tax_rate,
13591                    })
13592                    .collect();
13593            builder.add_jurisdictions(&jurisdiction_inputs);
13594
13595            // Add cross-reference edges
13596            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13597                cross_reference_records
13598                    .iter()
13599                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13600                        from_standard: r.from_standard.clone(),
13601                        to_standard: r.to_standard.clone(),
13602                        relationship: r.relationship.clone(),
13603                        convergence_level: r.convergence_level,
13604                    })
13605                    .collect();
13606            builder.add_cross_references(&xref_inputs);
13607
13608            // Add jurisdiction→standard mappings
13609            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13610                .iter()
13611                .map(|r| datasynth_graph::JurisdictionMappingInput {
13612                    country_code: r.jurisdiction.clone(),
13613                    standard_id: r.standard_id.clone(),
13614                })
13615                .collect();
13616            builder.add_jurisdiction_mappings(&mapping_inputs);
13617
13618            // Add procedure nodes
13619            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13620                .iter()
13621                .map(|p| datasynth_graph::ProcedureNodeInput {
13622                    procedure_id: p.procedure_id.clone(),
13623                    standard_id: p.standard_id.clone(),
13624                    procedure_type: p.procedure_type.clone(),
13625                    sample_size: p.sample_size,
13626                    confidence_level: p.confidence_level,
13627                })
13628                .collect();
13629            builder.add_procedures(&proc_inputs);
13630
13631            // Add finding nodes
13632            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13633                .iter()
13634                .map(|f| datasynth_graph::FindingNodeInput {
13635                    finding_id: f.finding_id.to_string(),
13636                    standard_id: f
13637                        .related_standards
13638                        .first()
13639                        .map(|s| s.as_str().to_string())
13640                        .unwrap_or_default(),
13641                    severity: f.severity.to_string(),
13642                    deficiency_level: f.deficiency_level.to_string(),
13643                    severity_score: f.deficiency_level.severity_score(),
13644                    control_id: f.control_id.clone(),
13645                    affected_accounts: f.affected_accounts.clone(),
13646                })
13647                .collect();
13648            builder.add_findings(&finding_inputs);
13649
13650            // Cross-domain: link standards to accounts from chart of accounts
13651            if cr_config.graph.include_account_links {
13652                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13653                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13654                for std_record in &standard_records {
13655                    if let Some(std_obj) =
13656                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
13657                            &std_record.standard_id,
13658                        ))
13659                    {
13660                        for acct_type in &std_obj.applicable_account_types {
13661                            account_links.push(datasynth_graph::AccountLinkInput {
13662                                standard_id: std_record.standard_id.clone(),
13663                                account_code: acct_type.clone(),
13664                                account_name: acct_type.clone(),
13665                            });
13666                        }
13667                    }
13668                }
13669                builder.add_account_links(&account_links);
13670            }
13671
13672            // Cross-domain: link standards to internal controls
13673            if cr_config.graph.include_control_links {
13674                let mut control_links = Vec::new();
13675                // SOX/PCAOB standards link to all controls
13676                let sox_like_ids: Vec<String> = standard_records
13677                    .iter()
13678                    .filter(|r| {
13679                        r.standard_id.starts_with("SOX")
13680                            || r.standard_id.starts_with("PCAOB-AS-2201")
13681                    })
13682                    .map(|r| r.standard_id.clone())
13683                    .collect();
13684                // Get control IDs from config (C001-C060 standard controls)
13685                let control_ids = [
13686                    ("C001", "Cash Controls"),
13687                    ("C002", "Large Transaction Approval"),
13688                    ("C010", "PO Approval"),
13689                    ("C011", "Three-Way Match"),
13690                    ("C020", "Revenue Recognition"),
13691                    ("C021", "Credit Check"),
13692                    ("C030", "Manual JE Approval"),
13693                    ("C031", "Period Close Review"),
13694                    ("C032", "Account Reconciliation"),
13695                    ("C040", "Payroll Processing"),
13696                    ("C050", "Fixed Asset Capitalization"),
13697                    ("C060", "Intercompany Elimination"),
13698                ];
13699                for sox_id in &sox_like_ids {
13700                    for (ctrl_id, ctrl_name) in &control_ids {
13701                        control_links.push(datasynth_graph::ControlLinkInput {
13702                            standard_id: sox_id.clone(),
13703                            control_id: ctrl_id.to_string(),
13704                            control_name: ctrl_name.to_string(),
13705                        });
13706                    }
13707                }
13708                builder.add_control_links(&control_links);
13709            }
13710
13711            // Cross-domain: filing nodes with company links
13712            if cr_config.graph.include_company_links {
13713                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13714                    .iter()
13715                    .enumerate()
13716                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
13717                        filing_id: format!("F{:04}", i + 1),
13718                        filing_type: f.filing_type.to_string(),
13719                        company_code: f.company_code.clone(),
13720                        jurisdiction: f.jurisdiction.clone(),
13721                        status: format!("{:?}", f.status),
13722                    })
13723                    .collect();
13724                builder.add_filings(&filing_inputs);
13725            }
13726
13727            let graph = builder.build();
13728            info!(
13729                "  Compliance graph: {} nodes, {} edges",
13730                graph.nodes.len(),
13731                graph.edges.len()
13732            );
13733            Some(graph)
13734        } else {
13735            None
13736        };
13737
13738        self.check_resources_with_log("post-compliance-regulations")?;
13739
13740        Ok(ComplianceRegulationsSnapshot {
13741            standard_records,
13742            cross_reference_records,
13743            jurisdiction_records,
13744            audit_procedures,
13745            findings,
13746            filings,
13747            compliance_graph,
13748        })
13749    }
13750
13751    /// Build a lineage graph describing config → phase → output relationships.
13752    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13753        use super::lineage::LineageGraphBuilder;
13754
13755        let mut builder = LineageGraphBuilder::new();
13756
13757        // Config sections
13758        builder.add_config_section("config:global", "Global Config");
13759        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13760        builder.add_config_section("config:transactions", "Transaction Config");
13761
13762        // Generator phases
13763        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13764        builder.add_generator_phase("phase:je", "Journal Entry Generation");
13765
13766        // Config → phase edges
13767        builder.configured_by("phase:coa", "config:chart_of_accounts");
13768        builder.configured_by("phase:je", "config:transactions");
13769
13770        // Output files
13771        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13772        builder.produced_by("output:je", "phase:je");
13773
13774        // Optional phases based on config
13775        if self.phase_config.generate_master_data {
13776            builder.add_config_section("config:master_data", "Master Data Config");
13777            builder.add_generator_phase("phase:master_data", "Master Data Generation");
13778            builder.configured_by("phase:master_data", "config:master_data");
13779            builder.input_to("phase:master_data", "phase:je");
13780        }
13781
13782        if self.phase_config.generate_document_flows {
13783            builder.add_config_section("config:document_flows", "Document Flow Config");
13784            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13785            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13786            builder.configured_by("phase:p2p", "config:document_flows");
13787            builder.configured_by("phase:o2c", "config:document_flows");
13788
13789            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13790            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13791            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13792            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13793            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13794
13795            builder.produced_by("output:po", "phase:p2p");
13796            builder.produced_by("output:gr", "phase:p2p");
13797            builder.produced_by("output:vi", "phase:p2p");
13798            builder.produced_by("output:so", "phase:o2c");
13799            builder.produced_by("output:ci", "phase:o2c");
13800        }
13801
13802        if self.phase_config.inject_anomalies {
13803            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13804            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13805            builder.configured_by("phase:anomaly", "config:fraud");
13806            builder.add_output_file(
13807                "output:labels",
13808                "Anomaly Labels",
13809                "labels/anomaly_labels.csv",
13810            );
13811            builder.produced_by("output:labels", "phase:anomaly");
13812        }
13813
13814        if self.phase_config.generate_audit {
13815            builder.add_config_section("config:audit", "Audit Config");
13816            builder.add_generator_phase("phase:audit", "Audit Data Generation");
13817            builder.configured_by("phase:audit", "config:audit");
13818        }
13819
13820        if self.phase_config.generate_banking {
13821            builder.add_config_section("config:banking", "Banking Config");
13822            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13823            builder.configured_by("phase:banking", "config:banking");
13824        }
13825
13826        if self.config.llm.enabled {
13827            builder.add_config_section("config:llm", "LLM Enrichment Config");
13828            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13829            builder.configured_by("phase:llm_enrichment", "config:llm");
13830        }
13831
13832        if self.config.diffusion.enabled {
13833            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13834            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13835            builder.configured_by("phase:diffusion", "config:diffusion");
13836        }
13837
13838        if self.config.causal.enabled {
13839            builder.add_config_section("config:causal", "Causal Generation Config");
13840            builder.add_generator_phase("phase:causal", "Causal Overlay");
13841            builder.configured_by("phase:causal", "config:causal");
13842        }
13843
13844        builder.build()
13845    }
13846
13847    // -----------------------------------------------------------------------
13848    // Trial-balance helpers used to replace hardcoded proxy values
13849    // -----------------------------------------------------------------------
13850
13851    /// Compute total revenue for a company from its journal entries.
13852    ///
13853    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
13854    /// net credits on all revenue-account lines filtered to `company_code`.
13855    fn compute_company_revenue(
13856        entries: &[JournalEntry],
13857        company_code: &str,
13858    ) -> rust_decimal::Decimal {
13859        use rust_decimal::Decimal;
13860        let mut revenue = Decimal::ZERO;
13861        for je in entries {
13862            if je.header.company_code != company_code {
13863                continue;
13864            }
13865            for line in &je.lines {
13866                if line.gl_account.starts_with('4') {
13867                    // Revenue is credit-normal
13868                    revenue += line.credit_amount - line.debit_amount;
13869                }
13870            }
13871        }
13872        revenue.max(Decimal::ZERO)
13873    }
13874
13875    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
13876    ///
13877    /// Asset accounts start with "1"; liability accounts start with "2".
13878    fn compute_entity_net_assets(
13879        entries: &[JournalEntry],
13880        entity_code: &str,
13881    ) -> rust_decimal::Decimal {
13882        use rust_decimal::Decimal;
13883        let mut asset_net = Decimal::ZERO;
13884        let mut liability_net = Decimal::ZERO;
13885        for je in entries {
13886            if je.header.company_code != entity_code {
13887                continue;
13888            }
13889            for line in &je.lines {
13890                if line.gl_account.starts_with('1') {
13891                    asset_net += line.debit_amount - line.credit_amount;
13892                } else if line.gl_account.starts_with('2') {
13893                    liability_net += line.credit_amount - line.debit_amount;
13894                }
13895            }
13896        }
13897        asset_net - liability_net
13898    }
13899}
13900
13901/// Get the directory name for a graph export format.
13902fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13903    match format {
13904        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13905        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13906        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13907        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13908        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13909    }
13910}
13911
13912/// Aggregate journal entry lines into per-account trial balance rows.
13913///
13914/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
13915/// debit/credit totals and a net balance (debit minus credit).
13916fn compute_trial_balance_entries(
13917    entries: &[JournalEntry],
13918    entity_code: &str,
13919    fiscal_year: i32,
13920    coa: Option<&ChartOfAccounts>,
13921) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13922    use std::collections::BTreeMap;
13923
13924    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13925        BTreeMap::new();
13926
13927    for je in entries {
13928        for line in &je.lines {
13929            let entry = balances.entry(line.account_code.clone()).or_default();
13930            entry.0 += line.debit_amount;
13931            entry.1 += line.credit_amount;
13932        }
13933    }
13934
13935    balances
13936        .into_iter()
13937        .map(
13938            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13939                account_description: coa
13940                    .and_then(|c| c.get_account(&account_code))
13941                    .map(|a| a.description().to_string())
13942                    .unwrap_or_else(|| account_code.clone()),
13943                account_code,
13944                debit_balance: debit,
13945                credit_balance: credit,
13946                net_balance: debit - credit,
13947                entity_code: entity_code.to_string(),
13948                period: format!("FY{}", fiscal_year),
13949            },
13950        )
13951        .collect()
13952}
13953
13954#[cfg(test)]
13955#[allow(clippy::unwrap_used)]
13956mod tests {
13957    use super::*;
13958    use datasynth_config::schema::*;
13959
13960    fn create_test_config() -> GeneratorConfig {
13961        GeneratorConfig {
13962            global: GlobalConfig {
13963                industry: IndustrySector::Manufacturing,
13964                start_date: "2024-01-01".to_string(),
13965                period_months: 1,
13966                seed: Some(42),
13967                parallel: false,
13968                group_currency: "USD".to_string(),
13969                presentation_currency: None,
13970                worker_threads: 0,
13971                memory_limit_mb: 0,
13972                fiscal_year_months: None,
13973            },
13974            companies: vec![CompanyConfig {
13975                code: "1000".to_string(),
13976                name: "Test Company".to_string(),
13977                currency: "USD".to_string(),
13978                functional_currency: None,
13979                country: "US".to_string(),
13980                annual_transaction_volume: TransactionVolume::TenK,
13981                volume_weight: 1.0,
13982                fiscal_year_variant: "K4".to_string(),
13983            }],
13984            chart_of_accounts: ChartOfAccountsConfig {
13985                complexity: CoAComplexity::Small,
13986                industry_specific: true,
13987                custom_accounts: None,
13988                min_hierarchy_depth: 2,
13989                max_hierarchy_depth: 4,
13990            },
13991            transactions: TransactionConfig::default(),
13992            output: OutputConfig::default(),
13993            fraud: FraudConfig::default(),
13994            internal_controls: InternalControlsConfig::default(),
13995            business_processes: BusinessProcessConfig::default(),
13996            user_personas: UserPersonaConfig::default(),
13997            templates: TemplateConfig::default(),
13998            approval: ApprovalConfig::default(),
13999            departments: DepartmentConfig::default(),
14000            master_data: MasterDataConfig::default(),
14001            document_flows: DocumentFlowConfig::default(),
14002            intercompany: IntercompanyConfig::default(),
14003            balance: BalanceConfig::default(),
14004            ocpm: OcpmConfig::default(),
14005            audit: AuditGenerationConfig::default(),
14006            banking: datasynth_banking::BankingConfig::default(),
14007            data_quality: DataQualitySchemaConfig::default(),
14008            scenario: ScenarioConfig::default(),
14009            temporal: TemporalDriftConfig::default(),
14010            graph_export: GraphExportConfig::default(),
14011            streaming: StreamingSchemaConfig::default(),
14012            rate_limit: RateLimitSchemaConfig::default(),
14013            temporal_attributes: TemporalAttributeSchemaConfig::default(),
14014            relationships: RelationshipSchemaConfig::default(),
14015            accounting_standards: AccountingStandardsConfig::default(),
14016            audit_standards: AuditStandardsConfig::default(),
14017            distributions: Default::default(),
14018            temporal_patterns: Default::default(),
14019            vendor_network: VendorNetworkSchemaConfig::default(),
14020            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
14021            relationship_strength: RelationshipStrengthSchemaConfig::default(),
14022            cross_process_links: CrossProcessLinksSchemaConfig::default(),
14023            organizational_events: OrganizationalEventsSchemaConfig::default(),
14024            behavioral_drift: BehavioralDriftSchemaConfig::default(),
14025            market_drift: MarketDriftSchemaConfig::default(),
14026            drift_labeling: DriftLabelingSchemaConfig::default(),
14027            anomaly_injection: Default::default(),
14028            industry_specific: Default::default(),
14029            fingerprint_privacy: Default::default(),
14030            quality_gates: Default::default(),
14031            compliance: Default::default(),
14032            webhooks: Default::default(),
14033            llm: Default::default(),
14034            diffusion: Default::default(),
14035            causal: Default::default(),
14036            source_to_pay: Default::default(),
14037            financial_reporting: Default::default(),
14038            hr: Default::default(),
14039            manufacturing: Default::default(),
14040            sales_quotes: Default::default(),
14041            tax: Default::default(),
14042            treasury: Default::default(),
14043            project_accounting: Default::default(),
14044            esg: Default::default(),
14045            country_packs: None,
14046            scenarios: Default::default(),
14047            session: Default::default(),
14048            compliance_regulations: Default::default(),
14049        }
14050    }
14051
14052    #[test]
14053    fn test_enhanced_orchestrator_creation() {
14054        let config = create_test_config();
14055        let orchestrator = EnhancedOrchestrator::with_defaults(config);
14056        assert!(orchestrator.is_ok());
14057    }
14058
14059    #[test]
14060    fn test_minimal_generation() {
14061        let config = create_test_config();
14062        let phase_config = PhaseConfig {
14063            generate_master_data: false,
14064            generate_document_flows: false,
14065            generate_journal_entries: true,
14066            inject_anomalies: false,
14067            show_progress: false,
14068            ..Default::default()
14069        };
14070
14071        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14072        let result = orchestrator.generate();
14073
14074        assert!(result.is_ok());
14075        let result = result.unwrap();
14076        assert!(!result.journal_entries.is_empty());
14077    }
14078
14079    #[test]
14080    fn test_master_data_generation() {
14081        let config = create_test_config();
14082        let phase_config = PhaseConfig {
14083            generate_master_data: true,
14084            generate_document_flows: false,
14085            generate_journal_entries: false,
14086            inject_anomalies: false,
14087            show_progress: false,
14088            vendors_per_company: 5,
14089            customers_per_company: 5,
14090            materials_per_company: 10,
14091            assets_per_company: 5,
14092            employees_per_company: 10,
14093            ..Default::default()
14094        };
14095
14096        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14097        let result = orchestrator.generate().unwrap();
14098
14099        assert!(!result.master_data.vendors.is_empty());
14100        assert!(!result.master_data.customers.is_empty());
14101        assert!(!result.master_data.materials.is_empty());
14102    }
14103
14104    #[test]
14105    fn test_document_flow_generation() {
14106        let config = create_test_config();
14107        let phase_config = PhaseConfig {
14108            generate_master_data: true,
14109            generate_document_flows: true,
14110            generate_journal_entries: false,
14111            inject_anomalies: false,
14112            inject_data_quality: false,
14113            validate_balances: false,
14114            generate_ocpm_events: false,
14115            show_progress: false,
14116            vendors_per_company: 5,
14117            customers_per_company: 5,
14118            materials_per_company: 10,
14119            assets_per_company: 5,
14120            employees_per_company: 10,
14121            p2p_chains: 5,
14122            o2c_chains: 5,
14123            ..Default::default()
14124        };
14125
14126        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14127        let result = orchestrator.generate().unwrap();
14128
14129        // Should have generated P2P and O2C chains
14130        assert!(!result.document_flows.p2p_chains.is_empty());
14131        assert!(!result.document_flows.o2c_chains.is_empty());
14132
14133        // Flattened documents should be populated
14134        assert!(!result.document_flows.purchase_orders.is_empty());
14135        assert!(!result.document_flows.sales_orders.is_empty());
14136    }
14137
14138    #[test]
14139    fn test_anomaly_injection() {
14140        let config = create_test_config();
14141        let phase_config = PhaseConfig {
14142            generate_master_data: false,
14143            generate_document_flows: false,
14144            generate_journal_entries: true,
14145            inject_anomalies: true,
14146            show_progress: false,
14147            ..Default::default()
14148        };
14149
14150        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14151        let result = orchestrator.generate().unwrap();
14152
14153        // Should have journal entries
14154        assert!(!result.journal_entries.is_empty());
14155
14156        // With ~833 entries and 2% rate, expect some anomalies
14157        // Note: This is probabilistic, so we just verify the structure exists
14158        assert!(result.anomaly_labels.summary.is_some());
14159    }
14160
14161    #[test]
14162    fn test_full_generation_pipeline() {
14163        let config = create_test_config();
14164        let phase_config = PhaseConfig {
14165            generate_master_data: true,
14166            generate_document_flows: true,
14167            generate_journal_entries: true,
14168            inject_anomalies: false,
14169            inject_data_quality: false,
14170            validate_balances: true,
14171            generate_ocpm_events: false,
14172            show_progress: false,
14173            vendors_per_company: 3,
14174            customers_per_company: 3,
14175            materials_per_company: 5,
14176            assets_per_company: 3,
14177            employees_per_company: 5,
14178            p2p_chains: 3,
14179            o2c_chains: 3,
14180            ..Default::default()
14181        };
14182
14183        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14184        let result = orchestrator.generate().unwrap();
14185
14186        // All phases should have results
14187        assert!(!result.master_data.vendors.is_empty());
14188        assert!(!result.master_data.customers.is_empty());
14189        assert!(!result.document_flows.p2p_chains.is_empty());
14190        assert!(!result.document_flows.o2c_chains.is_empty());
14191        assert!(!result.journal_entries.is_empty());
14192        assert!(result.statistics.accounts_count > 0);
14193
14194        // Subledger linking should have run
14195        assert!(!result.subledger.ap_invoices.is_empty());
14196        assert!(!result.subledger.ar_invoices.is_empty());
14197
14198        // Balance validation should have run
14199        assert!(result.balance_validation.validated);
14200        assert!(result.balance_validation.entries_processed > 0);
14201    }
14202
14203    #[test]
14204    fn test_subledger_linking() {
14205        let config = create_test_config();
14206        let phase_config = PhaseConfig {
14207            generate_master_data: true,
14208            generate_document_flows: true,
14209            generate_journal_entries: false,
14210            inject_anomalies: false,
14211            inject_data_quality: false,
14212            validate_balances: false,
14213            generate_ocpm_events: false,
14214            show_progress: false,
14215            vendors_per_company: 5,
14216            customers_per_company: 5,
14217            materials_per_company: 10,
14218            assets_per_company: 3,
14219            employees_per_company: 5,
14220            p2p_chains: 5,
14221            o2c_chains: 5,
14222            ..Default::default()
14223        };
14224
14225        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14226        let result = orchestrator.generate().unwrap();
14227
14228        // Should have document flows
14229        assert!(!result.document_flows.vendor_invoices.is_empty());
14230        assert!(!result.document_flows.customer_invoices.is_empty());
14231
14232        // Subledger should be linked from document flows
14233        assert!(!result.subledger.ap_invoices.is_empty());
14234        assert!(!result.subledger.ar_invoices.is_empty());
14235
14236        // AP invoices count should match vendor invoices count
14237        assert_eq!(
14238            result.subledger.ap_invoices.len(),
14239            result.document_flows.vendor_invoices.len()
14240        );
14241
14242        // AR invoices count should match customer invoices count
14243        assert_eq!(
14244            result.subledger.ar_invoices.len(),
14245            result.document_flows.customer_invoices.len()
14246        );
14247
14248        // Statistics should reflect subledger counts
14249        assert_eq!(
14250            result.statistics.ap_invoice_count,
14251            result.subledger.ap_invoices.len()
14252        );
14253        assert_eq!(
14254            result.statistics.ar_invoice_count,
14255            result.subledger.ar_invoices.len()
14256        );
14257    }
14258
14259    #[test]
14260    fn test_balance_validation() {
14261        let config = create_test_config();
14262        let phase_config = PhaseConfig {
14263            generate_master_data: false,
14264            generate_document_flows: false,
14265            generate_journal_entries: true,
14266            inject_anomalies: false,
14267            validate_balances: true,
14268            show_progress: false,
14269            ..Default::default()
14270        };
14271
14272        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14273        let result = orchestrator.generate().unwrap();
14274
14275        // Balance validation should run
14276        assert!(result.balance_validation.validated);
14277        assert!(result.balance_validation.entries_processed > 0);
14278
14279        // Generated JEs should be balanced (no unbalanced entries)
14280        assert!(!result.balance_validation.has_unbalanced_entries);
14281
14282        // Total debits should equal total credits
14283        assert_eq!(
14284            result.balance_validation.total_debits,
14285            result.balance_validation.total_credits
14286        );
14287    }
14288
14289    #[test]
14290    fn test_statistics_accuracy() {
14291        let config = create_test_config();
14292        let phase_config = PhaseConfig {
14293            generate_master_data: true,
14294            generate_document_flows: false,
14295            generate_journal_entries: true,
14296            inject_anomalies: false,
14297            show_progress: false,
14298            vendors_per_company: 10,
14299            customers_per_company: 20,
14300            materials_per_company: 15,
14301            assets_per_company: 5,
14302            employees_per_company: 8,
14303            ..Default::default()
14304        };
14305
14306        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14307        let result = orchestrator.generate().unwrap();
14308
14309        // Statistics should match actual data
14310        assert_eq!(
14311            result.statistics.vendor_count,
14312            result.master_data.vendors.len()
14313        );
14314        assert_eq!(
14315            result.statistics.customer_count,
14316            result.master_data.customers.len()
14317        );
14318        assert_eq!(
14319            result.statistics.material_count,
14320            result.master_data.materials.len()
14321        );
14322        assert_eq!(
14323            result.statistics.total_entries as usize,
14324            result.journal_entries.len()
14325        );
14326    }
14327
14328    #[test]
14329    fn test_phase_config_defaults() {
14330        let config = PhaseConfig::default();
14331        assert!(config.generate_master_data);
14332        assert!(config.generate_document_flows);
14333        assert!(config.generate_journal_entries);
14334        assert!(!config.inject_anomalies);
14335        assert!(config.validate_balances);
14336        assert!(config.show_progress);
14337        assert!(config.vendors_per_company > 0);
14338        assert!(config.customers_per_company > 0);
14339    }
14340
14341    #[test]
14342    fn test_get_coa_before_generation() {
14343        let config = create_test_config();
14344        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
14345
14346        // Before generation, CoA should be None
14347        assert!(orchestrator.get_coa().is_none());
14348    }
14349
14350    #[test]
14351    fn test_get_coa_after_generation() {
14352        let config = create_test_config();
14353        let phase_config = PhaseConfig {
14354            generate_master_data: false,
14355            generate_document_flows: false,
14356            generate_journal_entries: true,
14357            inject_anomalies: false,
14358            show_progress: false,
14359            ..Default::default()
14360        };
14361
14362        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14363        let _ = orchestrator.generate().unwrap();
14364
14365        // After generation, CoA should be available
14366        assert!(orchestrator.get_coa().is_some());
14367    }
14368
14369    #[test]
14370    fn test_get_master_data() {
14371        let config = create_test_config();
14372        let phase_config = PhaseConfig {
14373            generate_master_data: true,
14374            generate_document_flows: false,
14375            generate_journal_entries: false,
14376            inject_anomalies: false,
14377            show_progress: false,
14378            vendors_per_company: 5,
14379            customers_per_company: 5,
14380            materials_per_company: 5,
14381            assets_per_company: 5,
14382            employees_per_company: 5,
14383            ..Default::default()
14384        };
14385
14386        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14387        let result = orchestrator.generate().unwrap();
14388
14389        // After generate(), master_data is moved into the result
14390        assert!(!result.master_data.vendors.is_empty());
14391    }
14392
14393    #[test]
14394    fn test_with_progress_builder() {
14395        let config = create_test_config();
14396        let orchestrator = EnhancedOrchestrator::with_defaults(config)
14397            .unwrap()
14398            .with_progress(false);
14399
14400        // Should still work without progress
14401        assert!(!orchestrator.phase_config.show_progress);
14402    }
14403
14404    #[test]
14405    fn test_multi_company_generation() {
14406        let mut config = create_test_config();
14407        config.companies.push(CompanyConfig {
14408            code: "2000".to_string(),
14409            name: "Subsidiary".to_string(),
14410            currency: "EUR".to_string(),
14411            functional_currency: None,
14412            country: "DE".to_string(),
14413            annual_transaction_volume: TransactionVolume::TenK,
14414            volume_weight: 0.5,
14415            fiscal_year_variant: "K4".to_string(),
14416        });
14417
14418        let phase_config = PhaseConfig {
14419            generate_master_data: true,
14420            generate_document_flows: false,
14421            generate_journal_entries: true,
14422            inject_anomalies: false,
14423            show_progress: false,
14424            vendors_per_company: 5,
14425            customers_per_company: 5,
14426            materials_per_company: 5,
14427            assets_per_company: 5,
14428            employees_per_company: 5,
14429            ..Default::default()
14430        };
14431
14432        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14433        let result = orchestrator.generate().unwrap();
14434
14435        // Should have master data for both companies
14436        assert!(result.statistics.vendor_count >= 10); // 5 per company
14437        assert!(result.statistics.customer_count >= 10);
14438        assert!(result.statistics.companies_count == 2);
14439    }
14440
14441    #[test]
14442    fn test_empty_master_data_skips_document_flows() {
14443        let config = create_test_config();
14444        let phase_config = PhaseConfig {
14445            generate_master_data: false,   // Skip master data
14446            generate_document_flows: true, // Try to generate flows
14447            generate_journal_entries: false,
14448            inject_anomalies: false,
14449            show_progress: false,
14450            ..Default::default()
14451        };
14452
14453        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14454        let result = orchestrator.generate().unwrap();
14455
14456        // Without master data, document flows should be empty
14457        assert!(result.document_flows.p2p_chains.is_empty());
14458        assert!(result.document_flows.o2c_chains.is_empty());
14459    }
14460
14461    #[test]
14462    fn test_journal_entry_line_item_count() {
14463        let config = create_test_config();
14464        let phase_config = PhaseConfig {
14465            generate_master_data: false,
14466            generate_document_flows: false,
14467            generate_journal_entries: true,
14468            inject_anomalies: false,
14469            show_progress: false,
14470            ..Default::default()
14471        };
14472
14473        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14474        let result = orchestrator.generate().unwrap();
14475
14476        // Total line items should match sum of all entry line counts
14477        let calculated_line_items: u64 = result
14478            .journal_entries
14479            .iter()
14480            .map(|e| e.line_count() as u64)
14481            .sum();
14482        assert_eq!(result.statistics.total_line_items, calculated_line_items);
14483    }
14484
14485    #[test]
14486    fn test_audit_generation() {
14487        let config = create_test_config();
14488        let phase_config = PhaseConfig {
14489            generate_master_data: false,
14490            generate_document_flows: false,
14491            generate_journal_entries: true,
14492            inject_anomalies: false,
14493            show_progress: false,
14494            generate_audit: true,
14495            audit_engagements: 2,
14496            workpapers_per_engagement: 5,
14497            evidence_per_workpaper: 2,
14498            risks_per_engagement: 3,
14499            findings_per_engagement: 2,
14500            judgments_per_engagement: 2,
14501            ..Default::default()
14502        };
14503
14504        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14505        let result = orchestrator.generate().unwrap();
14506
14507        // Should have generated audit data
14508        assert_eq!(result.audit.engagements.len(), 2);
14509        assert!(!result.audit.workpapers.is_empty());
14510        assert!(!result.audit.evidence.is_empty());
14511        assert!(!result.audit.risk_assessments.is_empty());
14512        assert!(!result.audit.findings.is_empty());
14513        assert!(!result.audit.judgments.is_empty());
14514
14515        // New ISA entity collections should also be populated
14516        assert!(
14517            !result.audit.confirmations.is_empty(),
14518            "ISA 505 confirmations should be generated"
14519        );
14520        assert!(
14521            !result.audit.confirmation_responses.is_empty(),
14522            "ISA 505 confirmation responses should be generated"
14523        );
14524        assert!(
14525            !result.audit.procedure_steps.is_empty(),
14526            "ISA 330 procedure steps should be generated"
14527        );
14528        // Samples may or may not be generated depending on workpaper sampling methods
14529        assert!(
14530            !result.audit.analytical_results.is_empty(),
14531            "ISA 520 analytical procedures should be generated"
14532        );
14533        assert!(
14534            !result.audit.ia_functions.is_empty(),
14535            "ISA 610 IA functions should be generated (one per engagement)"
14536        );
14537        assert!(
14538            !result.audit.related_parties.is_empty(),
14539            "ISA 550 related parties should be generated"
14540        );
14541
14542        // Statistics should match
14543        assert_eq!(
14544            result.statistics.audit_engagement_count,
14545            result.audit.engagements.len()
14546        );
14547        assert_eq!(
14548            result.statistics.audit_workpaper_count,
14549            result.audit.workpapers.len()
14550        );
14551        assert_eq!(
14552            result.statistics.audit_evidence_count,
14553            result.audit.evidence.len()
14554        );
14555        assert_eq!(
14556            result.statistics.audit_risk_count,
14557            result.audit.risk_assessments.len()
14558        );
14559        assert_eq!(
14560            result.statistics.audit_finding_count,
14561            result.audit.findings.len()
14562        );
14563        assert_eq!(
14564            result.statistics.audit_judgment_count,
14565            result.audit.judgments.len()
14566        );
14567        assert_eq!(
14568            result.statistics.audit_confirmation_count,
14569            result.audit.confirmations.len()
14570        );
14571        assert_eq!(
14572            result.statistics.audit_confirmation_response_count,
14573            result.audit.confirmation_responses.len()
14574        );
14575        assert_eq!(
14576            result.statistics.audit_procedure_step_count,
14577            result.audit.procedure_steps.len()
14578        );
14579        assert_eq!(
14580            result.statistics.audit_sample_count,
14581            result.audit.samples.len()
14582        );
14583        assert_eq!(
14584            result.statistics.audit_analytical_result_count,
14585            result.audit.analytical_results.len()
14586        );
14587        assert_eq!(
14588            result.statistics.audit_ia_function_count,
14589            result.audit.ia_functions.len()
14590        );
14591        assert_eq!(
14592            result.statistics.audit_ia_report_count,
14593            result.audit.ia_reports.len()
14594        );
14595        assert_eq!(
14596            result.statistics.audit_related_party_count,
14597            result.audit.related_parties.len()
14598        );
14599        assert_eq!(
14600            result.statistics.audit_related_party_transaction_count,
14601            result.audit.related_party_transactions.len()
14602        );
14603    }
14604
14605    #[test]
14606    fn test_new_phases_disabled_by_default() {
14607        let config = create_test_config();
14608        // Verify new config fields default to disabled
14609        assert!(!config.llm.enabled);
14610        assert!(!config.diffusion.enabled);
14611        assert!(!config.causal.enabled);
14612
14613        let phase_config = PhaseConfig {
14614            generate_master_data: false,
14615            generate_document_flows: false,
14616            generate_journal_entries: true,
14617            inject_anomalies: false,
14618            show_progress: false,
14619            ..Default::default()
14620        };
14621
14622        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14623        let result = orchestrator.generate().unwrap();
14624
14625        // All new phase statistics should be zero when disabled
14626        assert_eq!(result.statistics.llm_enrichment_ms, 0);
14627        assert_eq!(result.statistics.llm_vendors_enriched, 0);
14628        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
14629        assert_eq!(result.statistics.diffusion_samples_generated, 0);
14630        assert_eq!(result.statistics.causal_generation_ms, 0);
14631        assert_eq!(result.statistics.causal_samples_generated, 0);
14632        assert!(result.statistics.causal_validation_passed.is_none());
14633        assert_eq!(result.statistics.counterfactual_pair_count, 0);
14634        assert!(result.counterfactual_pairs.is_empty());
14635    }
14636
14637    #[test]
14638    fn test_counterfactual_generation_enabled() {
14639        let config = create_test_config();
14640        let phase_config = PhaseConfig {
14641            generate_master_data: false,
14642            generate_document_flows: false,
14643            generate_journal_entries: true,
14644            inject_anomalies: false,
14645            show_progress: false,
14646            generate_counterfactuals: true,
14647            generate_period_close: false, // Disable so entry count matches counterfactual pairs
14648            ..Default::default()
14649        };
14650
14651        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14652        let result = orchestrator.generate().unwrap();
14653
14654        // With JE generation enabled, counterfactual pairs should be generated
14655        if !result.journal_entries.is_empty() {
14656            assert_eq!(
14657                result.counterfactual_pairs.len(),
14658                result.journal_entries.len()
14659            );
14660            assert_eq!(
14661                result.statistics.counterfactual_pair_count,
14662                result.journal_entries.len()
14663            );
14664            // Each pair should have a distinct pair_id
14665            let ids: std::collections::HashSet<_> = result
14666                .counterfactual_pairs
14667                .iter()
14668                .map(|p| p.pair_id.clone())
14669                .collect();
14670            assert_eq!(ids.len(), result.counterfactual_pairs.len());
14671        }
14672    }
14673
14674    #[test]
14675    fn test_llm_enrichment_enabled() {
14676        let mut config = create_test_config();
14677        config.llm.enabled = true;
14678        config.llm.max_vendor_enrichments = 3;
14679
14680        let phase_config = PhaseConfig {
14681            generate_master_data: true,
14682            generate_document_flows: false,
14683            generate_journal_entries: false,
14684            inject_anomalies: false,
14685            show_progress: false,
14686            vendors_per_company: 5,
14687            customers_per_company: 3,
14688            materials_per_company: 3,
14689            assets_per_company: 3,
14690            employees_per_company: 3,
14691            ..Default::default()
14692        };
14693
14694        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14695        let result = orchestrator.generate().unwrap();
14696
14697        // LLM enrichment should have run
14698        assert!(result.statistics.llm_vendors_enriched > 0);
14699        assert!(result.statistics.llm_vendors_enriched <= 3);
14700    }
14701
14702    #[test]
14703    fn test_diffusion_enhancement_enabled() {
14704        let mut config = create_test_config();
14705        config.diffusion.enabled = true;
14706        config.diffusion.n_steps = 50;
14707        config.diffusion.sample_size = 20;
14708
14709        let phase_config = PhaseConfig {
14710            generate_master_data: false,
14711            generate_document_flows: false,
14712            generate_journal_entries: true,
14713            inject_anomalies: false,
14714            show_progress: false,
14715            ..Default::default()
14716        };
14717
14718        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14719        let result = orchestrator.generate().unwrap();
14720
14721        // Diffusion phase should have generated samples
14722        assert_eq!(result.statistics.diffusion_samples_generated, 20);
14723    }
14724
14725    #[test]
14726    fn test_causal_overlay_enabled() {
14727        let mut config = create_test_config();
14728        config.causal.enabled = true;
14729        config.causal.template = "fraud_detection".to_string();
14730        config.causal.sample_size = 100;
14731        config.causal.validate = true;
14732
14733        let phase_config = PhaseConfig {
14734            generate_master_data: false,
14735            generate_document_flows: false,
14736            generate_journal_entries: true,
14737            inject_anomalies: false,
14738            show_progress: false,
14739            ..Default::default()
14740        };
14741
14742        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14743        let result = orchestrator.generate().unwrap();
14744
14745        // Causal phase should have generated samples
14746        assert_eq!(result.statistics.causal_samples_generated, 100);
14747        // Validation should have run
14748        assert!(result.statistics.causal_validation_passed.is_some());
14749    }
14750
14751    #[test]
14752    fn test_causal_overlay_revenue_cycle_template() {
14753        let mut config = create_test_config();
14754        config.causal.enabled = true;
14755        config.causal.template = "revenue_cycle".to_string();
14756        config.causal.sample_size = 50;
14757        config.causal.validate = false;
14758
14759        let phase_config = PhaseConfig {
14760            generate_master_data: false,
14761            generate_document_flows: false,
14762            generate_journal_entries: true,
14763            inject_anomalies: false,
14764            show_progress: false,
14765            ..Default::default()
14766        };
14767
14768        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14769        let result = orchestrator.generate().unwrap();
14770
14771        // Causal phase should have generated samples
14772        assert_eq!(result.statistics.causal_samples_generated, 50);
14773        // Validation was disabled
14774        assert!(result.statistics.causal_validation_passed.is_none());
14775    }
14776
14777    #[test]
14778    fn test_all_new_phases_enabled_together() {
14779        let mut config = create_test_config();
14780        config.llm.enabled = true;
14781        config.llm.max_vendor_enrichments = 2;
14782        config.diffusion.enabled = true;
14783        config.diffusion.n_steps = 20;
14784        config.diffusion.sample_size = 10;
14785        config.causal.enabled = true;
14786        config.causal.sample_size = 50;
14787        config.causal.validate = true;
14788
14789        let phase_config = PhaseConfig {
14790            generate_master_data: true,
14791            generate_document_flows: false,
14792            generate_journal_entries: true,
14793            inject_anomalies: false,
14794            show_progress: false,
14795            vendors_per_company: 5,
14796            customers_per_company: 3,
14797            materials_per_company: 3,
14798            assets_per_company: 3,
14799            employees_per_company: 3,
14800            ..Default::default()
14801        };
14802
14803        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14804        let result = orchestrator.generate().unwrap();
14805
14806        // All three phases should have run
14807        assert!(result.statistics.llm_vendors_enriched > 0);
14808        assert_eq!(result.statistics.diffusion_samples_generated, 10);
14809        assert_eq!(result.statistics.causal_samples_generated, 50);
14810        assert!(result.statistics.causal_validation_passed.is_some());
14811    }
14812
14813    #[test]
14814    fn test_statistics_serialization_with_new_fields() {
14815        let stats = EnhancedGenerationStatistics {
14816            total_entries: 100,
14817            total_line_items: 500,
14818            llm_enrichment_ms: 42,
14819            llm_vendors_enriched: 10,
14820            diffusion_enhancement_ms: 100,
14821            diffusion_samples_generated: 50,
14822            causal_generation_ms: 200,
14823            causal_samples_generated: 100,
14824            causal_validation_passed: Some(true),
14825            ..Default::default()
14826        };
14827
14828        let json = serde_json::to_string(&stats).unwrap();
14829        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14830
14831        assert_eq!(deserialized.llm_enrichment_ms, 42);
14832        assert_eq!(deserialized.llm_vendors_enriched, 10);
14833        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14834        assert_eq!(deserialized.diffusion_samples_generated, 50);
14835        assert_eq!(deserialized.causal_generation_ms, 200);
14836        assert_eq!(deserialized.causal_samples_generated, 100);
14837        assert_eq!(deserialized.causal_validation_passed, Some(true));
14838    }
14839
14840    #[test]
14841    fn test_statistics_backward_compat_deserialization() {
14842        // Old JSON without the new fields should still deserialize
14843        let old_json = r#"{
14844            "total_entries": 100,
14845            "total_line_items": 500,
14846            "accounts_count": 50,
14847            "companies_count": 1,
14848            "period_months": 12,
14849            "vendor_count": 10,
14850            "customer_count": 20,
14851            "material_count": 15,
14852            "asset_count": 5,
14853            "employee_count": 8,
14854            "p2p_chain_count": 5,
14855            "o2c_chain_count": 5,
14856            "ap_invoice_count": 5,
14857            "ar_invoice_count": 5,
14858            "ocpm_event_count": 0,
14859            "ocpm_object_count": 0,
14860            "ocpm_case_count": 0,
14861            "audit_engagement_count": 0,
14862            "audit_workpaper_count": 0,
14863            "audit_evidence_count": 0,
14864            "audit_risk_count": 0,
14865            "audit_finding_count": 0,
14866            "audit_judgment_count": 0,
14867            "anomalies_injected": 0,
14868            "data_quality_issues": 0,
14869            "banking_customer_count": 0,
14870            "banking_account_count": 0,
14871            "banking_transaction_count": 0,
14872            "banking_suspicious_count": 0,
14873            "graph_export_count": 0,
14874            "graph_node_count": 0,
14875            "graph_edge_count": 0
14876        }"#;
14877
14878        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14879
14880        // New fields should default to 0 / None
14881        assert_eq!(stats.llm_enrichment_ms, 0);
14882        assert_eq!(stats.llm_vendors_enriched, 0);
14883        assert_eq!(stats.diffusion_enhancement_ms, 0);
14884        assert_eq!(stats.diffusion_samples_generated, 0);
14885        assert_eq!(stats.causal_generation_ms, 0);
14886        assert_eq!(stats.causal_samples_generated, 0);
14887        assert!(stats.causal_validation_passed.is_none());
14888    }
14889}