Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Switches and volume settings that select which generation phases run.
///
/// `bool` fields turn an individual phase on or off; `usize` fields set how many
/// records of a given kind are requested. See the [`Default`] impl for baseline values.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Master data phase: vendors, customers, materials, assets, employees.
    pub generate_master_data: bool,
    /// Document flow phase: P2P and O2C.
    pub generate_document_flows: bool,
    /// OCPM events derived from document flows.
    pub generate_ocpm_events: bool,
    /// Journal entry phase.
    pub generate_journal_entries: bool,
    /// Anomaly injection.
    pub inject_anomalies: bool,
    /// Data quality variations: typos, missing values, format variations.
    pub inject_data_quality: bool,
    /// Check the balance sheet equation after generation.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors to generate for each company.
    pub vendors_per_company: usize,
    /// Customers to generate for each company.
    pub customers_per_company: usize,
    /// Materials to generate for each company.
    pub materials_per_company: usize,
    /// Assets to generate for each company.
    pub assets_per_company: usize,
    /// Employees to generate for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Audit data phase: engagements, workpapers, evidence, risks, findings, judgments.
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments for each engagement.
    pub risks_per_engagement: usize,
    /// Findings for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments for each engagement.
    pub judgments_per_engagement: usize,
    /// Banking KYC/AML phase: customers, accounts, transactions, typologies.
    pub generate_banking: bool,
    /// Graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// S2C sourcing phase: spend analysis, RFx, bids, contracts, catalogs, scorecards.
    pub generate_sourcing: bool,
    /// Bank reconciliations built from payments.
    pub generate_bank_reconciliation: bool,
    /// Financial statements built from trial balances.
    pub generate_financial_statements: bool,
    /// Accounting standards data: revenue recognition, impairment.
    pub generate_accounting_standards: bool,
    /// Manufacturing data: production orders, quality inspections, cycle counts.
    pub generate_manufacturing: bool,
    /// Sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// ESG data: emissions, energy, water, waste, social, governance.
    pub generate_esg: bool,
    /// Intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Compliance regulations data: standards registry, procedures, findings, filings.
    pub generate_compliance_regulations: bool,
    /// Period-close journal entries: tax provision, income statement close.
    pub generate_period_close: bool,
    /// HR data: payroll, time entries, expenses, pensions, stock comp.
    pub generate_hr: bool,
    /// Treasury data: cash management, hedging, debt, pooling.
    pub generate_treasury: bool,
    /// Project accounting data: projects, costs, revenue, EVM, milestones.
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: a small set of phases is on, every other phase is off,
    /// and the record counts use the fixed values listed below.
    fn default() -> Self {
        Self {
            // ---- Phases that are on by default ----
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,
            generate_period_close: true,

            // ---- Phases that are off by default ----
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Kept off to preserve clean test data.
            inject_data_quality: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            // Opt-in for ML workloads.
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,

            // ---- Master data volumes (per company) ----
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // ---- Document flow volumes ----
            p2p_chains: 100,
            o2c_chains: 100,

            // ---- Audit volumes ----
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
383
384impl PhaseConfig {
385    /// Derive phase flags from [`GeneratorConfig`].
386    ///
387    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
388    /// CLI flags can override individual fields after calling this method.
389    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390        Self {
391            // Always-on phases
392            generate_master_data: true,
393            generate_document_flows: true,
394            generate_journal_entries: true,
395            validate_balances: true,
396            generate_period_close: true,
397            generate_evolution_events: true,
398            show_progress: true,
399
400            // Feature-gated phases — derived from config sections
401            generate_audit: cfg.audit.enabled,
402            generate_banking: cfg.banking.enabled,
403            generate_graph_export: cfg.graph_export.enabled,
404            generate_sourcing: cfg.source_to_pay.enabled,
405            generate_intercompany: cfg.intercompany.enabled,
406            generate_financial_statements: cfg.financial_reporting.enabled,
407            generate_bank_reconciliation: cfg.financial_reporting.enabled,
408            generate_accounting_standards: cfg.accounting_standards.enabled,
409            generate_manufacturing: cfg.manufacturing.enabled,
410            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411            generate_tax: cfg.tax.enabled,
412            generate_esg: cfg.esg.enabled,
413            generate_ocpm_events: cfg.ocpm.enabled,
414            generate_compliance_regulations: cfg.compliance_regulations.enabled,
415            generate_hr: cfg.hr.enabled,
416            generate_treasury: cfg.treasury.enabled,
417            generate_project_accounting: cfg.project_accounting.enabled,
418
419            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
420            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423            inject_data_quality: cfg.data_quality.enabled,
424
425            // Count defaults (CLI can override after calling this method)
426            vendors_per_company: 50,
427            customers_per_company: 100,
428            materials_per_company: 200,
429            assets_per_company: 50,
430            employees_per_company: 100,
431            p2p_chains: 100,
432            o2c_chains: 100,
433            audit_engagements: 5,
434            workpapers_per_engagement: 20,
435            evidence_per_workpaper: 5,
436            risks_per_engagement: 15,
437            findings_per_engagement: 8,
438            judgments_per_engagement: 10,
439        }
440    }
441}
442
443/// Master data snapshot containing all generated entities.
444#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446    /// Generated vendors.
447    pub vendors: Vec<Vendor>,
448    /// Generated customers.
449    pub customers: Vec<Customer>,
450    /// Generated materials.
451    pub materials: Vec<Material>,
452    /// Generated fixed assets.
453    pub assets: Vec<FixedAsset>,
454    /// Generated employees.
455    pub employees: Vec<Employee>,
456    /// Generated cost center hierarchy (two-level: departments + sub-departments).
457    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
459    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Summary of a hypergraph export that has completed.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
474
475/// Document flow snapshot containing all generated document chains.
476#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478    /// P2P document chains.
479    pub p2p_chains: Vec<P2PDocumentChain>,
480    /// O2C document chains.
481    pub o2c_chains: Vec<O2CDocumentChain>,
482    /// All purchase orders (flattened).
483    pub purchase_orders: Vec<documents::PurchaseOrder>,
484    /// All goods receipts (flattened).
485    pub goods_receipts: Vec<documents::GoodsReceipt>,
486    /// All vendor invoices (flattened).
487    pub vendor_invoices: Vec<documents::VendorInvoice>,
488    /// All sales orders (flattened).
489    pub sales_orders: Vec<documents::SalesOrder>,
490    /// All deliveries (flattened).
491    pub deliveries: Vec<documents::Delivery>,
492    /// All customer invoices (flattened).
493    pub customer_invoices: Vec<documents::CustomerInvoice>,
494    /// All payments (flattened).
495    pub payments: Vec<documents::Payment>,
496    /// Cross-document references collected from all document headers
497    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
498    pub document_references: Vec<documents::DocumentReference>,
499}
500
501/// Subledger snapshot containing generated subledger records.
502#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504    /// AP invoices linked from document flow vendor invoices.
505    pub ap_invoices: Vec<APInvoice>,
506    /// AR invoices linked from document flow customer invoices.
507    pub ar_invoices: Vec<ARInvoice>,
508    /// FA subledger records (asset acquisitions from FA generator).
509    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510    /// Inventory positions from inventory generator.
511    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512    /// Inventory movements from inventory generator.
513    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514    /// AR aging reports, one per company, computed after payment settlement.
515    pub ar_aging_reports: Vec<ARAgingReport>,
516    /// AP aging reports, one per company, computed after payment settlement.
517    pub ap_aging_reports: Vec<APAgingReport>,
518    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
519    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
521    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522    /// Dunning runs executed after AR aging (one per company per dunning cycle).
523    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524    /// Dunning letters generated across all dunning runs.
525    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528/// OCPM snapshot containing generated OCPM event log data.
529#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531    /// OCPM event log (if generated)
532    pub event_log: Option<OcpmEventLog>,
533    /// Number of events generated
534    pub event_count: usize,
535    /// Number of objects generated
536    pub object_count: usize,
537    /// Number of cases generated
538    pub case_count: usize,
539}
540
541/// Audit data snapshot containing all generated audit-related entities.
542#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544    /// Audit engagements per ISA 210/220.
545    pub engagements: Vec<AuditEngagement>,
546    /// Workpapers per ISA 230.
547    pub workpapers: Vec<Workpaper>,
548    /// Audit evidence per ISA 500.
549    pub evidence: Vec<AuditEvidence>,
550    /// Risk assessments per ISA 315/330.
551    pub risk_assessments: Vec<RiskAssessment>,
552    /// Audit findings per ISA 265.
553    pub findings: Vec<AuditFinding>,
554    /// Professional judgments per ISA 200.
555    pub judgments: Vec<ProfessionalJudgment>,
556    /// External confirmations per ISA 505.
557    pub confirmations: Vec<ExternalConfirmation>,
558    /// Confirmation responses per ISA 505.
559    pub confirmation_responses: Vec<ConfirmationResponse>,
560    /// Audit procedure steps per ISA 330/530.
561    pub procedure_steps: Vec<AuditProcedureStep>,
562    /// Audit samples per ISA 530.
563    pub samples: Vec<AuditSample>,
564    /// Analytical procedure results per ISA 520.
565    pub analytical_results: Vec<AnalyticalProcedureResult>,
566    /// Internal audit functions per ISA 610.
567    pub ia_functions: Vec<InternalAuditFunction>,
568    /// Internal audit reports per ISA 610.
569    pub ia_reports: Vec<InternalAuditReport>,
570    /// Related parties per ISA 550.
571    pub related_parties: Vec<RelatedParty>,
572    /// Related party transactions per ISA 550.
573    pub related_party_transactions: Vec<RelatedPartyTransaction>,
574    // ---- ISA 600: Group Audits ----
575    /// Component auditors assigned by jurisdiction (ISA 600).
576    pub component_auditors: Vec<ComponentAuditor>,
577    /// Group audit plan with materiality allocations (ISA 600).
578    pub group_audit_plan: Option<GroupAuditPlan>,
579    /// Component instructions issued to component auditors (ISA 600).
580    pub component_instructions: Vec<ComponentInstruction>,
581    /// Reports received from component auditors (ISA 600).
582    pub component_reports: Vec<ComponentAuditorReport>,
583    // ---- ISA 210: Engagement Letters ----
584    /// Engagement letters per ISA 210.
585    pub engagement_letters: Vec<EngagementLetter>,
586    // ---- ISA 560 / IAS 10: Subsequent Events ----
587    /// Subsequent events per ISA 560 / IAS 10.
588    pub subsequent_events: Vec<SubsequentEvent>,
589    // ---- ISA 402: Service Organization Controls ----
590    /// Service organizations identified per ISA 402.
591    pub service_organizations: Vec<ServiceOrganization>,
592    /// SOC reports obtained per ISA 402.
593    pub soc_reports: Vec<SocReport>,
594    /// User entity controls documented per ISA 402.
595    pub user_entity_controls: Vec<UserEntityControl>,
596    // ---- ISA 570: Going Concern ----
597    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
598    pub going_concern_assessments:
599        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600    // ---- ISA 540: Accounting Estimates ----
601    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
602    pub accounting_estimates:
603        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604    // ---- ISA 700/701/705/706: Audit Opinions ----
605    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
606    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607    /// Key Audit Matters per ISA 701 (flattened across all opinions).
608    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609    // ---- SOX 302 / 404 ----
610    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
611    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612    /// SOX Section 404 ICFR assessments (one per entity per year).
613    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614    // ---- ISA 320: Materiality ----
615    /// Materiality calculations per entity per period (ISA 320).
616    pub materiality_calculations:
617        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618    // ---- ISA 315: Combined Risk Assessments ----
619    /// Combined Risk Assessments per account area / assertion (ISA 315).
620    pub combined_risk_assessments:
621        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622    // ---- ISA 530: Sampling Plans ----
623    /// Sampling plans per CRA at Moderate or higher (ISA 530).
624    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625    /// Individual sampled items (key items + representative items) per ISA 530.
626    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
628    /// Significant classes of transactions per ISA 315 (one set per entity).
629    pub significant_transaction_classes:
630        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631    // ---- ISA 520: Unusual Item Markers ----
632    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
633    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634    // ---- ISA 520: Analytical Relationships ----
635    /// Analytical relationships (ratios, trends, correlations) per entity.
636    pub analytical_relationships:
637        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638    // ---- PCAOB-ISA Cross-Reference ----
639    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
640    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641    // ---- ISA Standard Reference ----
642    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
643    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644    // ---- ISA 220 / ISA 300: Audit Scopes ----
645    /// Audit scope records (one per engagement) describing the audit boundary.
646    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647    // ---- FSM Event Trail ----
648    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
649    /// Contains the ordered sequence of state-transition and procedure-step events
650    /// generated by the audit FSM engine.
651    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654/// Banking KYC/AML data snapshot containing all generated banking entities.
655#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657    /// Banking customers (retail, business, trust).
658    pub customers: Vec<BankingCustomer>,
659    /// Bank accounts.
660    pub accounts: Vec<BankAccount>,
661    /// Bank transactions with AML labels.
662    pub transactions: Vec<BankTransaction>,
663    /// Transaction-level AML labels with features.
664    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665    /// Customer-level AML labels.
666    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667    /// Account-level AML labels.
668    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669    /// Relationship-level AML labels.
670    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671    /// Case narratives for AML scenarios.
672    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673    /// Number of suspicious transactions.
674    pub suspicious_count: usize,
675    /// Number of AML scenarios generated.
676    pub scenario_count: usize,
677}
678
679/// Graph export snapshot containing exported graph metadata.
680#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682    /// Whether graph export was performed.
683    pub exported: bool,
684    /// Number of graphs exported.
685    pub graph_count: usize,
686    /// Exported graph metadata (by format name).
687    pub exports: HashMap<String, GraphExportInfo>,
688}
689
690/// Information about an exported graph.
691#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693    /// Graph name.
694    pub name: String,
695    /// Export format (pytorch_geometric, neo4j, dgl).
696    pub format: String,
697    /// Output directory path.
698    pub output_path: PathBuf,
699    /// Number of nodes.
700    pub node_count: usize,
701    /// Number of edges.
702    pub edge_count: usize,
703}
704
705/// S2C sourcing data snapshot.
706#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708    /// Spend analyses.
709    pub spend_analyses: Vec<SpendAnalysis>,
710    /// Sourcing projects.
711    pub sourcing_projects: Vec<SourcingProject>,
712    /// Supplier qualifications.
713    pub qualifications: Vec<SupplierQualification>,
714    /// RFx events (RFI, RFP, RFQ).
715    pub rfx_events: Vec<RfxEvent>,
716    /// Supplier bids.
717    pub bids: Vec<SupplierBid>,
718    /// Bid evaluations.
719    pub bid_evaluations: Vec<BidEvaluation>,
720    /// Procurement contracts.
721    pub contracts: Vec<ProcurementContract>,
722    /// Catalog items.
723    pub catalog_items: Vec<CatalogItem>,
724    /// Supplier scorecards.
725    pub scorecards: Vec<SupplierScorecard>,
726}
727
728/// A single period's trial balance with metadata.
729#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731    /// Fiscal year.
732    pub fiscal_year: u16,
733    /// Fiscal period (1-12).
734    pub fiscal_period: u8,
735    /// Period start date.
736    pub period_start: NaiveDate,
737    /// Period end date.
738    pub period_end: NaiveDate,
739    /// Trial balance entries for this period.
740    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743/// Financial reporting snapshot (financial statements + bank reconciliations).
744#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746    /// Financial statements (balance sheet, income statement, cash flow).
747    /// For multi-entity configs this includes all standalone statements.
748    pub financial_statements: Vec<FinancialStatement>,
749    /// Standalone financial statements keyed by entity code.
750    /// Each entity has its own slice of statements.
751    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
753    pub consolidated_statements: Vec<FinancialStatement>,
754    /// Consolidation schedules (one per period) showing pre/post elimination detail.
755    pub consolidation_schedules: Vec<ConsolidationSchedule>,
756    /// Bank reconciliations.
757    pub bank_reconciliations: Vec<BankReconciliation>,
758    /// Period-close trial balances (one per period).
759    pub trial_balances: Vec<PeriodTrialBalance>,
760    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
761    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
763    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
765    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
769#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771    /// Payroll runs (actual data).
772    pub payroll_runs: Vec<PayrollRun>,
773    /// Payroll line items (actual data).
774    pub payroll_line_items: Vec<PayrollLineItem>,
775    /// Time entries (actual data).
776    pub time_entries: Vec<TimeEntry>,
777    /// Expense reports (actual data).
778    pub expense_reports: Vec<ExpenseReport>,
779    /// Benefit enrollments (actual data).
780    pub benefit_enrollments: Vec<BenefitEnrollment>,
781    /// Defined benefit pension plans (IAS 19 / ASC 715).
782    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783    /// Pension obligation (DBO) roll-forwards.
784    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785    /// Plan asset roll-forwards.
786    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787    /// Pension disclosures.
788    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789    /// Journal entries generated from pension expense and OCI remeasurements.
790    pub pension_journal_entries: Vec<JournalEntry>,
791    /// Stock grants (ASC 718 / IFRS 2).
792    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793    /// Stock-based compensation period expense records.
794    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795    /// Journal entries generated from stock-based compensation expense.
796    pub stock_comp_journal_entries: Vec<JournalEntry>,
797    /// Payroll runs.
798    pub payroll_run_count: usize,
799    /// Payroll line item count.
800    pub payroll_line_item_count: usize,
801    /// Time entry count.
802    pub time_entry_count: usize,
803    /// Expense report count.
804    pub expense_report_count: usize,
805    /// Benefit enrollment count.
806    pub benefit_enrollment_count: usize,
807    /// Pension plan count.
808    pub pension_plan_count: usize,
809    /// Stock grant count.
810    pub stock_grant_count: usize,
811}
812
813/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
814#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816    /// Revenue recognition contracts (actual data).
817    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818    /// Impairment tests (actual data).
819    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820    /// Business combinations (IFRS 3 / ASC 805).
821    pub business_combinations:
822        Vec<datasynth_core::models::business_combination::BusinessCombination>,
823    /// Journal entries generated from business combinations (Day 1 + amortization).
824    pub business_combination_journal_entries: Vec<JournalEntry>,
825    /// ECL models (IFRS 9 / ASC 326).
826    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827    /// ECL provision movements.
828    pub ecl_provision_movements:
829        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830    /// Journal entries from ECL provision.
831    pub ecl_journal_entries: Vec<JournalEntry>,
832    /// Provisions (IAS 37 / ASC 450).
833    pub provisions: Vec<datasynth_core::models::provision::Provision>,
834    /// Provision movement roll-forwards (IAS 37 / ASC 450).
835    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836    /// Contingent liabilities (IAS 37 / ASC 450).
837    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838    /// Journal entries from provisions.
839    pub provision_journal_entries: Vec<JournalEntry>,
840    /// IAS 21 functional currency translation results (one per entity per period).
841    pub currency_translation_results:
842        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843    /// Revenue recognition contract count.
844    pub revenue_contract_count: usize,
845    /// Impairment test count.
846    pub impairment_test_count: usize,
847    /// Business combination count.
848    pub business_combination_count: usize,
849    /// ECL model count.
850    pub ecl_model_count: usize,
851    /// Provision count.
852    pub provision_count: usize,
853    /// Currency translation result count (IAS 21).
854    pub currency_translation_count: usize,
855}
856
857/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
858#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860    /// Flattened standard records for output.
861    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862    /// Cross-reference records.
863    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864    /// Jurisdiction profile records.
865    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866    /// Generated audit procedures.
867    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868    /// Generated compliance findings.
869    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870    /// Generated regulatory filings.
871    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872    /// Compliance graph (if graph integration enabled).
873    pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
877#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879    /// Production orders (actual data).
880    pub production_orders: Vec<ProductionOrder>,
881    /// Quality inspections (actual data).
882    pub quality_inspections: Vec<QualityInspection>,
883    /// Cycle counts (actual data).
884    pub cycle_counts: Vec<CycleCount>,
885    /// BOM components (actual data).
886    pub bom_components: Vec<BomComponent>,
887    /// Inventory movements (actual data).
888    pub inventory_movements: Vec<InventoryMovement>,
889    /// Production order count.
890    pub production_order_count: usize,
891    /// Quality inspection count.
892    pub quality_inspection_count: usize,
893    /// Cycle count count.
894    pub cycle_count_count: usize,
895    /// BOM component count.
896    pub bom_component_count: usize,
897    /// Inventory movement count.
898    pub inventory_movement_count: usize,
899}
900
901/// Sales, KPI, and budget data snapshot.
902#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904    /// Sales quotes (actual data).
905    pub sales_quotes: Vec<SalesQuote>,
906    /// Management KPIs (actual data).
907    pub kpis: Vec<ManagementKpi>,
908    /// Budgets (actual data).
909    pub budgets: Vec<Budget>,
910    /// Sales quote count.
911    pub sales_quote_count: usize,
912    /// Management KPI count.
913    pub kpi_count: usize,
914    /// Budget line count.
915    pub budget_line_count: usize,
916}
917
918/// Anomaly labels generated during injection.
919#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921    /// All anomaly labels.
922    pub labels: Vec<LabeledAnomaly>,
923    /// Summary statistics.
924    pub summary: Option<AnomalySummary>,
925    /// Count by anomaly type.
926    pub by_type: HashMap<String, usize>,
927}
928
929/// Balance validation results from running balance tracker.
930#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932    /// Whether validation was performed.
933    pub validated: bool,
934    /// Whether balance sheet equation is satisfied.
935    pub is_balanced: bool,
936    /// Number of entries processed.
937    pub entries_processed: u64,
938    /// Total debits across all entries.
939    pub total_debits: rust_decimal::Decimal,
940    /// Total credits across all entries.
941    pub total_credits: rust_decimal::Decimal,
942    /// Number of accounts tracked.
943    pub accounts_tracked: usize,
944    /// Number of companies tracked.
945    pub companies_tracked: usize,
946    /// Validation errors encountered.
947    pub validation_errors: Vec<ValidationError>,
948    /// Whether any unbalanced entries were found.
949    pub has_unbalanced_entries: bool,
950}
951
952/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
953#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955    /// Tax jurisdictions.
956    pub jurisdictions: Vec<TaxJurisdiction>,
957    /// Tax codes.
958    pub codes: Vec<TaxCode>,
959    /// Tax lines computed on documents.
960    pub tax_lines: Vec<TaxLine>,
961    /// Tax returns filed per period.
962    pub tax_returns: Vec<TaxReturn>,
963    /// Tax provisions.
964    pub tax_provisions: Vec<TaxProvision>,
965    /// Withholding tax records.
966    pub withholding_records: Vec<WithholdingTaxRecord>,
967    /// Tax anomaly labels.
968    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969    /// Jurisdiction count.
970    pub jurisdiction_count: usize,
971    /// Code count.
972    pub code_count: usize,
973    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
974    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975    /// Journal entries posting tax payable/receivable from computed tax lines.
976    pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
980#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982    /// Group ownership structure (parent/subsidiary/associate relationships).
983    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984    /// IC matched pairs (transaction pairs between related entities).
985    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986    /// IC journal entries generated from matched pairs (seller side).
987    pub seller_journal_entries: Vec<JournalEntry>,
988    /// IC journal entries generated from matched pairs (buyer side).
989    pub buyer_journal_entries: Vec<JournalEntry>,
990    /// Elimination entries for consolidation.
991    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992    /// NCI measurements derived from group structure ownership percentages.
993    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
995    #[serde(skip)]
996    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997    /// IC matched pair count.
998    pub matched_pair_count: usize,
999    /// IC elimination entry count.
1000    pub elimination_entry_count: usize,
1001    /// IC matching rate (0.0 to 1.0).
1002    pub match_rate: f64,
1003}
1004
1005/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1006#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008    /// Emission records (scope 1, 2, 3).
1009    pub emissions: Vec<EmissionRecord>,
1010    /// Energy consumption records.
1011    pub energy: Vec<EnergyConsumption>,
1012    /// Water usage records.
1013    pub water: Vec<WaterUsage>,
1014    /// Waste records.
1015    pub waste: Vec<WasteRecord>,
1016    /// Workforce diversity metrics.
1017    pub diversity: Vec<WorkforceDiversityMetric>,
1018    /// Pay equity metrics.
1019    pub pay_equity: Vec<PayEquityMetric>,
1020    /// Safety incidents.
1021    pub safety_incidents: Vec<SafetyIncident>,
1022    /// Safety metrics.
1023    pub safety_metrics: Vec<SafetyMetric>,
1024    /// Governance metrics.
1025    pub governance: Vec<GovernanceMetric>,
1026    /// Supplier ESG assessments.
1027    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028    /// Materiality assessments.
1029    pub materiality: Vec<MaterialityAssessment>,
1030    /// ESG disclosures.
1031    pub disclosures: Vec<EsgDisclosure>,
1032    /// Climate scenarios.
1033    pub climate_scenarios: Vec<ClimateScenario>,
1034    /// ESG anomaly labels.
1035    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036    /// Total emission record count.
1037    pub emission_count: usize,
1038    /// Total disclosure count.
1039    pub disclosure_count: usize,
1040}
1041
1042/// Treasury data snapshot (cash management, hedging, debt, pooling).
1043#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045    /// Cash positions (daily balances per account).
1046    pub cash_positions: Vec<CashPosition>,
1047    /// Cash forecasts.
1048    pub cash_forecasts: Vec<CashForecast>,
1049    /// Cash pools.
1050    pub cash_pools: Vec<CashPool>,
1051    /// Cash pool sweep transactions.
1052    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053    /// Hedging instruments.
1054    pub hedging_instruments: Vec<HedgingInstrument>,
1055    /// Hedge relationships (ASC 815/IFRS 9 designations).
1056    pub hedge_relationships: Vec<HedgeRelationship>,
1057    /// Debt instruments.
1058    pub debt_instruments: Vec<DebtInstrument>,
1059    /// Bank guarantees and letters of credit.
1060    pub bank_guarantees: Vec<BankGuarantee>,
1061    /// Intercompany netting runs.
1062    pub netting_runs: Vec<NettingRun>,
1063    /// Treasury anomaly labels.
1064    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065    /// Journal entries generated from treasury instruments (debt interest accruals,
1066    /// hedge MTM, cash pool sweeps).
1067    pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1071#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073    /// Projects with WBS hierarchies.
1074    pub projects: Vec<Project>,
1075    /// Project cost lines (linked from source documents).
1076    pub cost_lines: Vec<ProjectCostLine>,
1077    /// Revenue recognition records.
1078    pub revenue_records: Vec<ProjectRevenue>,
1079    /// Earned value metrics.
1080    pub earned_value_metrics: Vec<EarnedValueMetric>,
1081    /// Change orders.
1082    pub change_orders: Vec<ChangeOrder>,
1083    /// Project milestones.
1084    pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087/// Complete result of enhanced generation run.
1088#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090    /// Generated chart of accounts.
1091    pub chart_of_accounts: ChartOfAccounts,
1092    /// Master data snapshot.
1093    pub master_data: MasterDataSnapshot,
1094    /// Document flow snapshot.
1095    pub document_flows: DocumentFlowSnapshot,
1096    /// Subledger snapshot (linked from document flows).
1097    pub subledger: SubledgerSnapshot,
1098    /// OCPM event log snapshot (if OCPM generation enabled).
1099    pub ocpm: OcpmSnapshot,
1100    /// Audit data snapshot (if audit generation enabled).
1101    pub audit: AuditSnapshot,
1102    /// Banking KYC/AML data snapshot (if banking generation enabled).
1103    pub banking: BankingSnapshot,
1104    /// Graph export snapshot (if graph export enabled).
1105    pub graph_export: GraphExportSnapshot,
1106    /// S2C sourcing data snapshot (if sourcing generation enabled).
1107    pub sourcing: SourcingSnapshot,
1108    /// Financial reporting snapshot (financial statements + bank reconciliations).
1109    pub financial_reporting: FinancialReportingSnapshot,
1110    /// HR data snapshot (payroll, time entries, expenses).
1111    pub hr: HrSnapshot,
1112    /// Accounting standards snapshot (revenue recognition, impairment).
1113    pub accounting_standards: AccountingStandardsSnapshot,
1114    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1115    pub manufacturing: ManufacturingSnapshot,
1116    /// Sales, KPI, and budget snapshot.
1117    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1119    pub tax: TaxSnapshot,
1120    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1121    pub esg: EsgSnapshot,
1122    /// Treasury data snapshot (cash management, hedging, debt).
1123    pub treasury: TreasurySnapshot,
1124    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1125    pub project_accounting: ProjectAccountingSnapshot,
1126    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1127    pub process_evolution: Vec<ProcessEvolutionEvent>,
1128    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1129    pub organizational_events: Vec<OrganizationalEvent>,
1130    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1131    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1133    pub intercompany: IntercompanySnapshot,
1134    /// Generated journal entries.
1135    pub journal_entries: Vec<JournalEntry>,
1136    /// Anomaly labels (if injection enabled).
1137    pub anomaly_labels: AnomalyLabels,
1138    /// Balance validation results (if validation enabled).
1139    pub balance_validation: BalanceValidationResult,
1140    /// Data quality statistics (if injection enabled).
1141    pub data_quality_stats: DataQualityStats,
1142    /// Data quality issue records (if injection enabled).
1143    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144    /// Generation statistics.
1145    pub statistics: EnhancedGenerationStatistics,
1146    /// Data lineage graph (if tracking enabled).
1147    pub lineage: Option<super::lineage::LineageGraph>,
1148    /// Quality gate evaluation result.
1149    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150    /// Internal controls (if controls generation enabled).
1151    pub internal_controls: Vec<InternalControl>,
1152    /// SoD (Segregation of Duties) violations identified during control application.
1153    ///
1154    /// Each record corresponds to a journal entry where `sod_violation == true`.
1155    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156    /// Opening balances (if opening balance generation enabled).
1157    pub opening_balances: Vec<GeneratedOpeningBalance>,
1158    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1159    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160    /// Counterfactual (original, mutated) JE pairs for ML training.
1161    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162    /// Fraud red-flag indicators on P2P/O2C documents.
1163    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164    /// Collusion rings (coordinated fraud networks).
1165    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166    /// Bi-temporal version chains for vendor entities.
1167    pub temporal_vendor_chains:
1168        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169    /// Entity relationship graph (nodes + edges with strength scores).
1170    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171    /// Cross-process links (P2P ↔ O2C via inventory movements).
1172    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173    /// Industry-specific GL accounts and metadata.
1174    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1176    pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179/// Enhanced statistics about a generation run.
1180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182    /// Total journal entries generated.
1183    pub total_entries: u64,
1184    /// Total line items generated.
1185    pub total_line_items: u64,
1186    /// Number of accounts in CoA.
1187    pub accounts_count: usize,
1188    /// Number of companies.
1189    pub companies_count: usize,
1190    /// Period in months.
1191    pub period_months: u32,
1192    /// Master data counts.
1193    pub vendor_count: usize,
1194    pub customer_count: usize,
1195    pub material_count: usize,
1196    pub asset_count: usize,
1197    pub employee_count: usize,
1198    /// Document flow counts.
1199    pub p2p_chain_count: usize,
1200    pub o2c_chain_count: usize,
1201    /// Subledger counts.
1202    pub ap_invoice_count: usize,
1203    pub ar_invoice_count: usize,
1204    /// OCPM counts.
1205    pub ocpm_event_count: usize,
1206    pub ocpm_object_count: usize,
1207    pub ocpm_case_count: usize,
1208    /// Audit counts.
1209    pub audit_engagement_count: usize,
1210    pub audit_workpaper_count: usize,
1211    pub audit_evidence_count: usize,
1212    pub audit_risk_count: usize,
1213    pub audit_finding_count: usize,
1214    pub audit_judgment_count: usize,
1215    /// ISA 505 confirmation counts.
1216    #[serde(default)]
1217    pub audit_confirmation_count: usize,
1218    #[serde(default)]
1219    pub audit_confirmation_response_count: usize,
1220    /// ISA 330/530 procedure step and sample counts.
1221    #[serde(default)]
1222    pub audit_procedure_step_count: usize,
1223    #[serde(default)]
1224    pub audit_sample_count: usize,
1225    /// ISA 520 analytical procedure counts.
1226    #[serde(default)]
1227    pub audit_analytical_result_count: usize,
1228    /// ISA 610 internal audit counts.
1229    #[serde(default)]
1230    pub audit_ia_function_count: usize,
1231    #[serde(default)]
1232    pub audit_ia_report_count: usize,
1233    /// ISA 550 related party counts.
1234    #[serde(default)]
1235    pub audit_related_party_count: usize,
1236    #[serde(default)]
1237    pub audit_related_party_transaction_count: usize,
1238    /// Anomaly counts.
1239    pub anomalies_injected: usize,
1240    /// Data quality issue counts.
1241    pub data_quality_issues: usize,
1242    /// Banking counts.
1243    pub banking_customer_count: usize,
1244    pub banking_account_count: usize,
1245    pub banking_transaction_count: usize,
1246    pub banking_suspicious_count: usize,
1247    /// Graph export counts.
1248    pub graph_export_count: usize,
1249    pub graph_node_count: usize,
1250    pub graph_edge_count: usize,
1251    /// LLM enrichment timing (milliseconds).
1252    #[serde(default)]
1253    pub llm_enrichment_ms: u64,
1254    /// Number of vendor names enriched by LLM.
1255    #[serde(default)]
1256    pub llm_vendors_enriched: usize,
1257    /// Diffusion enhancement timing (milliseconds).
1258    #[serde(default)]
1259    pub diffusion_enhancement_ms: u64,
1260    /// Number of diffusion samples generated.
1261    #[serde(default)]
1262    pub diffusion_samples_generated: usize,
1263    /// Causal generation timing (milliseconds).
1264    #[serde(default)]
1265    pub causal_generation_ms: u64,
1266    /// Number of causal samples generated.
1267    #[serde(default)]
1268    pub causal_samples_generated: usize,
1269    /// Whether causal validation passed.
1270    #[serde(default)]
1271    pub causal_validation_passed: Option<bool>,
1272    /// S2C sourcing counts.
1273    #[serde(default)]
1274    pub sourcing_project_count: usize,
1275    #[serde(default)]
1276    pub rfx_event_count: usize,
1277    #[serde(default)]
1278    pub bid_count: usize,
1279    #[serde(default)]
1280    pub contract_count: usize,
1281    #[serde(default)]
1282    pub catalog_item_count: usize,
1283    #[serde(default)]
1284    pub scorecard_count: usize,
1285    /// Financial reporting counts.
1286    #[serde(default)]
1287    pub financial_statement_count: usize,
1288    #[serde(default)]
1289    pub bank_reconciliation_count: usize,
1290    /// HR counts.
1291    #[serde(default)]
1292    pub payroll_run_count: usize,
1293    #[serde(default)]
1294    pub time_entry_count: usize,
1295    #[serde(default)]
1296    pub expense_report_count: usize,
1297    #[serde(default)]
1298    pub benefit_enrollment_count: usize,
1299    #[serde(default)]
1300    pub pension_plan_count: usize,
1301    #[serde(default)]
1302    pub stock_grant_count: usize,
1303    /// Accounting standards counts.
1304    #[serde(default)]
1305    pub revenue_contract_count: usize,
1306    #[serde(default)]
1307    pub impairment_test_count: usize,
1308    #[serde(default)]
1309    pub business_combination_count: usize,
1310    #[serde(default)]
1311    pub ecl_model_count: usize,
1312    #[serde(default)]
1313    pub provision_count: usize,
1314    /// Manufacturing counts.
1315    #[serde(default)]
1316    pub production_order_count: usize,
1317    #[serde(default)]
1318    pub quality_inspection_count: usize,
1319    #[serde(default)]
1320    pub cycle_count_count: usize,
1321    #[serde(default)]
1322    pub bom_component_count: usize,
1323    #[serde(default)]
1324    pub inventory_movement_count: usize,
1325    /// Sales & reporting counts.
1326    #[serde(default)]
1327    pub sales_quote_count: usize,
1328    #[serde(default)]
1329    pub kpi_count: usize,
1330    #[serde(default)]
1331    pub budget_line_count: usize,
1332    /// Tax counts.
1333    #[serde(default)]
1334    pub tax_jurisdiction_count: usize,
1335    #[serde(default)]
1336    pub tax_code_count: usize,
1337    /// ESG counts.
1338    #[serde(default)]
1339    pub esg_emission_count: usize,
1340    #[serde(default)]
1341    pub esg_disclosure_count: usize,
1342    /// Intercompany counts.
1343    #[serde(default)]
1344    pub ic_matched_pair_count: usize,
1345    #[serde(default)]
1346    pub ic_elimination_count: usize,
1347    /// Number of intercompany journal entries (seller + buyer side).
1348    #[serde(default)]
1349    pub ic_transaction_count: usize,
1350    /// Number of fixed asset subledger records.
1351    #[serde(default)]
1352    pub fa_subledger_count: usize,
1353    /// Number of inventory subledger records.
1354    #[serde(default)]
1355    pub inventory_subledger_count: usize,
1356    /// Treasury debt instrument count.
1357    #[serde(default)]
1358    pub treasury_debt_instrument_count: usize,
1359    /// Treasury hedging instrument count.
1360    #[serde(default)]
1361    pub treasury_hedging_instrument_count: usize,
1362    /// Project accounting project count.
1363    #[serde(default)]
1364    pub project_count: usize,
1365    /// Project accounting change order count.
1366    #[serde(default)]
1367    pub project_change_order_count: usize,
1368    /// Tax provision count.
1369    #[serde(default)]
1370    pub tax_provision_count: usize,
1371    /// Opening balance count.
1372    #[serde(default)]
1373    pub opening_balance_count: usize,
1374    /// Subledger reconciliation count.
1375    #[serde(default)]
1376    pub subledger_reconciliation_count: usize,
1377    /// Tax line count.
1378    #[serde(default)]
1379    pub tax_line_count: usize,
1380    /// Project cost line count.
1381    #[serde(default)]
1382    pub project_cost_line_count: usize,
1383    /// Cash position count.
1384    #[serde(default)]
1385    pub cash_position_count: usize,
1386    /// Cash forecast count.
1387    #[serde(default)]
1388    pub cash_forecast_count: usize,
1389    /// Cash pool count.
1390    #[serde(default)]
1391    pub cash_pool_count: usize,
1392    /// Process evolution event count.
1393    #[serde(default)]
1394    pub process_evolution_event_count: usize,
1395    /// Organizational event count.
1396    #[serde(default)]
1397    pub organizational_event_count: usize,
1398    /// Counterfactual pair count.
1399    #[serde(default)]
1400    pub counterfactual_pair_count: usize,
1401    /// Number of fraud red-flag indicators generated.
1402    #[serde(default)]
1403    pub red_flag_count: usize,
1404    /// Number of collusion rings generated.
1405    #[serde(default)]
1406    pub collusion_ring_count: usize,
1407    /// Number of bi-temporal vendor version chains generated.
1408    #[serde(default)]
1409    pub temporal_version_chain_count: usize,
1410    /// Number of nodes in the entity relationship graph.
1411    #[serde(default)]
1412    pub entity_relationship_node_count: usize,
1413    /// Number of edges in the entity relationship graph.
1414    #[serde(default)]
1415    pub entity_relationship_edge_count: usize,
1416    /// Number of cross-process links generated.
1417    #[serde(default)]
1418    pub cross_process_link_count: usize,
1419    /// Number of disruption events generated.
1420    #[serde(default)]
1421    pub disruption_event_count: usize,
1422    /// Number of industry-specific GL accounts generated.
1423    #[serde(default)]
1424    pub industry_gl_account_count: usize,
1425    /// Number of period-close journal entries generated (tax provision + closing entries).
1426    #[serde(default)]
1427    pub period_close_je_count: usize,
1428}
1429
1430/// Enhanced orchestrator with full feature integration.
1431pub struct EnhancedOrchestrator {
1432    config: GeneratorConfig,
1433    phase_config: PhaseConfig,
1434    coa: Option<Arc<ChartOfAccounts>>,
1435    master_data: MasterDataSnapshot,
1436    seed: u64,
1437    multi_progress: Option<MultiProgress>,
1438    /// Resource guard for memory, disk, and CPU monitoring
1439    resource_guard: ResourceGuard,
1440    /// Output path for disk space monitoring
1441    output_path: Option<PathBuf>,
1442    /// Copula generators for preserving correlations (from fingerprint)
1443    copula_generators: Vec<CopulaGeneratorSpec>,
1444    /// Country pack registry for localized data generation
1445    country_pack_registry: datasynth_core::CountryPackRegistry,
1446    /// Optional streaming sink for phase-by-phase output
1447    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1448}
1449
1450impl EnhancedOrchestrator {
1451    /// Create a new enhanced orchestrator.
1452    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453        datasynth_config::validate_config(&config)?;
1454
1455        let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457        // Build resource guard from config
1458        let resource_guard = Self::build_resource_guard(&config, None);
1459
1460        // Build country pack registry from config
1461        let country_pack_registry = match &config.country_packs {
1462            Some(cp) => {
1463                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464                    .map_err(|e| SynthError::config(e.to_string()))?
1465            }
1466            None => datasynth_core::CountryPackRegistry::builtin_only()
1467                .map_err(|e| SynthError::config(e.to_string()))?,
1468        };
1469
1470        Ok(Self {
1471            config,
1472            phase_config,
1473            coa: None,
1474            master_data: MasterDataSnapshot::default(),
1475            seed,
1476            multi_progress: None,
1477            resource_guard,
1478            output_path: None,
1479            copula_generators: Vec::new(),
1480            country_pack_registry,
1481            phase_sink: None,
1482        })
1483    }
1484
1485    /// Create with default phase config.
1486    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487        Self::new(config, PhaseConfig::default())
1488    }
1489
1490    /// Set a streaming phase sink for real-time output (builder pattern).
1491    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492        self.phase_sink = Some(sink);
1493        self
1494    }
1495
1496    /// Set a streaming phase sink on an existing orchestrator.
1497    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498        self.phase_sink = Some(sink);
1499    }
1500
1501    /// Emit a batch of items to the phase sink (if configured).
1502    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503        if let Some(ref sink) = self.phase_sink {
1504            for item in items {
1505                if let Ok(value) = serde_json::to_value(item) {
1506                    if let Err(e) = sink.emit(phase, type_name, &value) {
1507                        warn!(
1508                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509                        );
1510                    }
1511                }
1512            }
1513            if let Err(e) = sink.phase_complete(phase) {
1514                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515            }
1516        }
1517    }
1518
1519    /// Enable/disable progress bars.
1520    pub fn with_progress(mut self, show: bool) -> Self {
1521        self.phase_config.show_progress = show;
1522        if show {
1523            self.multi_progress = Some(MultiProgress::new());
1524        }
1525        self
1526    }
1527
1528    /// Set the output path for disk space monitoring.
1529    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530        let path = path.into();
1531        self.output_path = Some(path.clone());
1532        // Rebuild resource guard with the output path
1533        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534        self
1535    }
1536
1537    /// Access the country pack registry.
1538    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539        &self.country_pack_registry
1540    }
1541
1542    /// Look up a country pack by country code string.
1543    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544        self.country_pack_registry.get_by_str(country)
1545    }
1546
1547    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1548    /// company, defaulting to `"US"` if no companies are configured.
1549    fn primary_country_code(&self) -> &str {
1550        self.config
1551            .companies
1552            .first()
1553            .map(|c| c.country.as_str())
1554            .unwrap_or("US")
1555    }
1556
1557    /// Resolve the country pack for the primary (first) company.
1558    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559        self.country_pack_for(self.primary_country_code())
1560    }
1561
1562    /// Resolve the CoA framework from config/country-pack.
1563    fn resolve_coa_framework(&self) -> CoAFramework {
1564        if self.config.accounting_standards.enabled {
1565            match self.config.accounting_standards.framework {
1566                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567                    return CoAFramework::FrenchPcg;
1568                }
1569                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570                    return CoAFramework::GermanSkr04;
1571                }
1572                _ => {}
1573            }
1574        }
1575        // Fallback: derive from country pack
1576        let pack = self.primary_pack();
1577        match pack.accounting.framework.as_str() {
1578            "french_gaap" => CoAFramework::FrenchPcg,
1579            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580            _ => CoAFramework::UsGaap,
1581        }
1582    }
1583
1584    /// Check if copula generators are available.
1585    ///
1586    /// Returns true if the orchestrator has copula generators for preserving
1587    /// correlations (typically from fingerprint-based generation).
1588    pub fn has_copulas(&self) -> bool {
1589        !self.copula_generators.is_empty()
1590    }
1591
1592    /// Get the copula generators.
1593    ///
1594    /// Returns a reference to the copula generators for use during generation.
1595    /// These can be used to generate correlated samples that preserve the
1596    /// statistical relationships from the source data.
1597    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598        &self.copula_generators
1599    }
1600
1601    /// Get a mutable reference to the copula generators.
1602    ///
1603    /// Allows generators to sample from copulas during data generation.
1604    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605        &mut self.copula_generators
1606    }
1607
1608    /// Sample correlated values from a named copula.
1609    ///
1610    /// Returns None if the copula doesn't exist.
1611    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612        self.copula_generators
1613            .iter_mut()
1614            .find(|c| c.name == copula_name)
1615            .map(|c| c.generator.sample())
1616    }
1617
1618    /// Create an orchestrator from a fingerprint file.
1619    ///
1620    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1621    /// and creates an orchestrator configured to generate data matching
1622    /// the statistical properties of the original data.
1623    ///
1624    /// # Arguments
1625    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1626    /// * `phase_config` - Phase configuration for generation
1627    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1628    ///
1629    /// # Example
1630    /// ```no_run
1631    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1632    /// use std::path::Path;
1633    ///
1634    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1635    ///     Path::new("fingerprint.dsf"),
1636    ///     PhaseConfig::default(),
1637    ///     1.0,
1638    /// ).unwrap();
1639    /// ```
1640    pub fn from_fingerprint(
1641        fingerprint_path: &std::path::Path,
1642        phase_config: PhaseConfig,
1643        scale: f64,
1644    ) -> SynthResult<Self> {
1645        info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647        // Read the fingerprint
1648        let reader = FingerprintReader::new();
1649        let fingerprint = reader
1650            .read_from_file(fingerprint_path)
1651            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654    }
1655
1656    /// Create an orchestrator from a loaded fingerprint.
1657    ///
1658    /// # Arguments
1659    /// * `fingerprint` - The loaded fingerprint
1660    /// * `phase_config` - Phase configuration for generation
1661    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1662    pub fn from_fingerprint_data(
1663        fingerprint: Fingerprint,
1664        phase_config: PhaseConfig,
1665        scale: f64,
1666    ) -> SynthResult<Self> {
1667        info!(
1668            "Synthesizing config from fingerprint (version: {}, tables: {})",
1669            fingerprint.manifest.version,
1670            fingerprint.schema.tables.len()
1671        );
1672
1673        // Generate a seed for the synthesis
1674        let seed: u64 = rand::random();
1675        info!("Fingerprint synthesis seed: {}", seed);
1676
1677        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1678        let options = SynthesisOptions {
1679            scale,
1680            seed: Some(seed),
1681            preserve_correlations: true,
1682            inject_anomalies: true,
1683        };
1684        let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686        // Synthesize full result including copula generators
1687        let synthesis_result = synthesizer
1688            .synthesize_full(&fingerprint, seed)
1689            .map_err(|e| {
1690                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691            })?;
1692
1693        // Start with a base config from the fingerprint's industry if available
1694        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695            Self::base_config_for_industry(industry)
1696        } else {
1697            Self::base_config_for_industry("manufacturing")
1698        };
1699
1700        // Apply the synthesized patches
1701        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703        // Log synthesis results
1704        info!(
1705            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706            fingerprint.schema.tables.len(),
1707            scale,
1708            synthesis_result.copula_generators.len()
1709        );
1710
1711        if !synthesis_result.copula_generators.is_empty() {
1712            for spec in &synthesis_result.copula_generators {
1713                info!(
1714                    "  Copula '{}' for table '{}': {} columns",
1715                    spec.name,
1716                    spec.table,
1717                    spec.columns.len()
1718                );
1719            }
1720        }
1721
1722        // Create the orchestrator with the synthesized config
1723        let mut orchestrator = Self::new(config, phase_config)?;
1724
1725        // Store copula generators for use during generation
1726        orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728        Ok(orchestrator)
1729    }
1730
1731    /// Create a base config for a given industry.
1732    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733        use datasynth_config::presets::create_preset;
1734        use datasynth_config::TransactionVolume;
1735        use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737        let sector = match industry.to_lowercase().as_str() {
1738            "manufacturing" => IndustrySector::Manufacturing,
1739            "retail" => IndustrySector::Retail,
1740            "financial" | "financial_services" => IndustrySector::FinancialServices,
1741            "healthcare" => IndustrySector::Healthcare,
1742            "technology" | "tech" => IndustrySector::Technology,
1743            _ => IndustrySector::Manufacturing,
1744        };
1745
1746        // Create a preset with reasonable defaults
1747        create_preset(
1748            sector,
1749            1,  // company count
1750            12, // period months
1751            CoAComplexity::Medium,
1752            TransactionVolume::TenK,
1753        )
1754    }
1755
1756    /// Apply a config patch to a GeneratorConfig.
1757    fn apply_config_patch(
1758        mut config: GeneratorConfig,
1759        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760    ) -> GeneratorConfig {
1761        use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763        for (key, value) in patch.values() {
1764            match (key.as_str(), value) {
1765                // Transaction count is handled via TransactionVolume enum on companies
1766                // Log it but cannot directly set it (would need to modify company volumes)
1767                ("transactions.count", ConfigValue::Integer(n)) => {
1768                    info!(
1769                        "Fingerprint suggests {} transactions (apply via company volumes)",
1770                        n
1771                    );
1772                }
1773                ("global.period_months", ConfigValue::Integer(n)) => {
1774                    config.global.period_months = (*n).clamp(1, 120) as u32;
1775                }
1776                ("global.start_date", ConfigValue::String(s)) => {
1777                    config.global.start_date = s.clone();
1778                }
1779                ("global.seed", ConfigValue::Integer(n)) => {
1780                    config.global.seed = Some(*n as u64);
1781                }
1782                ("fraud.enabled", ConfigValue::Bool(b)) => {
1783                    config.fraud.enabled = *b;
1784                }
1785                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786                    config.fraud.fraud_rate = *f;
1787                }
1788                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789                    config.data_quality.enabled = *b;
1790                }
1791                // Handle anomaly injection paths (mapped to fraud config)
1792                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793                    config.fraud.enabled = *b;
1794                }
1795                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796                    config.fraud.fraud_rate = *f;
1797                }
1798                _ => {
1799                    debug!("Ignoring unknown config patch key: {}", key);
1800                }
1801            }
1802        }
1803
1804        config
1805    }
1806
1807    /// Build a resource guard from the configuration.
1808    fn build_resource_guard(
1809        config: &GeneratorConfig,
1810        output_path: Option<PathBuf>,
1811    ) -> ResourceGuard {
1812        let mut builder = ResourceGuardBuilder::new();
1813
1814        // Configure memory limit if set
1815        if config.global.memory_limit_mb > 0 {
1816            builder = builder.memory_limit(config.global.memory_limit_mb);
1817        }
1818
1819        // Configure disk monitoring for output path
1820        if let Some(path) = output_path {
1821            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1822        }
1823
1824        // Use conservative degradation settings for production safety
1825        builder = builder.conservative();
1826
1827        builder.build()
1828    }
1829
1830    /// Check resources (memory, disk, CPU) and return degradation level.
1831    ///
1832    /// Returns an error if hard limits are exceeded.
1833    /// Returns Ok(DegradationLevel) indicating current resource state.
1834    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835        self.resource_guard.check()
1836    }
1837
1838    /// Check resources with logging.
1839    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840        let level = self.resource_guard.check()?;
1841
1842        if level != DegradationLevel::Normal {
1843            warn!(
1844                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845                phase,
1846                level,
1847                self.resource_guard.current_memory_mb(),
1848                self.resource_guard.available_disk_mb()
1849            );
1850        }
1851
1852        Ok(level)
1853    }
1854
1855    /// Get current degradation actions based on resource state.
1856    fn get_degradation_actions(&self) -> DegradationActions {
1857        self.resource_guard.get_actions()
1858    }
1859
1860    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1861    fn check_memory_limit(&self) -> SynthResult<()> {
1862        self.check_resources()?;
1863        Ok(())
1864    }
1865
1866    /// Run the complete generation workflow.
1867    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868        info!("Starting enhanced generation workflow");
1869        info!(
1870            "Config: industry={:?}, period_months={}, companies={}",
1871            self.config.global.industry,
1872            self.config.global.period_months,
1873            self.config.companies.len()
1874        );
1875
1876        // Set decimal serialization mode (thread-local, affects JSON output).
1877        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
1878        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879        datasynth_core::serde_decimal::set_numeric_native(is_native);
1880        struct NumericModeGuard;
1881        impl Drop for NumericModeGuard {
1882            fn drop(&mut self) {
1883                datasynth_core::serde_decimal::set_numeric_native(false);
1884            }
1885        }
1886        let _numeric_guard = if is_native {
1887            Some(NumericModeGuard)
1888        } else {
1889            None
1890        };
1891
1892        // Initial resource check before starting
1893        let initial_level = self.check_resources_with_log("initial")?;
1894        if initial_level == DegradationLevel::Emergency {
1895            return Err(SynthError::resource(
1896                "Insufficient resources to start generation",
1897            ));
1898        }
1899
1900        let mut stats = EnhancedGenerationStatistics {
1901            companies_count: self.config.companies.len(),
1902            period_months: self.config.global.period_months,
1903            ..Default::default()
1904        };
1905
1906        // Phase 1: Chart of Accounts
1907        let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909        // Phase 2: Master Data
1910        self.phase_master_data(&mut stats)?;
1911
1912        // Emit master data to stream sink
1913        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917        // Phase 3: Document Flows + Subledger Linking
1918        let (mut document_flows, mut subledger, fa_journal_entries) =
1919            self.phase_document_flows(&mut stats)?;
1920
1921        // Emit document flows to stream sink
1922        self.emit_phase_items(
1923            "document_flows",
1924            "PurchaseOrder",
1925            &document_flows.purchase_orders,
1926        );
1927        self.emit_phase_items(
1928            "document_flows",
1929            "GoodsReceipt",
1930            &document_flows.goods_receipts,
1931        );
1932        self.emit_phase_items(
1933            "document_flows",
1934            "VendorInvoice",
1935            &document_flows.vendor_invoices,
1936        );
1937        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940        // Phase 3b: Opening Balances (before JE generation)
1941        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943        // Phase 3c: Convert opening balances to journal entries and prepend them.
1944        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1945        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1946        // balance map type.
1947        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948            .iter()
1949            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950            .collect();
1951        if !opening_balance_jes.is_empty() {
1952            debug!(
1953                "Prepending {} opening balance JEs to entries",
1954                opening_balance_jes.len()
1955            );
1956        }
1957
1958        // Phase 4: Journal Entries
1959        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1962        // starts from the correct initial state.
1963        if !opening_balance_jes.is_empty() {
1964            let mut combined = opening_balance_jes;
1965            combined.extend(entries);
1966            entries = combined;
1967        }
1968
1969        // Phase 4c: Append FA acquisition journal entries to main entries
1970        if !fa_journal_entries.is_empty() {
1971            debug!(
1972                "Appending {} FA acquisition JEs to main entries",
1973                fa_journal_entries.len()
1974            );
1975            entries.extend(fa_journal_entries);
1976        }
1977
1978        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1979        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981        // Get current degradation actions for optional phases
1982        let actions = self.get_degradation_actions();
1983
1984        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1985        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1988        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1989        if !sourcing.contracts.is_empty() {
1990            let mut linked_count = 0usize;
1991            // Collect (vendor_id, po_id) pairs from P2P chains
1992            let po_vendor_pairs: Vec<(String, String)> = document_flows
1993                .p2p_chains
1994                .iter()
1995                .map(|chain| {
1996                    (
1997                        chain.purchase_order.vendor_id.clone(),
1998                        chain.purchase_order.header.document_id.clone(),
1999                    )
2000                })
2001                .collect();
2002
2003            for chain in &mut document_flows.p2p_chains {
2004                if chain.purchase_order.contract_id.is_none() {
2005                    if let Some(contract) = sourcing
2006                        .contracts
2007                        .iter()
2008                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009                    {
2010                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011                        linked_count += 1;
2012                    }
2013                }
2014            }
2015
2016            // Populate reverse FK: purchase_order_ids on each contract
2017            for contract in &mut sourcing.contracts {
2018                let po_ids: Vec<String> = po_vendor_pairs
2019                    .iter()
2020                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021                    .map(|(_, po_id)| po_id.clone())
2022                    .collect();
2023                if !po_ids.is_empty() {
2024                    contract.purchase_order_ids = po_ids;
2025                }
2026            }
2027
2028            if linked_count > 0 {
2029                debug!(
2030                    "Linked {} purchase orders to S2C contracts by vendor match",
2031                    linked_count
2032                );
2033            }
2034        }
2035
2036        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2037        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039        // Phase 5c: Append IC journal entries to main entries
2040        if !intercompany.seller_journal_entries.is_empty()
2041            || !intercompany.buyer_journal_entries.is_empty()
2042        {
2043            let ic_je_count = intercompany.seller_journal_entries.len()
2044                + intercompany.buyer_journal_entries.len();
2045            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047            debug!(
2048                "Appended {} IC journal entries to main entries",
2049                ic_je_count
2050            );
2051        }
2052
2053        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2054        if !intercompany.elimination_entries.is_empty() {
2055            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056                &intercompany.elimination_entries,
2057            );
2058            if !elim_jes.is_empty() {
2059                debug!(
2060                    "Appended {} elimination journal entries to main entries",
2061                    elim_jes.len()
2062                );
2063                // IC elimination net-zero validation
2064                let elim_debit: rust_decimal::Decimal =
2065                    elim_jes.iter().map(|je| je.total_debit()).sum();
2066                let elim_credit: rust_decimal::Decimal =
2067                    elim_jes.iter().map(|je| je.total_credit()).sum();
2068                if elim_debit != elim_credit {
2069                    warn!(
2070                        "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2071                        elim_debit,
2072                        elim_credit,
2073                        elim_debit - elim_credit
2074                    );
2075                }
2076                entries.extend(elim_jes);
2077            }
2078        }
2079
2080        // Phase 5e: Wire IC source documents into document flow snapshot
2081        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2082            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2083                document_flows
2084                    .customer_invoices
2085                    .extend(ic_docs.seller_invoices.iter().cloned());
2086                document_flows
2087                    .purchase_orders
2088                    .extend(ic_docs.buyer_orders.iter().cloned());
2089                document_flows
2090                    .goods_receipts
2091                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2092                document_flows
2093                    .vendor_invoices
2094                    .extend(ic_docs.buyer_invoices.iter().cloned());
2095                debug!(
2096                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2097                    ic_docs.seller_invoices.len(),
2098                    ic_docs.buyer_orders.len(),
2099                    ic_docs.buyer_goods_receipts.len(),
2100                    ic_docs.buyer_invoices.len(),
2101                );
2102            }
2103        }
2104
2105        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2106        let hr = self.phase_hr_data(&mut stats)?;
2107
2108        // Phase 6b: Generate JEs from payroll runs
2109        if !hr.payroll_runs.is_empty() {
2110            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2111            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2112            entries.extend(payroll_jes);
2113        }
2114
2115        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2116        if !hr.pension_journal_entries.is_empty() {
2117            debug!(
2118                "Generated {} JEs from pension plans",
2119                hr.pension_journal_entries.len()
2120            );
2121            entries.extend(hr.pension_journal_entries.iter().cloned());
2122        }
2123
2124        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2125        if !hr.stock_comp_journal_entries.is_empty() {
2126            debug!(
2127                "Generated {} JEs from stock-based compensation",
2128                hr.stock_comp_journal_entries.len()
2129            );
2130            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2131        }
2132
2133        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2134        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2135
2136        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2137        if !manufacturing_snap.production_orders.is_empty() {
2138            let currency = self
2139                .config
2140                .companies
2141                .first()
2142                .map(|c| c.currency.as_str())
2143                .unwrap_or("USD");
2144            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2145                &manufacturing_snap.production_orders,
2146                &manufacturing_snap.quality_inspections,
2147                currency,
2148            );
2149            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2150            entries.extend(mfg_jes);
2151        }
2152
2153        // Phase 7a-warranty: Generate warranty provisions per company
2154        if !manufacturing_snap.quality_inspections.is_empty() {
2155            let framework = match self.config.accounting_standards.framework {
2156                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2157                _ => "US_GAAP",
2158            };
2159            for company in &self.config.companies {
2160                let company_orders: Vec<_> = manufacturing_snap
2161                    .production_orders
2162                    .iter()
2163                    .filter(|o| o.company_code == company.code)
2164                    .cloned()
2165                    .collect();
2166                let company_inspections: Vec<_> = manufacturing_snap
2167                    .quality_inspections
2168                    .iter()
2169                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2170                    .cloned()
2171                    .collect();
2172                if company_inspections.is_empty() {
2173                    continue;
2174                }
2175                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2176                let warranty_result = warranty_gen.generate(
2177                    &company.code,
2178                    &company_orders,
2179                    &company_inspections,
2180                    &company.currency,
2181                    framework,
2182                );
2183                if !warranty_result.journal_entries.is_empty() {
2184                    debug!(
2185                        "Generated {} warranty provision JEs for {}",
2186                        warranty_result.journal_entries.len(),
2187                        company.code
2188                    );
2189                    entries.extend(warranty_result.journal_entries);
2190                }
2191            }
2192        }
2193
2194        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2195        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2196        {
2197            let cogs_currency = self
2198                .config
2199                .companies
2200                .first()
2201                .map(|c| c.currency.as_str())
2202                .unwrap_or("USD");
2203            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2204                &document_flows.deliveries,
2205                &manufacturing_snap.production_orders,
2206                cogs_currency,
2207            );
2208            if !cogs_jes.is_empty() {
2209                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2210                entries.extend(cogs_jes);
2211            }
2212        }
2213
2214        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2215        //
2216        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2217        // subledger inventory positions.  Here we reconcile them so that position balances
2218        // reflect the actual stock movements within the generation period.
2219        if !manufacturing_snap.inventory_movements.is_empty()
2220            && !subledger.inventory_positions.is_empty()
2221        {
2222            use datasynth_core::models::MovementType as MfgMovementType;
2223            let mut receipt_count = 0usize;
2224            let mut issue_count = 0usize;
2225            for movement in &manufacturing_snap.inventory_movements {
2226                // Find a matching position by material code and company
2227                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2228                    p.material_id == movement.material_code
2229                        && p.company_code == movement.entity_code
2230                }) {
2231                    match movement.movement_type {
2232                        MfgMovementType::GoodsReceipt => {
2233                            // Increase stock and update weighted-average cost
2234                            pos.add_quantity(
2235                                movement.quantity,
2236                                movement.value,
2237                                movement.movement_date,
2238                            );
2239                            receipt_count += 1;
2240                        }
2241                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2242                            // Decrease stock (best-effort; silently skip if insufficient)
2243                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2244                            issue_count += 1;
2245                        }
2246                        _ => {}
2247                    }
2248                }
2249            }
2250            debug!(
2251                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2252                manufacturing_snap.inventory_movements.len(),
2253                receipt_count,
2254                issue_count,
2255            );
2256        }
2257
2258        // Update final entry/line-item stats after all JE-generating phases
2259        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2260        if !entries.is_empty() {
2261            stats.total_entries = entries.len() as u64;
2262            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2263            debug!(
2264                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2265                stats.total_entries, stats.total_line_items
2266            );
2267        }
2268
2269        // Phase 7b: Apply internal controls to journal entries
2270        if self.config.internal_controls.enabled && !entries.is_empty() {
2271            info!("Phase 7b: Applying internal controls to journal entries");
2272            let control_config = ControlGeneratorConfig {
2273                exception_rate: self.config.internal_controls.exception_rate,
2274                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2275                enable_sox_marking: true,
2276                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2277                    self.config.internal_controls.sox_materiality_threshold,
2278                )
2279                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2280                ..Default::default()
2281            };
2282            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2283            for entry in &mut entries {
2284                control_gen.apply_controls(entry, &coa);
2285            }
2286            let with_controls = entries
2287                .iter()
2288                .filter(|e| !e.header.control_ids.is_empty())
2289                .count();
2290            info!(
2291                "Applied controls to {} entries ({} with control IDs assigned)",
2292                entries.len(),
2293                with_controls
2294            );
2295        }
2296
2297        // Phase 7c: Extract SoD violations from annotated journal entries.
2298        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2299        // Here we materialise those flags into standalone SodViolation records.
2300        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2301            .iter()
2302            .filter(|e| e.header.sod_violation)
2303            .filter_map(|e| {
2304                e.header.sod_conflict_type.map(|ct| {
2305                    use datasynth_core::models::{RiskLevel, SodViolation};
2306                    let severity = match ct {
2307                        datasynth_core::models::SodConflictType::PaymentReleaser
2308                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2309                            RiskLevel::Critical
2310                        }
2311                        datasynth_core::models::SodConflictType::PreparerApprover
2312                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2313                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2314                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2315                            RiskLevel::High
2316                        }
2317                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2318                            RiskLevel::Medium
2319                        }
2320                    };
2321                    let action = format!(
2322                        "SoD conflict {:?} on entry {} ({})",
2323                        ct, e.header.document_id, e.header.company_code
2324                    );
2325                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2326                })
2327            })
2328            .collect();
2329        if !sod_violations.is_empty() {
2330            info!(
2331                "Phase 7c: Extracted {} SoD violations from {} entries",
2332                sod_violations.len(),
2333                entries.len()
2334            );
2335        }
2336
2337        // Emit journal entries to stream sink (after all JE-generating phases)
2338        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2339
2340        // Phase 8: Anomaly Injection (after all JE-generating phases)
2341        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2342
2343        // Emit anomaly labels to stream sink
2344        self.emit_phase_items(
2345            "anomaly_injection",
2346            "LabeledAnomaly",
2347            &anomaly_labels.labels,
2348        );
2349
2350        // Propagate fraud labels from journal entries to source documents.
2351        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2352        // instead of tracing through document_references.json.
2353        {
2354            use std::collections::HashMap;
2355            // Build a map from JE reference / JE document_id -> fraud_type from fraudulent JEs
2356            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2357            for je in &entries {
2358                if je.header.is_fraud {
2359                    if let Some(ref fraud_type) = je.header.fraud_type {
2360                        // Extract referenced document ID from the JE reference field
2361                        if let Some(ref reference) = je.header.reference {
2362                            fraud_map.insert(reference.clone(), *fraud_type);
2363                        }
2364                        // Also tag via journal_entry_id on document headers
2365                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2366                    }
2367                }
2368            }
2369            if !fraud_map.is_empty() {
2370                let mut propagated = 0usize;
2371                // Use DocumentHeader::propagate_fraud method for each doc type
2372                macro_rules! propagate_to {
2373                    ($collection:expr) => {
2374                        for doc in &mut $collection {
2375                            if doc.header.propagate_fraud(&fraud_map) {
2376                                propagated += 1;
2377                            }
2378                        }
2379                    };
2380                }
2381                propagate_to!(document_flows.purchase_orders);
2382                propagate_to!(document_flows.goods_receipts);
2383                propagate_to!(document_flows.vendor_invoices);
2384                propagate_to!(document_flows.payments);
2385                propagate_to!(document_flows.sales_orders);
2386                propagate_to!(document_flows.deliveries);
2387                propagate_to!(document_flows.customer_invoices);
2388                if propagated > 0 {
2389                    info!(
2390                        "Propagated fraud labels to {} document flow records",
2391                        propagated
2392                    );
2393                }
2394            }
2395        }
2396
2397        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2398        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2399
2400        // Emit red flags to stream sink
2401        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2402
2403        // Phase 26b: Collusion Ring Generation (after red flags)
2404        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2405
2406        // Emit collusion rings to stream sink
2407        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2408
2409        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2410        let balance_validation = self.phase_balance_validation(&entries)?;
2411
2412        // Phase 9b: GL-to-Subledger Reconciliation
2413        let subledger_reconciliation =
2414            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2415
2416        // Phase 10: Data Quality Injection
2417        let (data_quality_stats, quality_issues) =
2418            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2419
2420        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2421        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2422
2423        // Phase 11: Audit Data
2424        let audit = self.phase_audit_data(&entries, &mut stats)?;
2425
2426        // Phase 12: Banking KYC/AML Data
2427        let mut banking = self.phase_banking_data(&mut stats)?;
2428
2429        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2430        // Creates coherence between the accounting layer (payments, JEs) and the
2431        // banking layer (bank transactions). A vendor invoice payment now appears
2432        // on both sides with cross-references and fraud labels propagated.
2433        if self.phase_config.generate_banking
2434            && !document_flows.payments.is_empty()
2435            && !banking.accounts.is_empty()
2436        {
2437            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2438            if bridge_rate > 0.0 {
2439                let mut bridge =
2440                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2441                        self.seed,
2442                    );
2443                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2444                    &document_flows.payments,
2445                    &banking.customers,
2446                    &banking.accounts,
2447                    bridge_rate,
2448                );
2449                info!(
2450                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2451                    bridge_stats.bridged_count,
2452                    bridge_stats.transactions_emitted,
2453                    bridge_stats.fraud_propagated,
2454                );
2455                let bridged_count = bridged_txns.len();
2456                banking.transactions.extend(bridged_txns);
2457
2458                // Re-run velocity computation so bridged txns also get features
2459                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2460                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2461                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2462                        &mut banking.transactions,
2463                    );
2464                }
2465
2466                // Recompute suspicious count after bridging
2467                banking.suspicious_count = banking
2468                    .transactions
2469                    .iter()
2470                    .filter(|t| t.is_suspicious)
2471                    .count();
2472                stats.banking_transaction_count = banking.transactions.len();
2473                stats.banking_suspicious_count = banking.suspicious_count;
2474            }
2475        }
2476
2477        // Phase 13: Graph Export
2478        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2479
2480        // Phase 14: LLM Enrichment
2481        self.phase_llm_enrichment(&mut stats);
2482
2483        // Phase 15: Diffusion Enhancement
2484        self.phase_diffusion_enhancement(&mut stats);
2485
2486        // Phase 16: Causal Overlay
2487        self.phase_causal_overlay(&mut stats);
2488
2489        // Phase 17: Bank Reconciliation + Financial Statements
2490        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2491        // provision data (from accounting_standards / tax snapshots) can be wired in.
2492        let mut financial_reporting = self.phase_financial_reporting(
2493            &document_flows,
2494            &entries,
2495            &coa,
2496            &hr,
2497            &audit,
2498            &mut stats,
2499        )?;
2500
2501        // BS coherence check: assets = liabilities + equity
2502        {
2503            use datasynth_core::models::StatementType;
2504            for stmt in &financial_reporting.consolidated_statements {
2505                if stmt.statement_type == StatementType::BalanceSheet {
2506                    let total_assets: rust_decimal::Decimal = stmt
2507                        .line_items
2508                        .iter()
2509                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2510                        .map(|li| li.amount)
2511                        .sum();
2512                    let total_le: rust_decimal::Decimal = stmt
2513                        .line_items
2514                        .iter()
2515                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2516                        .map(|li| li.amount)
2517                        .sum();
2518                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2519                        warn!(
2520                            "BS equation imbalance: assets={}, L+E={}",
2521                            total_assets, total_le
2522                        );
2523                    }
2524                }
2525            }
2526        }
2527
2528        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2529        let accounting_standards =
2530            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2531
2532        // Phase 18a: Merge ECL journal entries into main GL
2533        if !accounting_standards.ecl_journal_entries.is_empty() {
2534            debug!(
2535                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2536                accounting_standards.ecl_journal_entries.len()
2537            );
2538            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2539        }
2540
2541        // Phase 18a: Merge provision journal entries into main GL
2542        if !accounting_standards.provision_journal_entries.is_empty() {
2543            debug!(
2544                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2545                accounting_standards.provision_journal_entries.len()
2546            );
2547            entries.extend(
2548                accounting_standards
2549                    .provision_journal_entries
2550                    .iter()
2551                    .cloned(),
2552            );
2553        }
2554
2555        // Phase 18b: OCPM Events (after all process data is available)
2556        let ocpm = self.phase_ocpm_events(
2557            &document_flows,
2558            &sourcing,
2559            &hr,
2560            &manufacturing_snap,
2561            &banking,
2562            &audit,
2563            &financial_reporting,
2564            &mut stats,
2565        )?;
2566
2567        // Emit OCPM events to stream sink
2568        if let Some(ref event_log) = ocpm.event_log {
2569            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2570        }
2571
2572        // Phase 19: Sales Quotes, Management KPIs, Budgets
2573        let sales_kpi_budgets =
2574            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2575
2576        // Phase 22: Treasury Data Generation
2577        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
2578        // are included in the pre-tax income used by phase_tax_generation.
2579        let treasury =
2580            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2581
2582        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
2583        if !treasury.journal_entries.is_empty() {
2584            debug!(
2585                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2586                treasury.journal_entries.len()
2587            );
2588            entries.extend(treasury.journal_entries.iter().cloned());
2589        }
2590
2591        // Phase 20: Tax Generation
2592        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2593
2594        // Phase 20 JEs: Merge tax posting journal entries into main GL
2595        if !tax.tax_posting_journal_entries.is_empty() {
2596            debug!(
2597                "Merging {} tax posting JEs into GL",
2598                tax.tax_posting_journal_entries.len()
2599            );
2600            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2601        }
2602
2603        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
2604        // Build supplementary cash flow items from upstream JE data (depreciation,
2605        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
2606        {
2607            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2608
2609            let framework_str = {
2610                use datasynth_config::schema::AccountingFrameworkConfig;
2611                match self
2612                    .config
2613                    .accounting_standards
2614                    .framework
2615                    .unwrap_or_default()
2616                {
2617                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2618                        "IFRS"
2619                    }
2620                    _ => "US_GAAP",
2621                }
2622            };
2623
2624            // Sum depreciation debits (account 6000) from close JEs
2625            let depreciation_total: rust_decimal::Decimal = entries
2626                .iter()
2627                .filter(|je| je.header.document_type == "CL")
2628                .flat_map(|je| je.lines.iter())
2629                .filter(|l| l.gl_account.starts_with("6000"))
2630                .map(|l| l.debit_amount)
2631                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2632
2633            // Sum interest expense debits (account 7100)
2634            let interest_paid: rust_decimal::Decimal = entries
2635                .iter()
2636                .flat_map(|je| je.lines.iter())
2637                .filter(|l| l.gl_account.starts_with("7100"))
2638                .map(|l| l.debit_amount)
2639                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2640
2641            // Sum tax expense debits (account 8000)
2642            let tax_paid: rust_decimal::Decimal = entries
2643                .iter()
2644                .flat_map(|je| je.lines.iter())
2645                .filter(|l| l.gl_account.starts_with("8000"))
2646                .map(|l| l.debit_amount)
2647                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2648
2649            // Sum capex debits on fixed assets (account 1500)
2650            let capex: rust_decimal::Decimal = entries
2651                .iter()
2652                .flat_map(|je| je.lines.iter())
2653                .filter(|l| l.gl_account.starts_with("1500"))
2654                .map(|l| l.debit_amount)
2655                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2656
2657            // Dividends paid: sum debits on dividends payable (account 2170) across all JEs (no document-type filter)
2658            let dividends_paid: rust_decimal::Decimal = entries
2659                .iter()
2660                .flat_map(|je| je.lines.iter())
2661                .filter(|l| l.gl_account == "2170")
2662                .map(|l| l.debit_amount)
2663                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2664
2665            let cf_data = CashFlowSourceData {
2666                depreciation_total,
2667                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
2668                delta_ar: rust_decimal::Decimal::ZERO,
2669                delta_ap: rust_decimal::Decimal::ZERO,
2670                delta_inventory: rust_decimal::Decimal::ZERO,
2671                capex,
2672                debt_issuance: rust_decimal::Decimal::ZERO,
2673                debt_repayment: rust_decimal::Decimal::ZERO,
2674                interest_paid,
2675                tax_paid,
2676                dividends_paid,
2677                framework: framework_str.to_string(),
2678            };
2679
2680            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2681            if !enhanced_cf_items.is_empty() {
2682                // Merge into ALL cash flow statements (standalone + consolidated)
2683                use datasynth_core::models::StatementType;
2684                let merge_count = enhanced_cf_items.len();
2685                for stmt in financial_reporting
2686                    .financial_statements
2687                    .iter_mut()
2688                    .chain(financial_reporting.consolidated_statements.iter_mut())
2689                    .chain(
2690                        financial_reporting
2691                            .standalone_statements
2692                            .values_mut()
2693                            .flat_map(|v| v.iter_mut()),
2694                    )
2695                {
2696                    if stmt.statement_type == StatementType::CashFlowStatement {
2697                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2698                    }
2699                }
2700                info!(
2701                    "Enhanced cash flow: {} supplementary items merged into CF statements",
2702                    merge_count
2703                );
2704            }
2705        }
2706
2707        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2708        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2709        self.generate_notes_to_financial_statements(
2710            &mut financial_reporting,
2711            &accounting_standards,
2712            &tax,
2713            &hr,
2714            &audit,
2715            &treasury,
2716        );
2717
2718        // Phase 20b: Supplement segment reports from real JEs (v2.4)
2719        // When we have 2+ companies, derive segment data from actual journal entries
2720        // to complement or replace the FS-generator-based segments.
2721        if self.config.companies.len() >= 2 && !entries.is_empty() {
2722            let companies: Vec<(String, String)> = self
2723                .config
2724                .companies
2725                .iter()
2726                .map(|c| (c.code.clone(), c.name.clone()))
2727                .collect();
2728            let ic_elim: rust_decimal::Decimal =
2729                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2730            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2731                .unwrap_or(NaiveDate::MIN);
2732            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2733            let period_label = format!(
2734                "{}-{:02}",
2735                end_date.year(),
2736                (end_date - chrono::Days::new(1)).month()
2737            );
2738
2739            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2740            let (je_segments, je_recon) =
2741                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2742            if !je_segments.is_empty() {
2743                info!(
2744                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2745                    je_segments.len(),
2746                    ic_elim,
2747                );
2748                // Replace if existing segment_reports were empty; otherwise supplement
2749                if financial_reporting.segment_reports.is_empty() {
2750                    financial_reporting.segment_reports = je_segments;
2751                    financial_reporting.segment_reconciliations = vec![je_recon];
2752                } else {
2753                    financial_reporting.segment_reports.extend(je_segments);
2754                    financial_reporting.segment_reconciliations.push(je_recon);
2755                }
2756            }
2757        }
2758
2759        // Phase 21: ESG Data Generation
2760        let esg_snap =
2761            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2762
2763        // Phase 23: Project Accounting Data Generation
2764        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2765
2766        // Phase 24: Process Evolution + Organizational Events
2767        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2768
2769        // Phase 24b: Disruption Events
2770        let disruption_events = self.phase_disruption_events(&mut stats)?;
2771
2772        // Phase 27: Bi-Temporal Vendor Version Chains
2773        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2774
2775        // Phase 28: Entity Relationship Graph + Cross-Process Links
2776        let (entity_relationship_graph, cross_process_links) =
2777            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2778
2779        // Phase 29: Industry-specific GL accounts
2780        let industry_output = self.phase_industry_data(&mut stats);
2781
2782        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2783        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2784
2785        // Phase 19b: Hypergraph Export (after all data is available)
2786        self.phase_hypergraph_export(
2787            &coa,
2788            &entries,
2789            &document_flows,
2790            &sourcing,
2791            &hr,
2792            &manufacturing_snap,
2793            &banking,
2794            &audit,
2795            &financial_reporting,
2796            &ocpm,
2797            &compliance_regulations,
2798            &mut stats,
2799        )?;
2800
2801        // Phase 10c: Additional graph builders (approval, entity, banking)
2802        // These run after all data is available since they need banking/IC data.
2803        if self.phase_config.generate_graph_export {
2804            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2805        }
2806
2807        // Log informational messages for config sections not yet fully wired
2808        if self.config.streaming.enabled {
2809            info!("Note: streaming config is enabled but batch mode does not use it");
2810        }
2811        if self.config.vendor_network.enabled {
2812            debug!("Vendor network config available; relationship graph generation is partial");
2813        }
2814        if self.config.customer_segmentation.enabled {
2815            debug!("Customer segmentation config available; segment-aware generation is partial");
2816        }
2817
2818        // Log final resource statistics
2819        let resource_stats = self.resource_guard.stats();
2820        info!(
2821            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2822            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2823            resource_stats.disk.estimated_bytes_written,
2824            resource_stats.degradation_level
2825        );
2826
2827        // Flush any remaining stream sink data
2828        if let Some(ref sink) = self.phase_sink {
2829            if let Err(e) = sink.flush() {
2830                warn!("Stream sink flush failed: {e}");
2831            }
2832        }
2833
2834        // Build data lineage graph
2835        let lineage = self.build_lineage_graph();
2836
2837        // Evaluate quality gates if enabled in config
2838        let gate_result = if self.config.quality_gates.enabled {
2839            let profile_name = &self.config.quality_gates.profile;
2840            match datasynth_eval::gates::get_profile(profile_name) {
2841                Some(profile) => {
2842                    // Build an evaluation populated with actual generation metrics.
2843                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2844
2845                    // Populate balance sheet evaluation from balance validation results
2846                    if balance_validation.validated {
2847                        eval.coherence.balance =
2848                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2849                                equation_balanced: balance_validation.is_balanced,
2850                                max_imbalance: (balance_validation.total_debits
2851                                    - balance_validation.total_credits)
2852                                    .abs(),
2853                                periods_evaluated: 1,
2854                                periods_imbalanced: if balance_validation.is_balanced {
2855                                    0
2856                                } else {
2857                                    1
2858                                },
2859                                period_results: Vec::new(),
2860                                companies_evaluated: self.config.companies.len(),
2861                            });
2862                    }
2863
2864                    // Set coherence passes based on balance validation
2865                    eval.coherence.passes = balance_validation.is_balanced;
2866                    if !balance_validation.is_balanced {
2867                        eval.coherence
2868                            .failures
2869                            .push("Balance sheet equation not satisfied".to_string());
2870                    }
2871
2872                    // Set statistical score based on entry count (basic sanity)
2873                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2874                    eval.statistical.passes = !entries.is_empty();
2875
2876                    // Set quality score from data quality stats
2877                    eval.quality.overall_score = 0.9; // Default high for generated data
2878                    eval.quality.passes = true;
2879
2880                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2881                    info!(
2882                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2883                        profile_name, result.gates_passed, result.gates_total, result.summary
2884                    );
2885                    Some(result)
2886                }
2887                None => {
2888                    warn!(
2889                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2890                        profile_name
2891                    );
2892                    None
2893                }
2894            }
2895        } else {
2896            None
2897        };
2898
2899        // Generate internal controls if enabled
2900        let internal_controls = if self.config.internal_controls.enabled {
2901            InternalControl::standard_controls()
2902        } else {
2903            Vec::new()
2904        };
2905
2906        Ok(EnhancedGenerationResult {
2907            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2908            master_data: std::mem::take(&mut self.master_data),
2909            document_flows,
2910            subledger,
2911            ocpm,
2912            audit,
2913            banking,
2914            graph_export,
2915            sourcing,
2916            financial_reporting,
2917            hr,
2918            accounting_standards,
2919            manufacturing: manufacturing_snap,
2920            sales_kpi_budgets,
2921            tax,
2922            esg: esg_snap,
2923            treasury,
2924            project_accounting,
2925            process_evolution,
2926            organizational_events,
2927            disruption_events,
2928            intercompany,
2929            journal_entries: entries,
2930            anomaly_labels,
2931            balance_validation,
2932            data_quality_stats,
2933            quality_issues,
2934            statistics: stats,
2935            lineage: Some(lineage),
2936            gate_result,
2937            internal_controls,
2938            sod_violations,
2939            opening_balances,
2940            subledger_reconciliation,
2941            counterfactual_pairs,
2942            red_flags,
2943            collusion_rings,
2944            temporal_vendor_chains,
2945            entity_relationship_graph,
2946            cross_process_links,
2947            industry_output,
2948            compliance_regulations,
2949        })
2950    }
2951
2952    // ========================================================================
2953    // Generation Phase Methods
2954    // ========================================================================
2955
2956    /// Phase 1: Generate Chart of Accounts and update statistics.
2957    fn phase_chart_of_accounts(
2958        &mut self,
2959        stats: &mut EnhancedGenerationStatistics,
2960    ) -> SynthResult<Arc<ChartOfAccounts>> {
2961        info!("Phase 1: Generating Chart of Accounts");
2962        let coa = self.generate_coa()?;
2963        stats.accounts_count = coa.account_count();
2964        info!(
2965            "Chart of Accounts generated: {} accounts",
2966            stats.accounts_count
2967        );
2968        self.check_resources_with_log("post-coa")?;
2969        Ok(coa)
2970    }
2971
2972    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
2973    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
2974        if self.phase_config.generate_master_data {
2975            info!("Phase 2: Generating Master Data");
2976            self.generate_master_data()?;
2977            stats.vendor_count = self.master_data.vendors.len();
2978            stats.customer_count = self.master_data.customers.len();
2979            stats.material_count = self.master_data.materials.len();
2980            stats.asset_count = self.master_data.assets.len();
2981            stats.employee_count = self.master_data.employees.len();
2982            info!(
2983                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
2984                stats.vendor_count, stats.customer_count, stats.material_count,
2985                stats.asset_count, stats.employee_count
2986            );
2987            self.check_resources_with_log("post-master-data")?;
2988        } else {
2989            debug!("Phase 2: Skipped (master data generation disabled)");
2990        }
2991        Ok(())
2992    }
2993
2994    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
2995    fn phase_document_flows(
2996        &mut self,
2997        stats: &mut EnhancedGenerationStatistics,
2998    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
2999        let mut document_flows = DocumentFlowSnapshot::default();
3000        let mut subledger = SubledgerSnapshot::default();
3001        // Dunning JEs (interest + charges) accumulated here and merged into the
3002        // main FA-JE list below so they appear in the GL.
3003        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3004
3005        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3006            info!("Phase 3: Generating Document Flows");
3007            self.generate_document_flows(&mut document_flows)?;
3008            stats.p2p_chain_count = document_flows.p2p_chains.len();
3009            stats.o2c_chain_count = document_flows.o2c_chains.len();
3010            info!(
3011                "Document flows generated: {} P2P chains, {} O2C chains",
3012                stats.p2p_chain_count, stats.o2c_chain_count
3013            );
3014
3015            // Phase 3b: Link document flows to subledgers (for data coherence)
3016            debug!("Phase 3b: Linking document flows to subledgers");
3017            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3018            stats.ap_invoice_count = subledger.ap_invoices.len();
3019            stats.ar_invoice_count = subledger.ar_invoices.len();
3020            debug!(
3021                "Subledgers linked: {} AP invoices, {} AR invoices",
3022                stats.ap_invoice_count, stats.ar_invoice_count
3023            );
3024
3025            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3026            // Without this step the subledger is systematically overstated because
3027            // amount_remaining is set at invoice creation and never reduced by
3028            // the payments that were generated in the document-flow phase.
3029            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3030            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3031            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3032            debug!("Payment settlements applied to AP and AR subledgers");
3033
3034            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3035            // The as-of date is the last day of the configured period.
3036            if let Ok(start_date) =
3037                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3038            {
3039                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3040                    - chrono::Days::new(1);
3041                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3042                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3043                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3044                // derived from JE-level aggregation and will typically differ. This is a known
3045                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3046                // generated independently. A future reconciliation phase should align them by
3047                // using subledger totals as the authoritative source for BS Receivables.
3048                for company in &self.config.companies {
3049                    let ar_report = ARAgingReport::from_invoices(
3050                        company.code.clone(),
3051                        &subledger.ar_invoices,
3052                        as_of_date,
3053                    );
3054                    subledger.ar_aging_reports.push(ar_report);
3055
3056                    let ap_report = APAgingReport::from_invoices(
3057                        company.code.clone(),
3058                        &subledger.ap_invoices,
3059                        as_of_date,
3060                    );
3061                    subledger.ap_aging_reports.push(ap_report);
3062                }
3063                debug!(
3064                    "AR/AP aging reports built: {} AR, {} AP",
3065                    subledger.ar_aging_reports.len(),
3066                    subledger.ap_aging_reports.len()
3067                );
3068
3069                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3070                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3071                {
3072                    use datasynth_generators::DunningGenerator;
3073                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3074                    for company in &self.config.companies {
3075                        let currency = company.currency.as_str();
3076                        // Collect mutable references to AR invoices for this company
3077                        // (dunning generator updates dunning_info on invoices in-place).
3078                        let mut company_invoices: Vec<
3079                            datasynth_core::models::subledger::ar::ARInvoice,
3080                        > = subledger
3081                            .ar_invoices
3082                            .iter()
3083                            .filter(|inv| inv.company_code == company.code)
3084                            .cloned()
3085                            .collect();
3086
3087                        if company_invoices.is_empty() {
3088                            continue;
3089                        }
3090
3091                        let result = dunning_gen.execute_dunning_run(
3092                            &company.code,
3093                            as_of_date,
3094                            &mut company_invoices,
3095                            currency,
3096                        );
3097
3098                        // Write back updated dunning info to the main AR invoice list
3099                        for updated in &company_invoices {
3100                            if let Some(orig) = subledger
3101                                .ar_invoices
3102                                .iter_mut()
3103                                .find(|i| i.invoice_number == updated.invoice_number)
3104                            {
3105                                orig.dunning_info = updated.dunning_info.clone();
3106                            }
3107                        }
3108
3109                        subledger.dunning_runs.push(result.dunning_run);
3110                        subledger.dunning_letters.extend(result.letters);
3111                        // Dunning JEs (interest + charges) collected into local buffer.
3112                        dunning_journal_entries.extend(result.journal_entries);
3113                    }
3114                    debug!(
3115                        "Dunning runs complete: {} runs, {} letters",
3116                        subledger.dunning_runs.len(),
3117                        subledger.dunning_letters.len()
3118                    );
3119                }
3120            }
3121
3122            self.check_resources_with_log("post-document-flows")?;
3123        } else {
3124            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3125        }
3126
3127        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3128        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3129        if !self.master_data.assets.is_empty() {
3130            debug!("Generating FA subledger records");
3131            let company_code = self
3132                .config
3133                .companies
3134                .first()
3135                .map(|c| c.code.as_str())
3136                .unwrap_or("1000");
3137            let currency = self
3138                .config
3139                .companies
3140                .first()
3141                .map(|c| c.currency.as_str())
3142                .unwrap_or("USD");
3143
3144            let mut fa_gen = datasynth_generators::FAGenerator::new(
3145                datasynth_generators::FAGeneratorConfig::default(),
3146                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3147            );
3148
3149            for asset in &self.master_data.assets {
3150                let (record, je) = fa_gen.generate_asset_acquisition(
3151                    company_code,
3152                    &format!("{:?}", asset.asset_class),
3153                    &asset.description,
3154                    asset.acquisition_date,
3155                    currency,
3156                    asset.cost_center.as_deref(),
3157                );
3158                subledger.fa_records.push(record);
3159                fa_journal_entries.push(je);
3160            }
3161
3162            stats.fa_subledger_count = subledger.fa_records.len();
3163            debug!(
3164                "FA subledger records generated: {} (with {} acquisition JEs)",
3165                stats.fa_subledger_count,
3166                fa_journal_entries.len()
3167            );
3168        }
3169
3170        // Generate Inventory subledger records from master data materials
3171        if !self.master_data.materials.is_empty() {
3172            debug!("Generating Inventory subledger records");
3173            let first_company = self.config.companies.first();
3174            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3175            let inv_currency = first_company
3176                .map(|c| c.currency.clone())
3177                .unwrap_or_else(|| "USD".to_string());
3178
3179            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3180                datasynth_generators::InventoryGeneratorConfig::default(),
3181                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3182                inv_currency.clone(),
3183            );
3184
3185            for (i, material) in self.master_data.materials.iter().enumerate() {
3186                let plant = format!("PLANT{:02}", (i % 3) + 1);
3187                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3188                let initial_qty = rust_decimal::Decimal::from(
3189                    material
3190                        .safety_stock
3191                        .to_string()
3192                        .parse::<i64>()
3193                        .unwrap_or(100),
3194                );
3195
3196                let position = inv_gen.generate_position(
3197                    company_code,
3198                    &plant,
3199                    &storage_loc,
3200                    &material.material_id,
3201                    &material.description,
3202                    initial_qty,
3203                    Some(material.standard_cost),
3204                    &inv_currency,
3205                );
3206                subledger.inventory_positions.push(position);
3207            }
3208
3209            stats.inventory_subledger_count = subledger.inventory_positions.len();
3210            debug!(
3211                "Inventory subledger records generated: {}",
3212                stats.inventory_subledger_count
3213            );
3214        }
3215
3216        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3217        if !subledger.fa_records.is_empty() {
3218            if let Ok(start_date) =
3219                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3220            {
3221                let company_code = self
3222                    .config
3223                    .companies
3224                    .first()
3225                    .map(|c| c.code.as_str())
3226                    .unwrap_or("1000");
3227                let fiscal_year = start_date.year();
3228                let start_period = start_date.month();
3229                let end_period =
3230                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3231
3232                let depr_cfg = FaDepreciationScheduleConfig {
3233                    fiscal_year,
3234                    start_period,
3235                    end_period,
3236                    seed_offset: 800,
3237                };
3238                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3239                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3240                let run_count = runs.len();
3241                subledger.depreciation_runs = runs;
3242                debug!(
3243                    "Depreciation runs generated: {} runs for {} periods",
3244                    run_count, self.config.global.period_months
3245                );
3246            }
3247        }
3248
3249        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3250        if !subledger.inventory_positions.is_empty() {
3251            if let Ok(start_date) =
3252                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3253            {
3254                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3255                    - chrono::Days::new(1);
3256
3257                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3258                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3259
3260                for company in &self.config.companies {
3261                    let result = inv_val_gen.generate(
3262                        &company.code,
3263                        &subledger.inventory_positions,
3264                        as_of_date,
3265                    );
3266                    subledger.inventory_valuations.push(result);
3267                }
3268                debug!(
3269                    "Inventory valuations generated: {} company reports",
3270                    subledger.inventory_valuations.len()
3271                );
3272            }
3273        }
3274
3275        Ok((document_flows, subledger, fa_journal_entries))
3276    }
3277
3278    /// Phase 3c: Generate OCPM events from document flows.
3279    #[allow(clippy::too_many_arguments)]
3280    fn phase_ocpm_events(
3281        &mut self,
3282        document_flows: &DocumentFlowSnapshot,
3283        sourcing: &SourcingSnapshot,
3284        hr: &HrSnapshot,
3285        manufacturing: &ManufacturingSnapshot,
3286        banking: &BankingSnapshot,
3287        audit: &AuditSnapshot,
3288        financial_reporting: &FinancialReportingSnapshot,
3289        stats: &mut EnhancedGenerationStatistics,
3290    ) -> SynthResult<OcpmSnapshot> {
3291        let degradation = self.check_resources()?;
3292        if degradation >= DegradationLevel::Reduced {
3293            debug!(
3294                "Phase skipped due to resource pressure (degradation: {:?})",
3295                degradation
3296            );
3297            return Ok(OcpmSnapshot::default());
3298        }
3299        if self.phase_config.generate_ocpm_events {
3300            info!("Phase 3c: Generating OCPM Events");
3301            let ocpm_snapshot = self.generate_ocpm_events(
3302                document_flows,
3303                sourcing,
3304                hr,
3305                manufacturing,
3306                banking,
3307                audit,
3308                financial_reporting,
3309            )?;
3310            stats.ocpm_event_count = ocpm_snapshot.event_count;
3311            stats.ocpm_object_count = ocpm_snapshot.object_count;
3312            stats.ocpm_case_count = ocpm_snapshot.case_count;
3313            info!(
3314                "OCPM events generated: {} events, {} objects, {} cases",
3315                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3316            );
3317            self.check_resources_with_log("post-ocpm")?;
3318            Ok(ocpm_snapshot)
3319        } else {
3320            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3321            Ok(OcpmSnapshot::default())
3322        }
3323    }
3324
3325    /// Phase 4: Generate journal entries from document flows and standalone generation.
3326    fn phase_journal_entries(
3327        &mut self,
3328        coa: &Arc<ChartOfAccounts>,
3329        document_flows: &DocumentFlowSnapshot,
3330        _stats: &mut EnhancedGenerationStatistics,
3331    ) -> SynthResult<Vec<JournalEntry>> {
3332        let mut entries = Vec::new();
3333
3334        // Phase 4a: Generate JEs from document flows (for data coherence)
3335        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3336            debug!("Phase 4a: Generating JEs from document flows");
3337            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3338            debug!("Generated {} JEs from document flows", flow_entries.len());
3339            entries.extend(flow_entries);
3340        }
3341
3342        // Phase 4b: Generate standalone journal entries
3343        if self.phase_config.generate_journal_entries {
3344            info!("Phase 4: Generating Journal Entries");
3345            let je_entries = self.generate_journal_entries(coa)?;
3346            info!("Generated {} standalone journal entries", je_entries.len());
3347            entries.extend(je_entries);
3348        } else {
3349            debug!("Phase 4: Skipped (journal entry generation disabled)");
3350        }
3351
3352        if !entries.is_empty() {
3353            // Note: stats.total_entries/total_line_items are set in generate()
3354            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
3355            self.check_resources_with_log("post-journal-entries")?;
3356        }
3357
3358        Ok(entries)
3359    }
3360
3361    /// Phase 5: Inject anomalies into journal entries.
3362    fn phase_anomaly_injection(
3363        &mut self,
3364        entries: &mut [JournalEntry],
3365        actions: &DegradationActions,
3366        stats: &mut EnhancedGenerationStatistics,
3367    ) -> SynthResult<AnomalyLabels> {
3368        if self.phase_config.inject_anomalies
3369            && !entries.is_empty()
3370            && !actions.skip_anomaly_injection
3371        {
3372            info!("Phase 5: Injecting Anomalies");
3373            let result = self.inject_anomalies(entries)?;
3374            stats.anomalies_injected = result.labels.len();
3375            info!("Injected {} anomalies", stats.anomalies_injected);
3376            self.check_resources_with_log("post-anomaly-injection")?;
3377            Ok(result)
3378        } else if actions.skip_anomaly_injection {
3379            warn!("Phase 5: Skipped due to resource degradation");
3380            Ok(AnomalyLabels::default())
3381        } else {
3382            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3383            Ok(AnomalyLabels::default())
3384        }
3385    }
3386
3387    /// Phase 6: Validate balance sheet equation on journal entries.
3388    fn phase_balance_validation(
3389        &mut self,
3390        entries: &[JournalEntry],
3391    ) -> SynthResult<BalanceValidationResult> {
3392        if self.phase_config.validate_balances && !entries.is_empty() {
3393            debug!("Phase 6: Validating Balances");
3394            let balance_validation = self.validate_journal_entries(entries)?;
3395            if balance_validation.is_balanced {
3396                debug!("Balance validation passed");
3397            } else {
3398                warn!(
3399                    "Balance validation found {} errors",
3400                    balance_validation.validation_errors.len()
3401                );
3402            }
3403            Ok(balance_validation)
3404        } else {
3405            Ok(BalanceValidationResult::default())
3406        }
3407    }
3408
3409    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3410    fn phase_data_quality_injection(
3411        &mut self,
3412        entries: &mut [JournalEntry],
3413        actions: &DegradationActions,
3414        stats: &mut EnhancedGenerationStatistics,
3415    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3416        if self.phase_config.inject_data_quality
3417            && !entries.is_empty()
3418            && !actions.skip_data_quality
3419        {
3420            info!("Phase 7: Injecting Data Quality Variations");
3421            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3422            stats.data_quality_issues = dq_stats.records_with_issues;
3423            info!("Injected {} data quality issues", stats.data_quality_issues);
3424            self.check_resources_with_log("post-data-quality")?;
3425            Ok((dq_stats, quality_issues))
3426        } else if actions.skip_data_quality {
3427            warn!("Phase 7: Skipped due to resource degradation");
3428            Ok((DataQualityStats::default(), Vec::new()))
3429        } else {
3430            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3431            Ok((DataQualityStats::default(), Vec::new()))
3432        }
3433    }
3434
3435    /// Phase 10b: Generate period-close journal entries.
3436    ///
3437    /// Generates:
3438    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3439    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3440    ///    for the configured period.
3441    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
3442    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3443    ///    earnings via the Income Summary (3600) clearing account.
3444    fn phase_period_close(
3445        &mut self,
3446        entries: &mut Vec<JournalEntry>,
3447        subledger: &SubledgerSnapshot,
3448        stats: &mut EnhancedGenerationStatistics,
3449    ) -> SynthResult<()> {
3450        if !self.phase_config.generate_period_close || entries.is_empty() {
3451            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3452            return Ok(());
3453        }
3454
3455        info!("Phase 10b: Generating period-close journal entries");
3456
3457        use datasynth_core::accounts::{
3458            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3459        };
3460        use rust_decimal::Decimal;
3461
3462        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3463            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3464        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3465        // Posting date for close entries is the last day of the period
3466        let close_date = end_date - chrono::Days::new(1);
3467
3468        // Statutory tax rate (21% — configurable rates come in later tiers)
3469        let tax_rate = Decimal::new(21, 2); // 0.21
3470
3471        // Collect company codes from config
3472        let company_codes: Vec<String> = self
3473            .config
3474            .companies
3475            .iter()
3476            .map(|c| c.code.clone())
3477            .collect();
3478
3479        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3480        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3481        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3482
3483        // --- Depreciation JEs (per asset) ---
3484        // Compute period depreciation for each active fixed asset using straight-line method.
3485        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3486        let period_months = self.config.global.period_months;
3487        for asset in &subledger.fa_records {
3488            // Skip assets that are inactive / fully depreciated / non-depreciable
3489            use datasynth_core::models::subledger::fa::AssetStatus;
3490            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3491                continue;
3492            }
3493            let useful_life_months = asset.useful_life_months();
3494            if useful_life_months == 0 {
3495                // Land or CIP — not depreciated
3496                continue;
3497            }
3498            let salvage_value = asset.salvage_value();
3499            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3500            if depreciable_base == Decimal::ZERO {
3501                continue;
3502            }
3503            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3504                * Decimal::from(period_months))
3505            .round_dp(2);
3506            if period_depr <= Decimal::ZERO {
3507                continue;
3508            }
3509
3510            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3511            depr_header.document_type = "CL".to_string();
3512            depr_header.header_text = Some(format!(
3513                "Depreciation - {} {}",
3514                asset.asset_number, asset.description
3515            ));
3516            depr_header.created_by = "CLOSE_ENGINE".to_string();
3517            depr_header.source = TransactionSource::Automated;
3518            depr_header.business_process = Some(BusinessProcess::R2R);
3519
3520            let doc_id = depr_header.document_id;
3521            let mut depr_je = JournalEntry::new(depr_header);
3522
3523            // DR Depreciation Expense (6000)
3524            depr_je.add_line(JournalEntryLine::debit(
3525                doc_id,
3526                1,
3527                expense_accounts::DEPRECIATION.to_string(),
3528                period_depr,
3529            ));
3530            // CR Accumulated Depreciation (1510)
3531            depr_je.add_line(JournalEntryLine::credit(
3532                doc_id,
3533                2,
3534                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3535                period_depr,
3536            ));
3537
3538            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3539            close_jes.push(depr_je);
3540        }
3541
3542        if !subledger.fa_records.is_empty() {
3543            debug!(
3544                "Generated {} depreciation JEs from {} FA records",
3545                close_jes.len(),
3546                subledger.fa_records.len()
3547            );
3548        }
3549
3550        // --- Accrual entries (standard period-end accruals per company) ---
3551        // Generate standard accrued expense entries (utilities, rent, interest) using
3552        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3553        {
3554            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3555            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3556
3557            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3558            let accrual_items: &[(&str, &str, &str)] = &[
3559                ("Accrued Utilities", "6200", "2100"),
3560                ("Accrued Rent", "6300", "2100"),
3561                ("Accrued Interest", "6100", "2150"),
3562            ];
3563
3564            for company_code in &company_codes {
3565                // Estimate company revenue from existing JEs
3566                let company_revenue: Decimal = entries
3567                    .iter()
3568                    .filter(|e| e.header.company_code == *company_code)
3569                    .flat_map(|e| e.lines.iter())
3570                    .filter(|l| l.gl_account.starts_with('4'))
3571                    .map(|l| l.credit_amount - l.debit_amount)
3572                    .fold(Decimal::ZERO, |acc, v| acc + v);
3573
3574                if company_revenue <= Decimal::ZERO {
3575                    continue;
3576                }
3577
3578                // Use 0.5% of period revenue per accrual item as a proxy
3579                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3580                if accrual_base <= Decimal::ZERO {
3581                    continue;
3582                }
3583
3584                for (description, expense_acct, liability_acct) in accrual_items {
3585                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3586                        company_code,
3587                        description,
3588                        accrual_base,
3589                        expense_acct,
3590                        liability_acct,
3591                        close_date,
3592                        None,
3593                    );
3594                    close_jes.push(accrual_je);
3595                    if let Some(rev_je) = reversal_je {
3596                        close_jes.push(rev_je);
3597                    }
3598                }
3599            }
3600
3601            debug!(
3602                "Generated accrual entries for {} companies",
3603                company_codes.len()
3604            );
3605        }
3606
3607        for company_code in &company_codes {
3608            // Calculate net income for this company from existing JEs:
3609            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
3610            // Revenue (4xxx): credit-normal, so net = credits - debits
3611            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
3612            let mut total_revenue = Decimal::ZERO;
3613            let mut total_expenses = Decimal::ZERO;
3614
3615            for entry in entries.iter() {
3616                if entry.header.company_code != *company_code {
3617                    continue;
3618                }
3619                for line in &entry.lines {
3620                    let category = AccountCategory::from_account(&line.gl_account);
3621                    match category {
3622                        AccountCategory::Revenue => {
3623                            // Revenue is credit-normal: net revenue = credits - debits
3624                            total_revenue += line.credit_amount - line.debit_amount;
3625                        }
3626                        AccountCategory::Cogs
3627                        | AccountCategory::OperatingExpense
3628                        | AccountCategory::OtherIncomeExpense
3629                        | AccountCategory::Tax => {
3630                            // Expenses are debit-normal: net expense = debits - credits
3631                            total_expenses += line.debit_amount - line.credit_amount;
3632                        }
3633                        _ => {}
3634                    }
3635                }
3636            }
3637
3638            let pre_tax_income = total_revenue - total_expenses;
3639
3640            // Skip if no income statement activity
3641            if pre_tax_income == Decimal::ZERO {
3642                debug!(
3643                    "Company {}: no pre-tax income, skipping period close",
3644                    company_code
3645                );
3646                continue;
3647            }
3648
3649            // --- Tax provision / DTA JE ---
3650            if pre_tax_income > Decimal::ZERO {
3651                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
3652                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3653
3654                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3655                tax_header.document_type = "CL".to_string();
3656                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3657                tax_header.created_by = "CLOSE_ENGINE".to_string();
3658                tax_header.source = TransactionSource::Automated;
3659                tax_header.business_process = Some(BusinessProcess::R2R);
3660
3661                let doc_id = tax_header.document_id;
3662                let mut tax_je = JournalEntry::new(tax_header);
3663
3664                // DR Tax Expense (8000)
3665                tax_je.add_line(JournalEntryLine::debit(
3666                    doc_id,
3667                    1,
3668                    tax_accounts::TAX_EXPENSE.to_string(),
3669                    tax_amount,
3670                ));
3671                // CR Income Tax Payable (2130)
3672                tax_je.add_line(JournalEntryLine::credit(
3673                    doc_id,
3674                    2,
3675                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3676                    tax_amount,
3677                ));
3678
3679                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3680                close_jes.push(tax_je);
3681            } else {
3682                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
3683                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
3684                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3685                if dta_amount > Decimal::ZERO {
3686                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3687                    dta_header.document_type = "CL".to_string();
3688                    dta_header.header_text =
3689                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
3690                    dta_header.created_by = "CLOSE_ENGINE".to_string();
3691                    dta_header.source = TransactionSource::Automated;
3692                    dta_header.business_process = Some(BusinessProcess::R2R);
3693
3694                    let doc_id = dta_header.document_id;
3695                    let mut dta_je = JournalEntry::new(dta_header);
3696
3697                    // DR Deferred Tax Asset (1600)
3698                    dta_je.add_line(JournalEntryLine::debit(
3699                        doc_id,
3700                        1,
3701                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3702                        dta_amount,
3703                    ));
3704                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
3705                    // reflecting the benefit of the future deductible temporary difference.
3706                    dta_je.add_line(JournalEntryLine::credit(
3707                        doc_id,
3708                        2,
3709                        tax_accounts::TAX_EXPENSE.to_string(),
3710                        dta_amount,
3711                    ));
3712
3713                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3714                    close_jes.push(dta_je);
3715                    debug!(
3716                        "Company {}: loss year — recognised DTA of {}",
3717                        company_code, dta_amount
3718                    );
3719                }
3720            }
3721
3722            // --- Dividend JEs (v2.4) ---
3723            // If the entity is profitable after tax, declare a 10% dividend payout.
3724            // This runs AFTER tax provision so the dividend is based on post-tax income
3725            // but BEFORE the retained earnings close so the RE transfer reflects the
3726            // reduced balance.
3727            let tax_provision = if pre_tax_income > Decimal::ZERO {
3728                (pre_tax_income * tax_rate).round_dp(2)
3729            } else {
3730                Decimal::ZERO
3731            };
3732            let net_income = pre_tax_income - tax_provision;
3733
3734            if net_income > Decimal::ZERO {
3735                use datasynth_generators::DividendGenerator;
3736                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
3737                let mut div_gen = DividendGenerator::new(self.seed + 460);
3738                let currency_str = self
3739                    .config
3740                    .companies
3741                    .iter()
3742                    .find(|c| c.code == *company_code)
3743                    .map(|c| c.currency.as_str())
3744                    .unwrap_or("USD");
3745                let div_result = div_gen.generate(
3746                    company_code,
3747                    close_date,
3748                    Decimal::new(1, 0), // $1 per share placeholder
3749                    dividend_amount,
3750                    currency_str,
3751                );
3752                let div_je_count = div_result.journal_entries.len();
3753                close_jes.extend(div_result.journal_entries);
3754                debug!(
3755                    "Company {}: declared dividend of {} ({} JEs)",
3756                    company_code, dividend_amount, div_je_count
3757                );
3758            }
3759
3760            // --- Income statement closing JE ---
3761            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
3762            // For a loss year the DTA JE above already recognises the deferred benefit; here we
3763            // close the pre-tax loss into Retained Earnings as-is.
3764            if net_income != Decimal::ZERO {
3765                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3766                close_header.document_type = "CL".to_string();
3767                close_header.header_text =
3768                    Some(format!("Income statement close - {}", company_code));
3769                close_header.created_by = "CLOSE_ENGINE".to_string();
3770                close_header.source = TransactionSource::Automated;
3771                close_header.business_process = Some(BusinessProcess::R2R);
3772
3773                let doc_id = close_header.document_id;
3774                let mut close_je = JournalEntry::new(close_header);
3775
3776                let abs_net_income = net_income.abs();
3777
3778                if net_income > Decimal::ZERO {
3779                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
3780                    close_je.add_line(JournalEntryLine::debit(
3781                        doc_id,
3782                        1,
3783                        equity_accounts::INCOME_SUMMARY.to_string(),
3784                        abs_net_income,
3785                    ));
3786                    close_je.add_line(JournalEntryLine::credit(
3787                        doc_id,
3788                        2,
3789                        equity_accounts::RETAINED_EARNINGS.to_string(),
3790                        abs_net_income,
3791                    ));
3792                } else {
3793                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
3794                    close_je.add_line(JournalEntryLine::debit(
3795                        doc_id,
3796                        1,
3797                        equity_accounts::RETAINED_EARNINGS.to_string(),
3798                        abs_net_income,
3799                    ));
3800                    close_je.add_line(JournalEntryLine::credit(
3801                        doc_id,
3802                        2,
3803                        equity_accounts::INCOME_SUMMARY.to_string(),
3804                        abs_net_income,
3805                    ));
3806                }
3807
3808                debug_assert!(
3809                    close_je.is_balanced(),
3810                    "Income statement closing JE must be balanced"
3811                );
3812                close_jes.push(close_je);
3813            }
3814        }
3815
3816        let close_count = close_jes.len();
3817        if close_count > 0 {
3818            info!("Generated {} period-close journal entries", close_count);
3819            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3820            entries.extend(close_jes);
3821            stats.period_close_je_count = close_count;
3822
3823            // Update total entry/line-item stats
3824            stats.total_entries = entries.len() as u64;
3825            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3826        } else {
3827            debug!("No period-close entries generated (no income statement activity)");
3828        }
3829
3830        Ok(())
3831    }
3832
3833    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
3834    fn phase_audit_data(
3835        &mut self,
3836        entries: &[JournalEntry],
3837        stats: &mut EnhancedGenerationStatistics,
3838    ) -> SynthResult<AuditSnapshot> {
3839        if self.phase_config.generate_audit {
3840            info!("Phase 8: Generating Audit Data");
3841            let audit_snapshot = self.generate_audit_data(entries)?;
3842            stats.audit_engagement_count = audit_snapshot.engagements.len();
3843            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3844            stats.audit_evidence_count = audit_snapshot.evidence.len();
3845            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3846            stats.audit_finding_count = audit_snapshot.findings.len();
3847            stats.audit_judgment_count = audit_snapshot.judgments.len();
3848            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3849            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3850            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3851            stats.audit_sample_count = audit_snapshot.samples.len();
3852            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3853            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3854            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3855            stats.audit_related_party_count = audit_snapshot.related_parties.len();
3856            stats.audit_related_party_transaction_count =
3857                audit_snapshot.related_party_transactions.len();
3858            info!(
3859                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3860                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3861                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3862                 {} RP transactions",
3863                stats.audit_engagement_count,
3864                stats.audit_workpaper_count,
3865                stats.audit_evidence_count,
3866                stats.audit_risk_count,
3867                stats.audit_finding_count,
3868                stats.audit_judgment_count,
3869                stats.audit_confirmation_count,
3870                stats.audit_procedure_step_count,
3871                stats.audit_sample_count,
3872                stats.audit_analytical_result_count,
3873                stats.audit_ia_function_count,
3874                stats.audit_ia_report_count,
3875                stats.audit_related_party_count,
3876                stats.audit_related_party_transaction_count,
3877            );
3878            self.check_resources_with_log("post-audit")?;
3879            Ok(audit_snapshot)
3880        } else {
3881            debug!("Phase 8: Skipped (audit generation disabled)");
3882            Ok(AuditSnapshot::default())
3883        }
3884    }
3885
3886    /// Phase 9: Generate banking KYC/AML data.
3887    fn phase_banking_data(
3888        &mut self,
3889        stats: &mut EnhancedGenerationStatistics,
3890    ) -> SynthResult<BankingSnapshot> {
3891        if self.phase_config.generate_banking {
3892            info!("Phase 9: Generating Banking KYC/AML Data");
3893            let banking_snapshot = self.generate_banking_data()?;
3894            stats.banking_customer_count = banking_snapshot.customers.len();
3895            stats.banking_account_count = banking_snapshot.accounts.len();
3896            stats.banking_transaction_count = banking_snapshot.transactions.len();
3897            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3898            info!(
3899                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3900                stats.banking_customer_count, stats.banking_account_count,
3901                stats.banking_transaction_count, stats.banking_suspicious_count
3902            );
3903            self.check_resources_with_log("post-banking")?;
3904            Ok(banking_snapshot)
3905        } else {
3906            debug!("Phase 9: Skipped (banking generation disabled)");
3907            Ok(BankingSnapshot::default())
3908        }
3909    }
3910
3911    /// Phase 10: Export accounting network graphs for ML training.
3912    fn phase_graph_export(
3913        &mut self,
3914        entries: &[JournalEntry],
3915        coa: &Arc<ChartOfAccounts>,
3916        stats: &mut EnhancedGenerationStatistics,
3917    ) -> SynthResult<GraphExportSnapshot> {
3918        if self.phase_config.generate_graph_export && !entries.is_empty() {
3919            info!("Phase 10: Exporting Accounting Network Graphs");
3920            match self.export_graphs(entries, coa, stats) {
3921                Ok(snapshot) => {
3922                    info!(
3923                        "Graph export complete: {} graphs ({} nodes, {} edges)",
3924                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
3925                    );
3926                    Ok(snapshot)
3927                }
3928                Err(e) => {
3929                    warn!("Phase 10: Graph export failed: {}", e);
3930                    Ok(GraphExportSnapshot::default())
3931                }
3932            }
3933        } else {
3934            debug!("Phase 10: Skipped (graph export disabled or no entries)");
3935            Ok(GraphExportSnapshot::default())
3936        }
3937    }
3938
3939    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
3940    #[allow(clippy::too_many_arguments)]
3941    fn phase_hypergraph_export(
3942        &self,
3943        coa: &Arc<ChartOfAccounts>,
3944        entries: &[JournalEntry],
3945        document_flows: &DocumentFlowSnapshot,
3946        sourcing: &SourcingSnapshot,
3947        hr: &HrSnapshot,
3948        manufacturing: &ManufacturingSnapshot,
3949        banking: &BankingSnapshot,
3950        audit: &AuditSnapshot,
3951        financial_reporting: &FinancialReportingSnapshot,
3952        ocpm: &OcpmSnapshot,
3953        compliance: &ComplianceRegulationsSnapshot,
3954        stats: &mut EnhancedGenerationStatistics,
3955    ) -> SynthResult<()> {
3956        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
3957            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
3958            match self.export_hypergraph(
3959                coa,
3960                entries,
3961                document_flows,
3962                sourcing,
3963                hr,
3964                manufacturing,
3965                banking,
3966                audit,
3967                financial_reporting,
3968                ocpm,
3969                compliance,
3970                stats,
3971            ) {
3972                Ok(info) => {
3973                    info!(
3974                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
3975                        info.node_count, info.edge_count, info.hyperedge_count
3976                    );
3977                }
3978                Err(e) => {
3979                    warn!("Phase 10b: Hypergraph export failed: {}", e);
3980                }
3981            }
3982        } else {
3983            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
3984        }
3985        Ok(())
3986    }
3987
3988    /// Phase 11: LLM Enrichment.
3989    ///
3990    /// Uses an LLM provider (mock by default) to enrich vendor names with
3991    /// realistic, context-aware names. This phase is non-blocking: failures
3992    /// log a warning but do not stop the generation pipeline.
3993    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
3994        if !self.config.llm.enabled {
3995            debug!("Phase 11: Skipped (LLM enrichment disabled)");
3996            return;
3997        }
3998
3999        info!("Phase 11: Starting LLM Enrichment");
4000        let start = std::time::Instant::now();
4001
4002        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4003            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4004            // and the corresponding API key environment variable is present.
4005            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4006                let schema_provider = &self.config.llm.provider;
4007                let api_key_env = match schema_provider.as_str() {
4008                    "openai" => Some("OPENAI_API_KEY"),
4009                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4010                    "custom" => Some("LLM_API_KEY"),
4011                    _ => None,
4012                };
4013                if let Some(key_env) = api_key_env {
4014                    if std::env::var(key_env).is_ok() {
4015                        let llm_config = datasynth_core::llm::LlmConfig {
4016                            model: self.config.llm.model.clone(),
4017                            api_key_env: key_env.to_string(),
4018                            ..datasynth_core::llm::LlmConfig::default()
4019                        };
4020                        match HttpLlmProvider::new(llm_config) {
4021                            Ok(p) => Arc::new(p),
4022                            Err(e) => {
4023                                warn!(
4024                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4025                                    e
4026                                );
4027                                Arc::new(MockLlmProvider::new(self.seed))
4028                            }
4029                        }
4030                    } else {
4031                        Arc::new(MockLlmProvider::new(self.seed))
4032                    }
4033                } else {
4034                    Arc::new(MockLlmProvider::new(self.seed))
4035                }
4036            };
4037            let enricher = VendorLlmEnricher::new(provider);
4038
4039            let industry = format!("{:?}", self.config.global.industry);
4040            let max_enrichments = self
4041                .config
4042                .llm
4043                .max_vendor_enrichments
4044                .min(self.master_data.vendors.len());
4045
4046            let mut enriched_count = 0usize;
4047            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4048                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4049                    Ok(name) => {
4050                        vendor.name = name;
4051                        enriched_count += 1;
4052                    }
4053                    Err(e) => {
4054                        warn!(
4055                            "LLM vendor enrichment failed for {}: {}",
4056                            vendor.vendor_id, e
4057                        );
4058                    }
4059                }
4060            }
4061
4062            enriched_count
4063        }));
4064
4065        match result {
4066            Ok(enriched_count) => {
4067                stats.llm_vendors_enriched = enriched_count;
4068                let elapsed = start.elapsed();
4069                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4070                info!(
4071                    "Phase 11 complete: {} vendors enriched in {}ms",
4072                    enriched_count, stats.llm_enrichment_ms
4073                );
4074            }
4075            Err(_) => {
4076                let elapsed = start.elapsed();
4077                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4078                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4079            }
4080        }
4081    }
4082
4083    /// Phase 12: Diffusion Enhancement.
4084    ///
4085    /// Generates a sample set using the statistical diffusion backend to
4086    /// demonstrate distribution-matching data generation. This phase is
4087    /// non-blocking: failures log a warning but do not stop the pipeline.
4088    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4089        if !self.config.diffusion.enabled {
4090            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4091            return;
4092        }
4093
4094        info!("Phase 12: Starting Diffusion Enhancement");
4095        let start = std::time::Instant::now();
4096
4097        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4098            // Target distribution: transaction amounts (log-normal-like)
4099            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4100            let stds = vec![2000.0, 1.5, 1.0];
4101
4102            let diffusion_config = DiffusionConfig {
4103                n_steps: self.config.diffusion.n_steps,
4104                seed: self.seed,
4105                ..Default::default()
4106            };
4107
4108            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4109
4110            let n_samples = self.config.diffusion.sample_size;
4111            let n_features = 3; // amount, line_items, approval_level
4112            let samples = backend.generate(n_samples, n_features, self.seed);
4113
4114            samples.len()
4115        }));
4116
4117        match result {
4118            Ok(sample_count) => {
4119                stats.diffusion_samples_generated = sample_count;
4120                let elapsed = start.elapsed();
4121                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4122                info!(
4123                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4124                    sample_count, stats.diffusion_enhancement_ms
4125                );
4126            }
4127            Err(_) => {
4128                let elapsed = start.elapsed();
4129                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4130                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4131            }
4132        }
4133    }
4134
4135    /// Phase 13: Causal Overlay.
4136    ///
4137    /// Builds a structural causal model from a built-in template (e.g.,
4138    /// fraud_detection) and generates causal samples. Optionally validates
4139    /// that the output respects the causal structure. This phase is
4140    /// non-blocking: failures log a warning but do not stop the pipeline.
4141    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4142        if !self.config.causal.enabled {
4143            debug!("Phase 13: Skipped (causal generation disabled)");
4144            return;
4145        }
4146
4147        info!("Phase 13: Starting Causal Overlay");
4148        let start = std::time::Instant::now();
4149
4150        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4151            // Select template based on config
4152            let graph = match self.config.causal.template.as_str() {
4153                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4154                _ => CausalGraph::fraud_detection_template(),
4155            };
4156
4157            let scm = StructuralCausalModel::new(graph.clone())
4158                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4159
4160            let n_samples = self.config.causal.sample_size;
4161            let samples = scm
4162                .generate(n_samples, self.seed)
4163                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4164
4165            // Optionally validate causal structure
4166            let validation_passed = if self.config.causal.validate {
4167                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4168                if report.valid {
4169                    info!(
4170                        "Causal validation passed: all {} checks OK",
4171                        report.checks.len()
4172                    );
4173                } else {
4174                    warn!(
4175                        "Causal validation: {} violations detected: {:?}",
4176                        report.violations.len(),
4177                        report.violations
4178                    );
4179                }
4180                Some(report.valid)
4181            } else {
4182                None
4183            };
4184
4185            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4186        }));
4187
4188        match result {
4189            Ok(Ok((sample_count, validation_passed))) => {
4190                stats.causal_samples_generated = sample_count;
4191                stats.causal_validation_passed = validation_passed;
4192                let elapsed = start.elapsed();
4193                stats.causal_generation_ms = elapsed.as_millis() as u64;
4194                info!(
4195                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4196                    sample_count, stats.causal_generation_ms, validation_passed,
4197                );
4198            }
4199            Ok(Err(e)) => {
4200                let elapsed = start.elapsed();
4201                stats.causal_generation_ms = elapsed.as_millis() as u64;
4202                warn!("Phase 13: Causal generation failed: {}", e);
4203            }
4204            Err(_) => {
4205                let elapsed = start.elapsed();
4206                stats.causal_generation_ms = elapsed.as_millis() as u64;
4207                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4208            }
4209        }
4210    }
4211
4212    /// Phase 14: Generate S2C sourcing data.
4213    fn phase_sourcing_data(
4214        &mut self,
4215        stats: &mut EnhancedGenerationStatistics,
4216    ) -> SynthResult<SourcingSnapshot> {
4217        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4218            debug!("Phase 14: Skipped (sourcing generation disabled)");
4219            return Ok(SourcingSnapshot::default());
4220        }
4221        let degradation = self.check_resources()?;
4222        if degradation >= DegradationLevel::Reduced {
4223            debug!(
4224                "Phase skipped due to resource pressure (degradation: {:?})",
4225                degradation
4226            );
4227            return Ok(SourcingSnapshot::default());
4228        }
4229
4230        info!("Phase 14: Generating S2C Sourcing Data");
4231        let seed = self.seed;
4232
4233        // Gather vendor data from master data
4234        let vendor_ids: Vec<String> = self
4235            .master_data
4236            .vendors
4237            .iter()
4238            .map(|v| v.vendor_id.clone())
4239            .collect();
4240        if vendor_ids.is_empty() {
4241            debug!("Phase 14: Skipped (no vendors available)");
4242            return Ok(SourcingSnapshot::default());
4243        }
4244
4245        let categories: Vec<(String, String)> = vec![
4246            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4247            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4248            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4249            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4250            ("CAT-LOG".to_string(), "Logistics".to_string()),
4251        ];
4252        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4253            .iter()
4254            .map(|(id, name)| {
4255                (
4256                    id.clone(),
4257                    name.clone(),
4258                    rust_decimal::Decimal::from(100_000),
4259                )
4260            })
4261            .collect();
4262
4263        let company_code = self
4264            .config
4265            .companies
4266            .first()
4267            .map(|c| c.code.as_str())
4268            .unwrap_or("1000");
4269        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4270            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4271        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4272        let fiscal_year = start_date.year() as u16;
4273        let owner_ids: Vec<String> = self
4274            .master_data
4275            .employees
4276            .iter()
4277            .take(5)
4278            .map(|e| e.employee_id.clone())
4279            .collect();
4280        let owner_id = owner_ids
4281            .first()
4282            .map(std::string::String::as_str)
4283            .unwrap_or("BUYER-001");
4284
4285        // Step 1: Spend Analysis
4286        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4287        let spend_analyses =
4288            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4289
4290        // Step 2: Sourcing Projects
4291        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4292        let sourcing_projects = if owner_ids.is_empty() {
4293            Vec::new()
4294        } else {
4295            project_gen.generate(
4296                company_code,
4297                &categories_with_spend,
4298                &owner_ids,
4299                start_date,
4300                self.config.global.period_months,
4301            )
4302        };
4303        stats.sourcing_project_count = sourcing_projects.len();
4304
4305        // Step 3: Qualifications
4306        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4307        let mut qual_gen = QualificationGenerator::new(seed + 2);
4308        let qualifications = qual_gen.generate(
4309            company_code,
4310            &qual_vendor_ids,
4311            sourcing_projects.first().map(|p| p.project_id.as_str()),
4312            owner_id,
4313            start_date,
4314        );
4315
4316        // Step 4: RFx Events
4317        let mut rfx_gen = RfxGenerator::new(seed + 3);
4318        let rfx_events: Vec<RfxEvent> = sourcing_projects
4319            .iter()
4320            .map(|proj| {
4321                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4322                rfx_gen.generate(
4323                    company_code,
4324                    &proj.project_id,
4325                    &proj.category_id,
4326                    &qualified_vids,
4327                    owner_id,
4328                    start_date,
4329                    50000.0,
4330                )
4331            })
4332            .collect();
4333        stats.rfx_event_count = rfx_events.len();
4334
4335        // Step 5: Bids
4336        let mut bid_gen = BidGenerator::new(seed + 4);
4337        let mut all_bids = Vec::new();
4338        for rfx in &rfx_events {
4339            let bidder_count = vendor_ids.len().clamp(2, 5);
4340            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4341            let bids = bid_gen.generate(rfx, &responding, start_date);
4342            all_bids.extend(bids);
4343        }
4344        stats.bid_count = all_bids.len();
4345
4346        // Step 6: Bid Evaluations
4347        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4348        let bid_evaluations: Vec<BidEvaluation> = rfx_events
4349            .iter()
4350            .map(|rfx| {
4351                let rfx_bids: Vec<SupplierBid> = all_bids
4352                    .iter()
4353                    .filter(|b| b.rfx_id == rfx.rfx_id)
4354                    .cloned()
4355                    .collect();
4356                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4357            })
4358            .collect();
4359
4360        // Step 7: Contracts from winning bids
4361        let mut contract_gen = ContractGenerator::new(seed + 6);
4362        let contracts: Vec<ProcurementContract> = bid_evaluations
4363            .iter()
4364            .zip(rfx_events.iter())
4365            .filter_map(|(eval, rfx)| {
4366                eval.ranked_bids.first().and_then(|winner| {
4367                    all_bids
4368                        .iter()
4369                        .find(|b| b.bid_id == winner.bid_id)
4370                        .map(|winning_bid| {
4371                            contract_gen.generate_from_bid(
4372                                winning_bid,
4373                                Some(&rfx.sourcing_project_id),
4374                                &rfx.category_id,
4375                                owner_id,
4376                                start_date,
4377                            )
4378                        })
4379                })
4380            })
4381            .collect();
4382        stats.contract_count = contracts.len();
4383
4384        // Step 8: Catalog Items
4385        let mut catalog_gen = CatalogGenerator::new(seed + 7);
4386        let catalog_items = catalog_gen.generate(&contracts);
4387        stats.catalog_item_count = catalog_items.len();
4388
4389        // Step 9: Scorecards
4390        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4391        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4392            .iter()
4393            .fold(
4394                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4395                |mut acc, c| {
4396                    acc.entry(c.vendor_id.clone()).or_default().push(c);
4397                    acc
4398                },
4399            )
4400            .into_iter()
4401            .collect();
4402        let scorecards = scorecard_gen.generate(
4403            company_code,
4404            &vendor_contracts,
4405            start_date,
4406            end_date,
4407            owner_id,
4408        );
4409        stats.scorecard_count = scorecards.len();
4410
4411        // Back-populate cross-references on sourcing projects (Task 35)
4412        // Link each project to its RFx events, contracts, and spend analyses
4413        let mut sourcing_projects = sourcing_projects;
4414        for project in &mut sourcing_projects {
4415            // Link RFx events generated for this project
4416            project.rfx_ids = rfx_events
4417                .iter()
4418                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4419                .map(|rfx| rfx.rfx_id.clone())
4420                .collect();
4421
4422            // Link contract awarded from this project's RFx
4423            project.contract_id = contracts
4424                .iter()
4425                .find(|c| {
4426                    c.sourcing_project_id
4427                        .as_deref()
4428                        .is_some_and(|sp| sp == project.project_id)
4429                })
4430                .map(|c| c.contract_id.clone());
4431
4432            // Link spend analysis for matching category (use category_id as the reference)
4433            project.spend_analysis_id = spend_analyses
4434                .iter()
4435                .find(|sa| sa.category_id == project.category_id)
4436                .map(|sa| sa.category_id.clone());
4437        }
4438
4439        info!(
4440            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4441            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4442            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4443        );
4444        self.check_resources_with_log("post-sourcing")?;
4445
4446        Ok(SourcingSnapshot {
4447            spend_analyses,
4448            sourcing_projects,
4449            qualifications,
4450            rfx_events,
4451            bids: all_bids,
4452            bid_evaluations,
4453            contracts,
4454            catalog_items,
4455            scorecards,
4456        })
4457    }
4458
4459    /// Build a [`GroupStructure`] from the current company configuration.
4460    ///
4461    /// The first company in the configuration is treated as the ultimate parent.
4462    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4463    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4464    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4465        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4466
4467        let parent_code = self
4468            .config
4469            .companies
4470            .first()
4471            .map(|c| c.code.clone())
4472            .unwrap_or_else(|| "PARENT".to_string());
4473
4474        let mut group = GroupStructure::new(parent_code);
4475
4476        for company in self.config.companies.iter().skip(1) {
4477            let sub =
4478                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4479            group.add_subsidiary(sub);
4480        }
4481
4482        group
4483    }
4484
4485    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4486    fn phase_intercompany(
4487        &mut self,
4488        journal_entries: &[JournalEntry],
4489        stats: &mut EnhancedGenerationStatistics,
4490    ) -> SynthResult<IntercompanySnapshot> {
4491        // Skip if intercompany is disabled in config
4492        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4493            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4494            return Ok(IntercompanySnapshot::default());
4495        }
4496
4497        // Intercompany requires at least 2 companies
4498        if self.config.companies.len() < 2 {
4499            debug!(
4500                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4501                self.config.companies.len()
4502            );
4503            return Ok(IntercompanySnapshot::default());
4504        }
4505
4506        info!("Phase 14b: Generating Intercompany Transactions");
4507
4508        // Build the group structure early — used by ISA 600 component auditor scope
4509        // and consolidated financial statement generators downstream.
4510        let group_structure = self.build_group_structure();
4511        debug!(
4512            "Group structure built: parent={}, subsidiaries={}",
4513            group_structure.parent_entity,
4514            group_structure.subsidiaries.len()
4515        );
4516
4517        let seed = self.seed;
4518        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4519            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4520        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4521
4522        // Build ownership structure from company configs
4523        // First company is treated as the parent, remaining are subsidiaries
4524        let parent_code = self.config.companies[0].code.clone();
4525        let mut ownership_structure =
4526            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4527
4528        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4529            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4530                format!("REL{:03}", i + 1),
4531                parent_code.clone(),
4532                company.code.clone(),
4533                rust_decimal::Decimal::from(100), // Default 100% ownership
4534                start_date,
4535            );
4536            ownership_structure.add_relationship(relationship);
4537        }
4538
4539        // Convert config transfer pricing method to core model enum
4540        let tp_method = match self.config.intercompany.transfer_pricing_method {
4541            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4542                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4543            }
4544            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4545                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4546            }
4547            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4548                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4549            }
4550            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4551                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4552            }
4553            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4554                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4555            }
4556        };
4557
4558        // Build IC generator config from schema config
4559        let ic_currency = self
4560            .config
4561            .companies
4562            .first()
4563            .map(|c| c.currency.clone())
4564            .unwrap_or_else(|| "USD".to_string());
4565        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4566            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4567            transfer_pricing_method: tp_method,
4568            markup_percent: rust_decimal::Decimal::from_f64_retain(
4569                self.config.intercompany.markup_percent,
4570            )
4571            .unwrap_or(rust_decimal::Decimal::from(5)),
4572            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4573            default_currency: ic_currency,
4574            ..Default::default()
4575        };
4576
4577        // Create IC generator
4578        let mut ic_generator = datasynth_generators::ICGenerator::new(
4579            ic_gen_config,
4580            ownership_structure.clone(),
4581            seed + 50,
4582        );
4583
4584        // Generate IC transactions for the period
4585        // Use ~3 transactions per day as a reasonable default
4586        let transactions_per_day = 3;
4587        let matched_pairs = ic_generator.generate_transactions_for_period(
4588            start_date,
4589            end_date,
4590            transactions_per_day,
4591        );
4592
4593        // Generate IC source P2P/O2C documents
4594        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4595        debug!(
4596            "Generated {} IC seller invoices, {} IC buyer POs",
4597            ic_doc_chains.seller_invoices.len(),
4598            ic_doc_chains.buyer_orders.len()
4599        );
4600
4601        // Generate journal entries from matched pairs
4602        let mut seller_entries = Vec::new();
4603        let mut buyer_entries = Vec::new();
4604        let fiscal_year = start_date.year();
4605
4606        for pair in &matched_pairs {
4607            let fiscal_period = pair.posting_date.month();
4608            let (seller_je, buyer_je) =
4609                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4610            seller_entries.push(seller_je);
4611            buyer_entries.push(buyer_je);
4612        }
4613
4614        // Run matching engine
4615        let matching_config = datasynth_generators::ICMatchingConfig {
4616            base_currency: self
4617                .config
4618                .companies
4619                .first()
4620                .map(|c| c.currency.clone())
4621                .unwrap_or_else(|| "USD".to_string()),
4622            ..Default::default()
4623        };
4624        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4625        matching_engine.load_matched_pairs(&matched_pairs);
4626        let matching_result = matching_engine.run_matching(end_date);
4627
4628        // Generate elimination entries if configured
4629        let mut elimination_entries = Vec::new();
4630        if self.config.intercompany.generate_eliminations {
4631            let elim_config = datasynth_generators::EliminationConfig {
4632                consolidation_entity: "GROUP".to_string(),
4633                base_currency: self
4634                    .config
4635                    .companies
4636                    .first()
4637                    .map(|c| c.currency.clone())
4638                    .unwrap_or_else(|| "USD".to_string()),
4639                ..Default::default()
4640            };
4641
4642            let mut elim_generator =
4643                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4644
4645            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4646            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4647                matching_result
4648                    .matched_balances
4649                    .iter()
4650                    .chain(matching_result.unmatched_balances.iter())
4651                    .cloned()
4652                    .collect();
4653
4654            // Build investment and equity maps from the group structure so that the
4655            // elimination generator can produce equity-investment elimination entries
4656            // (parent's investment in subsidiary vs. subsidiary's equity capital).
4657            //
4658            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
4659            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
4660            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
4661            //
4662            // Net assets are derived from the journal entries using account-range heuristics:
4663            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
4664            // no JE data is available (IC phase runs early in the generation pipeline).
4665            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4666                std::collections::HashMap::new();
4667            let mut equity_amounts: std::collections::HashMap<
4668                String,
4669                std::collections::HashMap<String, rust_decimal::Decimal>,
4670            > = std::collections::HashMap::new();
4671            {
4672                use rust_decimal::Decimal;
4673                let hundred = Decimal::from(100u32);
4674                let ten_pct = Decimal::new(10, 2); // 0.10
4675                let thirty_pct = Decimal::new(30, 2); // 0.30
4676                let sixty_pct = Decimal::new(60, 2); // 0.60
4677                let parent_code = &group_structure.parent_entity;
4678                for sub in &group_structure.subsidiaries {
4679                    let net_assets = {
4680                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4681                        if na > Decimal::ZERO {
4682                            na
4683                        } else {
4684                            Decimal::from(1_000_000u64)
4685                        }
4686                    };
4687                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
4688                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4689                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4690
4691                    // Split subsidiary equity into conventional components:
4692                    // 10 % share capital / 30 % APIC / 60 % retained earnings
4693                    let mut eq_map = std::collections::HashMap::new();
4694                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4695                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4696                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4697                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
4698                }
4699            }
4700
4701            let journal = elim_generator.generate_eliminations(
4702                &fiscal_period,
4703                end_date,
4704                &all_balances,
4705                &matched_pairs,
4706                &investment_amounts,
4707                &equity_amounts,
4708            );
4709
4710            elimination_entries = journal.entries.clone();
4711        }
4712
4713        let matched_pair_count = matched_pairs.len();
4714        let elimination_entry_count = elimination_entries.len();
4715        let match_rate = matching_result.match_rate;
4716
4717        stats.ic_matched_pair_count = matched_pair_count;
4718        stats.ic_elimination_count = elimination_entry_count;
4719        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4720
4721        info!(
4722            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4723            matched_pair_count,
4724            stats.ic_transaction_count,
4725            seller_entries.len(),
4726            buyer_entries.len(),
4727            elimination_entry_count,
4728            match_rate * 100.0
4729        );
4730        self.check_resources_with_log("post-intercompany")?;
4731
4732        // ----------------------------------------------------------------
4733        // NCI measurements: derive from group structure ownership percentages
4734        // ----------------------------------------------------------------
4735        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4736            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4737            use rust_decimal::Decimal;
4738
4739            let eight_pct = Decimal::new(8, 2); // 0.08
4740
4741            group_structure
4742                .subsidiaries
4743                .iter()
4744                .filter(|sub| {
4745                    sub.nci_percentage > Decimal::ZERO
4746                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4747                })
4748                .map(|sub| {
4749                    // Compute net assets from actual journal entries for this subsidiary.
4750                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
4751                    // IC phase runs before the main JE batch has been populated).
4752                    let net_assets_from_jes =
4753                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4754
4755                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
4756                        net_assets_from_jes.round_dp(2)
4757                    } else {
4758                        // Fallback: use a plausible base amount
4759                        Decimal::from(1_000_000u64)
4760                    };
4761
4762                    // Net income approximated as 8% of net assets
4763                    let net_income = (net_assets * eight_pct).round_dp(2);
4764
4765                    NciMeasurement::compute(
4766                        sub.entity_code.clone(),
4767                        sub.nci_percentage,
4768                        net_assets,
4769                        net_income,
4770                    )
4771                })
4772                .collect()
4773        };
4774
4775        if !nci_measurements.is_empty() {
4776            info!(
4777                "NCI measurements: {} subsidiaries with non-controlling interests",
4778                nci_measurements.len()
4779            );
4780        }
4781
4782        Ok(IntercompanySnapshot {
4783            group_structure: Some(group_structure),
4784            matched_pairs,
4785            seller_journal_entries: seller_entries,
4786            buyer_journal_entries: buyer_entries,
4787            elimination_entries,
4788            nci_measurements,
4789            ic_document_chains: Some(ic_doc_chains),
4790            matched_pair_count,
4791            elimination_entry_count,
4792            match_rate,
4793        })
4794    }
4795
4796    /// Phase 15: Generate bank reconciliations and financial statements.
4797    fn phase_financial_reporting(
4798        &mut self,
4799        document_flows: &DocumentFlowSnapshot,
4800        journal_entries: &[JournalEntry],
4801        coa: &Arc<ChartOfAccounts>,
4802        _hr: &HrSnapshot,
4803        _audit: &AuditSnapshot,
4804        stats: &mut EnhancedGenerationStatistics,
4805    ) -> SynthResult<FinancialReportingSnapshot> {
4806        let fs_enabled = self.phase_config.generate_financial_statements
4807            || self.config.financial_reporting.enabled;
4808        let br_enabled = self.phase_config.generate_bank_reconciliation;
4809
4810        if !fs_enabled && !br_enabled {
4811            debug!("Phase 15: Skipped (financial reporting disabled)");
4812            return Ok(FinancialReportingSnapshot::default());
4813        }
4814
4815        info!("Phase 15: Generating Financial Reporting Data");
4816
4817        let seed = self.seed;
4818        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4819            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4820
4821        let mut financial_statements = Vec::new();
4822        let mut bank_reconciliations = Vec::new();
4823        let mut trial_balances = Vec::new();
4824        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4825        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4826            Vec::new();
4827        // Standalone statements keyed by entity code
4828        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4829            std::collections::HashMap::new();
4830        // Consolidated statements (one per period)
4831        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4832        // Consolidation schedules (one per period)
4833        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4834
4835        // Generate financial statements from JE-derived trial balances.
4836        //
4837        // When journal entries are available, we use cumulative trial balances for
4838        // balance sheet accounts and current-period trial balances for income
4839        // statement accounts. We also track prior-period trial balances so the
4840        // generator can produce comparative amounts, and we build a proper
4841        // cash flow statement from working capital changes rather than random data.
4842        if fs_enabled {
4843            let has_journal_entries = !journal_entries.is_empty();
4844
4845            // Use FinancialStatementGenerator for balance sheet and income statement,
4846            // but build cash flow ourselves from TB data when JEs are available.
4847            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4848            // Separate generator for consolidated statements (different seed offset)
4849            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4850
4851            // Collect elimination JEs once (reused across periods)
4852            let elimination_entries: Vec<&JournalEntry> = journal_entries
4853                .iter()
4854                .filter(|je| je.header.is_elimination)
4855                .collect();
4856
4857            // Generate one set of statements per period, per entity
4858            for period in 0..self.config.global.period_months {
4859                let period_start = start_date + chrono::Months::new(period);
4860                let period_end =
4861                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4862                let fiscal_year = period_end.year() as u16;
4863                let fiscal_period = period_end.month() as u8;
4864                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4865
4866                // Build per-entity trial balances for this period (non-elimination JEs)
4867                // We accumulate them for the consolidation step.
4868                let mut entity_tb_map: std::collections::HashMap<
4869                    String,
4870                    std::collections::HashMap<String, rust_decimal::Decimal>,
4871                > = std::collections::HashMap::new();
4872
4873                // --- Standalone: one set of statements per company ---
4874                for (company_idx, company) in self.config.companies.iter().enumerate() {
4875                    let company_code = company.code.as_str();
4876                    let currency = company.currency.as_str();
4877                    // Use a unique seed offset per company to keep statements deterministic
4878                    // and distinct across companies
4879                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4880                    let mut company_fs_gen =
4881                        FinancialStatementGenerator::new(seed + company_seed_offset);
4882
4883                    if has_journal_entries {
4884                        let tb_entries = Self::build_cumulative_trial_balance(
4885                            journal_entries,
4886                            coa,
4887                            company_code,
4888                            start_date,
4889                            period_end,
4890                            fiscal_year,
4891                            fiscal_period,
4892                        );
4893
4894                        // Accumulate per-entity category balances for consolidation
4895                        let entity_cat_map =
4896                            entity_tb_map.entry(company_code.to_string()).or_default();
4897                        for tb_entry in &tb_entries {
4898                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
4899                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4900                        }
4901
4902                        let stmts = company_fs_gen.generate(
4903                            company_code,
4904                            currency,
4905                            &tb_entries,
4906                            period_start,
4907                            period_end,
4908                            fiscal_year,
4909                            fiscal_period,
4910                            None,
4911                            "SYS-AUTOCLOSE",
4912                        );
4913
4914                        let mut entity_stmts = Vec::new();
4915                        for stmt in stmts {
4916                            if stmt.statement_type == StatementType::CashFlowStatement {
4917                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4918                                let cf_items = Self::build_cash_flow_from_trial_balances(
4919                                    &tb_entries,
4920                                    None,
4921                                    net_income,
4922                                );
4923                                entity_stmts.push(FinancialStatement {
4924                                    cash_flow_items: cf_items,
4925                                    ..stmt
4926                                });
4927                            } else {
4928                                entity_stmts.push(stmt);
4929                            }
4930                        }
4931
4932                        // Add to the flat financial_statements list (used by KPI/budget)
4933                        financial_statements.extend(entity_stmts.clone());
4934
4935                        // Store standalone per-entity
4936                        standalone_statements
4937                            .entry(company_code.to_string())
4938                            .or_default()
4939                            .extend(entity_stmts);
4940
4941                        // Only store trial balance for the first company in the period
4942                        // to avoid duplicates in the trial_balances list
4943                        if company_idx == 0 {
4944                            trial_balances.push(PeriodTrialBalance {
4945                                fiscal_year,
4946                                fiscal_period,
4947                                period_start,
4948                                period_end,
4949                                entries: tb_entries,
4950                            });
4951                        }
4952                    } else {
4953                        // Fallback: no JEs available
4954                        let tb_entries = Self::build_trial_balance_from_entries(
4955                            journal_entries,
4956                            coa,
4957                            company_code,
4958                            fiscal_year,
4959                            fiscal_period,
4960                        );
4961
4962                        let stmts = company_fs_gen.generate(
4963                            company_code,
4964                            currency,
4965                            &tb_entries,
4966                            period_start,
4967                            period_end,
4968                            fiscal_year,
4969                            fiscal_period,
4970                            None,
4971                            "SYS-AUTOCLOSE",
4972                        );
4973                        financial_statements.extend(stmts.clone());
4974                        standalone_statements
4975                            .entry(company_code.to_string())
4976                            .or_default()
4977                            .extend(stmts);
4978
4979                        if company_idx == 0 && !tb_entries.is_empty() {
4980                            trial_balances.push(PeriodTrialBalance {
4981                                fiscal_year,
4982                                fiscal_period,
4983                                period_start,
4984                                period_end,
4985                                entries: tb_entries,
4986                            });
4987                        }
4988                    }
4989                }
4990
4991                // --- Consolidated: aggregate all entities + apply eliminations ---
4992                // Use the primary (first) company's currency for the consolidated statement
4993                let group_currency = self
4994                    .config
4995                    .companies
4996                    .first()
4997                    .map(|c| c.currency.as_str())
4998                    .unwrap_or("USD");
4999
5000                // Build owned elimination entries for this period
5001                let period_eliminations: Vec<JournalEntry> = elimination_entries
5002                    .iter()
5003                    .filter(|je| {
5004                        je.header.fiscal_year == fiscal_year
5005                            && je.header.fiscal_period == fiscal_period
5006                    })
5007                    .map(|je| (*je).clone())
5008                    .collect();
5009
5010                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5011                    &entity_tb_map,
5012                    &period_eliminations,
5013                    &period_label,
5014                );
5015
5016                // Build a pseudo trial balance from consolidated line items for the
5017                // FinancialStatementGenerator to use (only for cash flow direction).
5018                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5019                    .line_items
5020                    .iter()
5021                    .map(|li| {
5022                        let net = li.post_elimination_total;
5023                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5024                            (net, rust_decimal::Decimal::ZERO)
5025                        } else {
5026                            (rust_decimal::Decimal::ZERO, -net)
5027                        };
5028                        datasynth_generators::TrialBalanceEntry {
5029                            account_code: li.account_category.clone(),
5030                            account_name: li.account_category.clone(),
5031                            category: li.account_category.clone(),
5032                            debit_balance: debit,
5033                            credit_balance: credit,
5034                        }
5035                    })
5036                    .collect();
5037
5038                let mut cons_stmts = cons_gen.generate(
5039                    "GROUP",
5040                    group_currency,
5041                    &cons_tb,
5042                    period_start,
5043                    period_end,
5044                    fiscal_year,
5045                    fiscal_period,
5046                    None,
5047                    "SYS-AUTOCLOSE",
5048                );
5049
5050                // Split consolidated line items by statement type.
5051                // The consolidation generator returns BS items first, then IS items,
5052                // identified by their CONS- prefix and category.
5053                let bs_categories: &[&str] = &[
5054                    "CASH",
5055                    "RECEIVABLES",
5056                    "INVENTORY",
5057                    "FIXEDASSETS",
5058                    "PAYABLES",
5059                    "ACCRUEDLIABILITIES",
5060                    "LONGTERMDEBT",
5061                    "EQUITY",
5062                ];
5063                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5064                    cons_line_items.into_iter().partition(|li| {
5065                        let upper = li.label.to_uppercase();
5066                        bs_categories.iter().any(|c| upper == *c)
5067                    });
5068
5069                for stmt in &mut cons_stmts {
5070                    stmt.is_consolidated = true;
5071                    match stmt.statement_type {
5072                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5073                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5074                        _ => {} // CF and equity change statements keep generator output
5075                    }
5076                }
5077
5078                consolidated_statements.extend(cons_stmts);
5079                consolidation_schedules.push(schedule);
5080            }
5081
5082            // Backward compat: if only 1 company, use existing code path logic
5083            // (prior_cumulative_tb for comparative amounts). Already handled above;
5084            // the prior_ref is omitted to keep this change minimal.
5085            let _ = &mut fs_gen; // suppress unused warning
5086
5087            stats.financial_statement_count = financial_statements.len();
5088            info!(
5089                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5090                stats.financial_statement_count,
5091                consolidated_statements.len(),
5092                has_journal_entries
5093            );
5094
5095            // ----------------------------------------------------------------
5096            // IFRS 8 / ASC 280: Operating Segment Reporting
5097            // ----------------------------------------------------------------
5098            // Build entity seeds from the company configuration.
5099            let entity_seeds: Vec<SegmentSeed> = self
5100                .config
5101                .companies
5102                .iter()
5103                .map(|c| SegmentSeed {
5104                    code: c.code.clone(),
5105                    name: c.name.clone(),
5106                    currency: c.currency.clone(),
5107                })
5108                .collect();
5109
5110            let mut seg_gen = SegmentGenerator::new(seed + 30);
5111
5112            // Generate one set of segment reports per period.
5113            // We extract consolidated revenue / profit / assets from the consolidated
5114            // financial statements produced above, falling back to simple sums when
5115            // no consolidated statements were generated (single-entity path).
5116            for period in 0..self.config.global.period_months {
5117                let period_end =
5118                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5119                let fiscal_year = period_end.year() as u16;
5120                let fiscal_period = period_end.month() as u8;
5121                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5122
5123                use datasynth_core::models::StatementType;
5124
5125                // Try to find consolidated income statement for this period
5126                let cons_is = consolidated_statements.iter().find(|s| {
5127                    s.fiscal_year == fiscal_year
5128                        && s.fiscal_period == fiscal_period
5129                        && s.statement_type == StatementType::IncomeStatement
5130                });
5131                let cons_bs = consolidated_statements.iter().find(|s| {
5132                    s.fiscal_year == fiscal_year
5133                        && s.fiscal_period == fiscal_period
5134                        && s.statement_type == StatementType::BalanceSheet
5135                });
5136
5137                // If consolidated statements not available fall back to the flat list
5138                let is_stmt = cons_is.or_else(|| {
5139                    financial_statements.iter().find(|s| {
5140                        s.fiscal_year == fiscal_year
5141                            && s.fiscal_period == fiscal_period
5142                            && s.statement_type == StatementType::IncomeStatement
5143                    })
5144                });
5145                let bs_stmt = cons_bs.or_else(|| {
5146                    financial_statements.iter().find(|s| {
5147                        s.fiscal_year == fiscal_year
5148                            && s.fiscal_period == fiscal_period
5149                            && s.statement_type == StatementType::BalanceSheet
5150                    })
5151                });
5152
5153                let consolidated_revenue = is_stmt
5154                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5155                    .map(|li| -li.amount) // revenue is stored as negative in IS
5156                    .unwrap_or(rust_decimal::Decimal::ZERO);
5157
5158                let consolidated_profit = is_stmt
5159                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5160                    .map(|li| li.amount)
5161                    .unwrap_or(rust_decimal::Decimal::ZERO);
5162
5163                let consolidated_assets = bs_stmt
5164                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5165                    .map(|li| li.amount)
5166                    .unwrap_or(rust_decimal::Decimal::ZERO);
5167
5168                // Skip periods where we have no financial data
5169                if consolidated_revenue == rust_decimal::Decimal::ZERO
5170                    && consolidated_assets == rust_decimal::Decimal::ZERO
5171                {
5172                    continue;
5173                }
5174
5175                let group_code = self
5176                    .config
5177                    .companies
5178                    .first()
5179                    .map(|c| c.code.as_str())
5180                    .unwrap_or("GROUP");
5181
5182                // Compute period depreciation from JEs with document type "CL" hitting account
5183                // 6000 (depreciation expense).  These are generated by phase_period_close.
5184                let total_depr: rust_decimal::Decimal = journal_entries
5185                    .iter()
5186                    .filter(|je| je.header.document_type == "CL")
5187                    .flat_map(|je| je.lines.iter())
5188                    .filter(|l| l.gl_account.starts_with("6000"))
5189                    .map(|l| l.debit_amount)
5190                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5191                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5192                    Some(total_depr)
5193                } else {
5194                    None
5195                };
5196
5197                let (segs, recon) = seg_gen.generate(
5198                    group_code,
5199                    &period_label,
5200                    consolidated_revenue,
5201                    consolidated_profit,
5202                    consolidated_assets,
5203                    &entity_seeds,
5204                    depr_param,
5205                );
5206                segment_reports.extend(segs);
5207                segment_reconciliations.push(recon);
5208            }
5209
5210            info!(
5211                "Segment reports generated: {} segments, {} reconciliations",
5212                segment_reports.len(),
5213                segment_reconciliations.len()
5214            );
5215        }
5216
5217        // Generate bank reconciliations from payment data
5218        if br_enabled && !document_flows.payments.is_empty() {
5219            let employee_ids: Vec<String> = self
5220                .master_data
5221                .employees
5222                .iter()
5223                .map(|e| e.employee_id.clone())
5224                .collect();
5225            let mut br_gen =
5226                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5227
5228            // Group payments by company code and period
5229            for company in &self.config.companies {
5230                let company_payments: Vec<PaymentReference> = document_flows
5231                    .payments
5232                    .iter()
5233                    .filter(|p| p.header.company_code == company.code)
5234                    .map(|p| PaymentReference {
5235                        id: p.header.document_id.clone(),
5236                        amount: if p.is_vendor { p.amount } else { -p.amount },
5237                        date: p.header.document_date,
5238                        reference: p
5239                            .check_number
5240                            .clone()
5241                            .or_else(|| p.wire_reference.clone())
5242                            .unwrap_or_else(|| p.header.document_id.clone()),
5243                    })
5244                    .collect();
5245
5246                if company_payments.is_empty() {
5247                    continue;
5248                }
5249
5250                let bank_account_id = format!("{}-MAIN", company.code);
5251
5252                // Generate one reconciliation per period
5253                for period in 0..self.config.global.period_months {
5254                    let period_start = start_date + chrono::Months::new(period);
5255                    let period_end =
5256                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5257
5258                    let period_payments: Vec<PaymentReference> = company_payments
5259                        .iter()
5260                        .filter(|p| p.date >= period_start && p.date <= period_end)
5261                        .cloned()
5262                        .collect();
5263
5264                    let recon = br_gen.generate(
5265                        &company.code,
5266                        &bank_account_id,
5267                        period_start,
5268                        period_end,
5269                        &company.currency,
5270                        &period_payments,
5271                    );
5272                    bank_reconciliations.push(recon);
5273                }
5274            }
5275            info!(
5276                "Bank reconciliations generated: {} reconciliations",
5277                bank_reconciliations.len()
5278            );
5279        }
5280
5281        stats.bank_reconciliation_count = bank_reconciliations.len();
5282        self.check_resources_with_log("post-financial-reporting")?;
5283
5284        if !trial_balances.is_empty() {
5285            info!(
5286                "Period-close trial balances captured: {} periods",
5287                trial_balances.len()
5288            );
5289        }
5290
5291        // Notes to financial statements are generated in a separate post-processing step
5292        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5293        // phases have completed, so that deferred tax and provision data can be wired in.
5294        let notes_to_financial_statements = Vec::new();
5295
5296        Ok(FinancialReportingSnapshot {
5297            financial_statements,
5298            standalone_statements,
5299            consolidated_statements,
5300            consolidation_schedules,
5301            bank_reconciliations,
5302            trial_balances,
5303            segment_reports,
5304            segment_reconciliations,
5305            notes_to_financial_statements,
5306        })
5307    }
5308
5309    /// Populate notes to financial statements using fully-resolved snapshots.
5310    ///
5311    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5312    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5313    /// can be wired into the notes context.  The method mutates
5314    /// `financial_reporting.notes_to_financial_statements` in-place.
5315    fn generate_notes_to_financial_statements(
5316        &self,
5317        financial_reporting: &mut FinancialReportingSnapshot,
5318        accounting_standards: &AccountingStandardsSnapshot,
5319        tax: &TaxSnapshot,
5320        hr: &HrSnapshot,
5321        audit: &AuditSnapshot,
5322        treasury: &TreasurySnapshot,
5323    ) {
5324        use datasynth_config::schema::AccountingFrameworkConfig;
5325        use datasynth_core::models::StatementType;
5326        use datasynth_generators::period_close::notes_generator::{
5327            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5328        };
5329
5330        let seed = self.seed;
5331        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5332        {
5333            Ok(d) => d,
5334            Err(_) => return,
5335        };
5336
5337        let mut notes_gen = NotesGenerator::new(seed + 4235);
5338
5339        for company in &self.config.companies {
5340            let last_period_end = start_date
5341                + chrono::Months::new(self.config.global.period_months)
5342                - chrono::Days::new(1);
5343            let fiscal_year = last_period_end.year() as u16;
5344
5345            // Extract relevant amounts from the already-generated financial statements
5346            let entity_is = financial_reporting
5347                .standalone_statements
5348                .get(&company.code)
5349                .and_then(|stmts| {
5350                    stmts.iter().find(|s| {
5351                        s.fiscal_year == fiscal_year
5352                            && s.statement_type == StatementType::IncomeStatement
5353                    })
5354                });
5355            let entity_bs = financial_reporting
5356                .standalone_statements
5357                .get(&company.code)
5358                .and_then(|stmts| {
5359                    stmts.iter().find(|s| {
5360                        s.fiscal_year == fiscal_year
5361                            && s.statement_type == StatementType::BalanceSheet
5362                    })
5363                });
5364
5365            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
5366            let revenue_amount = entity_is
5367                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5368                .map(|li| li.amount);
5369            let ppe_gross = entity_bs
5370                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5371                .map(|li| li.amount);
5372
5373            let framework = match self
5374                .config
5375                .accounting_standards
5376                .framework
5377                .unwrap_or_default()
5378            {
5379                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5380                    "IFRS".to_string()
5381                }
5382                _ => "US GAAP".to_string(),
5383            };
5384
5385            // ---- Deferred tax (IAS 12 / ASC 740) ----
5386            // Sum closing DTA and DTL from rollforward entries for this entity.
5387            let (entity_dta, entity_dtl) = {
5388                let mut dta = rust_decimal::Decimal::ZERO;
5389                let mut dtl = rust_decimal::Decimal::ZERO;
5390                for rf in &tax.deferred_tax.rollforwards {
5391                    if rf.entity_code == company.code {
5392                        dta += rf.closing_dta;
5393                        dtl += rf.closing_dtl;
5394                    }
5395                }
5396                (
5397                    if dta > rust_decimal::Decimal::ZERO {
5398                        Some(dta)
5399                    } else {
5400                        None
5401                    },
5402                    if dtl > rust_decimal::Decimal::ZERO {
5403                        Some(dtl)
5404                    } else {
5405                        None
5406                    },
5407                )
5408            };
5409
5410            // ---- Provisions (IAS 37 / ASC 450) ----
5411            // Filter provisions to this entity; sum best_estimate amounts.
5412            let entity_provisions: Vec<_> = accounting_standards
5413                .provisions
5414                .iter()
5415                .filter(|p| p.entity_code == company.code)
5416                .collect();
5417            let provision_count = entity_provisions.len();
5418            let total_provisions = if provision_count > 0 {
5419                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5420            } else {
5421                None
5422            };
5423
5424            // ---- Pension data from HR snapshot ----
5425            let entity_pension_plan_count = hr
5426                .pension_plans
5427                .iter()
5428                .filter(|p| p.entity_code == company.code)
5429                .count();
5430            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5431                let sum: rust_decimal::Decimal = hr
5432                    .pension_disclosures
5433                    .iter()
5434                    .filter(|d| {
5435                        hr.pension_plans
5436                            .iter()
5437                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5438                    })
5439                    .map(|d| d.net_pension_liability)
5440                    .sum();
5441                let plan_assets_sum: rust_decimal::Decimal = hr
5442                    .pension_plan_assets
5443                    .iter()
5444                    .filter(|a| {
5445                        hr.pension_plans
5446                            .iter()
5447                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5448                    })
5449                    .map(|a| a.fair_value_closing)
5450                    .sum();
5451                if entity_pension_plan_count > 0 {
5452                    Some(sum + plan_assets_sum)
5453                } else {
5454                    None
5455                }
5456            };
5457            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5458                let sum: rust_decimal::Decimal = hr
5459                    .pension_plan_assets
5460                    .iter()
5461                    .filter(|a| {
5462                        hr.pension_plans
5463                            .iter()
5464                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5465                    })
5466                    .map(|a| a.fair_value_closing)
5467                    .sum();
5468                if entity_pension_plan_count > 0 {
5469                    Some(sum)
5470                } else {
5471                    None
5472                }
5473            };
5474
5475            // ---- Audit data: related parties + subsequent events ----
5476            // Audit snapshot covers all entities; use total counts (common case = single entity).
5477            let rp_count = audit.related_party_transactions.len();
5478            let se_count = audit.subsequent_events.len();
5479            let adjusting_count = audit
5480                .subsequent_events
5481                .iter()
5482                .filter(|e| {
5483                    matches!(
5484                        e.classification,
5485                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5486                    )
5487                })
5488                .count();
5489
5490            let ctx = NotesGeneratorContext {
5491                entity_code: company.code.clone(),
5492                framework,
5493                period: format!("FY{}", fiscal_year),
5494                period_end: last_period_end,
5495                currency: company.currency.clone(),
5496                revenue_amount,
5497                total_ppe_gross: ppe_gross,
5498                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5499                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5500                deferred_tax_asset: entity_dta,
5501                deferred_tax_liability: entity_dtl,
5502                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5503                provision_count,
5504                total_provisions,
5505                // Pension data from HR snapshot
5506                pension_plan_count: entity_pension_plan_count,
5507                total_dbo: entity_total_dbo,
5508                total_plan_assets: entity_total_plan_assets,
5509                // Audit data
5510                related_party_transaction_count: rp_count,
5511                subsequent_event_count: se_count,
5512                adjusting_event_count: adjusting_count,
5513                ..NotesGeneratorContext::default()
5514            };
5515
5516            let entity_notes = notes_gen.generate(&ctx);
5517            let standard_note_count = entity_notes.len() as u32;
5518            info!(
5519                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5520                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5521            );
5522            financial_reporting
5523                .notes_to_financial_statements
5524                .extend(entity_notes);
5525
5526            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
5527            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5528                .debt_instruments
5529                .iter()
5530                .filter(|d| d.entity_id == company.code)
5531                .map(|d| {
5532                    (
5533                        format!("{:?}", d.instrument_type),
5534                        d.principal,
5535                        d.maturity_date.to_string(),
5536                    )
5537                })
5538                .collect();
5539
5540            let hedge_count = treasury.hedge_relationships.len();
5541            let effective_hedges = treasury
5542                .hedge_relationships
5543                .iter()
5544                .filter(|h| h.is_effective)
5545                .count();
5546            let total_notional: rust_decimal::Decimal = treasury
5547                .hedging_instruments
5548                .iter()
5549                .map(|h| h.notional_amount)
5550                .sum();
5551            let total_fair_value: rust_decimal::Decimal = treasury
5552                .hedging_instruments
5553                .iter()
5554                .map(|h| h.fair_value)
5555                .sum();
5556
5557            // Join provision_movements with provisions to get entity/type info
5558            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5559                .provisions
5560                .iter()
5561                .filter(|p| p.entity_code == company.code)
5562                .map(|p| p.id.as_str())
5563                .collect();
5564            let provision_movements: Vec<(
5565                String,
5566                rust_decimal::Decimal,
5567                rust_decimal::Decimal,
5568                rust_decimal::Decimal,
5569            )> = accounting_standards
5570                .provision_movements
5571                .iter()
5572                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5573                .map(|m| {
5574                    let prov_type = accounting_standards
5575                        .provisions
5576                        .iter()
5577                        .find(|p| p.id == m.provision_id)
5578                        .map(|p| format!("{:?}", p.provision_type))
5579                        .unwrap_or_else(|| "Unknown".to_string());
5580                    (prov_type, m.opening, m.additions, m.closing)
5581                })
5582                .collect();
5583
5584            let enhanced_ctx = EnhancedNotesContext {
5585                entity_code: company.code.clone(),
5586                period: format!("FY{}", fiscal_year),
5587                currency: company.currency.clone(),
5588                // Inventory breakdown: best-effort using zero (would need balance tracker)
5589                finished_goods_value: rust_decimal::Decimal::ZERO,
5590                wip_value: rust_decimal::Decimal::ZERO,
5591                raw_materials_value: rust_decimal::Decimal::ZERO,
5592                debt_instruments,
5593                hedge_count,
5594                effective_hedges,
5595                total_notional,
5596                total_fair_value,
5597                provision_movements,
5598            };
5599
5600            let enhanced_notes =
5601                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5602            if !enhanced_notes.is_empty() {
5603                info!(
5604                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5605                    company.code,
5606                    enhanced_notes.len(),
5607                    enhanced_ctx.debt_instruments.len(),
5608                    hedge_count,
5609                    enhanced_ctx.provision_movements.len(),
5610                );
5611                financial_reporting
5612                    .notes_to_financial_statements
5613                    .extend(enhanced_notes);
5614            }
5615        }
5616    }
5617
5618    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
5619    ///
5620    /// This ensures the trial balance is coherent with the JEs: every debit and credit
5621    /// posted in the journal entries flows through to the trial balance, using the real
5622    /// GL account numbers from the CoA.
5623    fn build_trial_balance_from_entries(
5624        journal_entries: &[JournalEntry],
5625        coa: &ChartOfAccounts,
5626        company_code: &str,
5627        fiscal_year: u16,
5628        fiscal_period: u8,
5629    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5630        use rust_decimal::Decimal;
5631
5632        // Accumulate total debits and credits per GL account
5633        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5634        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5635
5636        for je in journal_entries {
5637            // Filter to matching company, fiscal year, and period
5638            if je.header.company_code != company_code
5639                || je.header.fiscal_year != fiscal_year
5640                || je.header.fiscal_period != fiscal_period
5641            {
5642                continue;
5643            }
5644
5645            for line in &je.lines {
5646                let acct = &line.gl_account;
5647                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5648                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5649            }
5650        }
5651
5652        // Build a TrialBalanceEntry for each account that had activity
5653        let mut all_accounts: Vec<&String> = account_debits
5654            .keys()
5655            .chain(account_credits.keys())
5656            .collect::<std::collections::HashSet<_>>()
5657            .into_iter()
5658            .collect();
5659        all_accounts.sort();
5660
5661        let mut entries = Vec::new();
5662
5663        for acct_number in all_accounts {
5664            let debit = account_debits
5665                .get(acct_number)
5666                .copied()
5667                .unwrap_or(Decimal::ZERO);
5668            let credit = account_credits
5669                .get(acct_number)
5670                .copied()
5671                .unwrap_or(Decimal::ZERO);
5672
5673            if debit.is_zero() && credit.is_zero() {
5674                continue;
5675            }
5676
5677            // Look up account name from CoA, fall back to "Account {code}"
5678            let account_name = coa
5679                .get_account(acct_number)
5680                .map(|gl| gl.short_description.clone())
5681                .unwrap_or_else(|| format!("Account {acct_number}"));
5682
5683            // Map account code prefix to the category strings expected by
5684            // FinancialStatementGenerator (Cash, Receivables, Inventory,
5685            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
5686            // OperatingExpenses).
5687            let category = Self::category_from_account_code(acct_number);
5688
5689            entries.push(datasynth_generators::TrialBalanceEntry {
5690                account_code: acct_number.clone(),
5691                account_name,
5692                category,
5693                debit_balance: debit,
5694                credit_balance: credit,
5695            });
5696        }
5697
5698        entries
5699    }
5700
5701    /// Build a cumulative trial balance by aggregating all JEs from the start up to
5702    /// (and including) the given period end date.
5703    ///
5704    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
5705    /// while income statement accounts (revenue, expenses) show only the current period.
5706    /// The two are merged into a single Vec for the FinancialStatementGenerator.
5707    fn build_cumulative_trial_balance(
5708        journal_entries: &[JournalEntry],
5709        coa: &ChartOfAccounts,
5710        company_code: &str,
5711        start_date: NaiveDate,
5712        period_end: NaiveDate,
5713        fiscal_year: u16,
5714        fiscal_period: u8,
5715    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5716        use rust_decimal::Decimal;
5717
5718        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
5719        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5720        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5721
5722        // Accumulate debits/credits for income statement accounts (current period only)
5723        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5724        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5725
5726        for je in journal_entries {
5727            if je.header.company_code != company_code {
5728                continue;
5729            }
5730
5731            for line in &je.lines {
5732                let acct = &line.gl_account;
5733                let category = Self::category_from_account_code(acct);
5734                let is_bs_account = matches!(
5735                    category.as_str(),
5736                    "Cash"
5737                        | "Receivables"
5738                        | "Inventory"
5739                        | "FixedAssets"
5740                        | "Payables"
5741                        | "AccruedLiabilities"
5742                        | "LongTermDebt"
5743                        | "Equity"
5744                );
5745
5746                if is_bs_account {
5747                    // Balance sheet: accumulate from start through period_end
5748                    if je.header.document_date <= period_end
5749                        && je.header.document_date >= start_date
5750                    {
5751                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5752                            line.debit_amount;
5753                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5754                            line.credit_amount;
5755                    }
5756                } else {
5757                    // Income statement: current period only
5758                    if je.header.fiscal_year == fiscal_year
5759                        && je.header.fiscal_period == fiscal_period
5760                    {
5761                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5762                            line.debit_amount;
5763                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5764                            line.credit_amount;
5765                    }
5766                }
5767            }
5768        }
5769
5770        // Merge all accounts
5771        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5772        all_accounts.extend(bs_debits.keys().cloned());
5773        all_accounts.extend(bs_credits.keys().cloned());
5774        all_accounts.extend(is_debits.keys().cloned());
5775        all_accounts.extend(is_credits.keys().cloned());
5776
5777        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5778        sorted_accounts.sort();
5779
5780        let mut entries = Vec::new();
5781
5782        for acct_number in &sorted_accounts {
5783            let category = Self::category_from_account_code(acct_number);
5784            let is_bs_account = matches!(
5785                category.as_str(),
5786                "Cash"
5787                    | "Receivables"
5788                    | "Inventory"
5789                    | "FixedAssets"
5790                    | "Payables"
5791                    | "AccruedLiabilities"
5792                    | "LongTermDebt"
5793                    | "Equity"
5794            );
5795
5796            let (debit, credit) = if is_bs_account {
5797                (
5798                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5799                    bs_credits
5800                        .get(acct_number)
5801                        .copied()
5802                        .unwrap_or(Decimal::ZERO),
5803                )
5804            } else {
5805                (
5806                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5807                    is_credits
5808                        .get(acct_number)
5809                        .copied()
5810                        .unwrap_or(Decimal::ZERO),
5811                )
5812            };
5813
5814            if debit.is_zero() && credit.is_zero() {
5815                continue;
5816            }
5817
5818            let account_name = coa
5819                .get_account(acct_number)
5820                .map(|gl| gl.short_description.clone())
5821                .unwrap_or_else(|| format!("Account {acct_number}"));
5822
5823            entries.push(datasynth_generators::TrialBalanceEntry {
5824                account_code: acct_number.clone(),
5825                account_name,
5826                category,
5827                debit_balance: debit,
5828                credit_balance: credit,
5829            });
5830        }
5831
5832        entries
5833    }
5834
5835    /// Build a JE-derived cash flow statement using the indirect method.
5836    ///
5837    /// Compares current and prior cumulative trial balances to derive working capital
5838    /// changes, producing a coherent cash flow statement tied to actual journal entries.
5839    fn build_cash_flow_from_trial_balances(
5840        current_tb: &[datasynth_generators::TrialBalanceEntry],
5841        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5842        net_income: rust_decimal::Decimal,
5843    ) -> Vec<CashFlowItem> {
5844        use rust_decimal::Decimal;
5845
5846        // Helper: aggregate a TB by category and return net (debit - credit)
5847        let aggregate =
5848            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5849                let mut map: HashMap<String, Decimal> = HashMap::new();
5850                for entry in tb {
5851                    let net = entry.debit_balance - entry.credit_balance;
5852                    *map.entry(entry.category.clone()).or_default() += net;
5853                }
5854                map
5855            };
5856
5857        let current = aggregate(current_tb);
5858        let prior = prior_tb.map(aggregate);
5859
5860        // Get balance for a category, defaulting to zero
5861        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5862            *map.get(key).unwrap_or(&Decimal::ZERO)
5863        };
5864
5865        // Compute change: current - prior (or current if no prior)
5866        let change = |key: &str| -> Decimal {
5867            let curr = get(&current, key);
5868            match &prior {
5869                Some(p) => curr - get(p, key),
5870                None => curr,
5871            }
5872        };
5873
5874        // Operating activities (indirect method)
5875        // Depreciation add-back: approximate from FixedAssets decrease
5876        let fixed_asset_change = change("FixedAssets");
5877        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5878            -fixed_asset_change
5879        } else {
5880            Decimal::ZERO
5881        };
5882
5883        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
5884        let ar_change = change("Receivables");
5885        let inventory_change = change("Inventory");
5886        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
5887        let ap_change = change("Payables");
5888        let accrued_change = change("AccruedLiabilities");
5889
5890        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5891            + (-ap_change)
5892            + (-accrued_change);
5893
5894        // Investing activities
5895        let capex = if fixed_asset_change > Decimal::ZERO {
5896            -fixed_asset_change
5897        } else {
5898            Decimal::ZERO
5899        };
5900        let investing_cf = capex;
5901
5902        // Financing activities
5903        let debt_change = -change("LongTermDebt");
5904        let equity_change = -change("Equity");
5905        let financing_cf = debt_change + equity_change;
5906
5907        let net_change = operating_cf + investing_cf + financing_cf;
5908
5909        vec![
5910            CashFlowItem {
5911                item_code: "CF-NI".to_string(),
5912                label: "Net Income".to_string(),
5913                category: CashFlowCategory::Operating,
5914                amount: net_income,
5915                amount_prior: None,
5916                sort_order: 1,
5917                is_total: false,
5918            },
5919            CashFlowItem {
5920                item_code: "CF-DEP".to_string(),
5921                label: "Depreciation & Amortization".to_string(),
5922                category: CashFlowCategory::Operating,
5923                amount: depreciation_addback,
5924                amount_prior: None,
5925                sort_order: 2,
5926                is_total: false,
5927            },
5928            CashFlowItem {
5929                item_code: "CF-AR".to_string(),
5930                label: "Change in Accounts Receivable".to_string(),
5931                category: CashFlowCategory::Operating,
5932                amount: -ar_change,
5933                amount_prior: None,
5934                sort_order: 3,
5935                is_total: false,
5936            },
5937            CashFlowItem {
5938                item_code: "CF-AP".to_string(),
5939                label: "Change in Accounts Payable".to_string(),
5940                category: CashFlowCategory::Operating,
5941                amount: -ap_change,
5942                amount_prior: None,
5943                sort_order: 4,
5944                is_total: false,
5945            },
5946            CashFlowItem {
5947                item_code: "CF-INV".to_string(),
5948                label: "Change in Inventory".to_string(),
5949                category: CashFlowCategory::Operating,
5950                amount: -inventory_change,
5951                amount_prior: None,
5952                sort_order: 5,
5953                is_total: false,
5954            },
5955            CashFlowItem {
5956                item_code: "CF-OP".to_string(),
5957                label: "Net Cash from Operating Activities".to_string(),
5958                category: CashFlowCategory::Operating,
5959                amount: operating_cf,
5960                amount_prior: None,
5961                sort_order: 6,
5962                is_total: true,
5963            },
5964            CashFlowItem {
5965                item_code: "CF-CAPEX".to_string(),
5966                label: "Capital Expenditures".to_string(),
5967                category: CashFlowCategory::Investing,
5968                amount: capex,
5969                amount_prior: None,
5970                sort_order: 7,
5971                is_total: false,
5972            },
5973            CashFlowItem {
5974                item_code: "CF-INV-T".to_string(),
5975                label: "Net Cash from Investing Activities".to_string(),
5976                category: CashFlowCategory::Investing,
5977                amount: investing_cf,
5978                amount_prior: None,
5979                sort_order: 8,
5980                is_total: true,
5981            },
5982            CashFlowItem {
5983                item_code: "CF-DEBT".to_string(),
5984                label: "Net Borrowings / (Repayments)".to_string(),
5985                category: CashFlowCategory::Financing,
5986                amount: debt_change,
5987                amount_prior: None,
5988                sort_order: 9,
5989                is_total: false,
5990            },
5991            CashFlowItem {
5992                item_code: "CF-EQ".to_string(),
5993                label: "Equity Changes".to_string(),
5994                category: CashFlowCategory::Financing,
5995                amount: equity_change,
5996                amount_prior: None,
5997                sort_order: 10,
5998                is_total: false,
5999            },
6000            CashFlowItem {
6001                item_code: "CF-FIN-T".to_string(),
6002                label: "Net Cash from Financing Activities".to_string(),
6003                category: CashFlowCategory::Financing,
6004                amount: financing_cf,
6005                amount_prior: None,
6006                sort_order: 11,
6007                is_total: true,
6008            },
6009            CashFlowItem {
6010                item_code: "CF-NET".to_string(),
6011                label: "Net Change in Cash".to_string(),
6012                category: CashFlowCategory::Operating,
6013                amount: net_change,
6014                amount_prior: None,
6015                sort_order: 12,
6016                is_total: true,
6017            },
6018        ]
6019    }
6020
6021    /// Calculate net income from a set of trial balance entries.
6022    ///
6023    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6024    fn calculate_net_income_from_tb(
6025        tb: &[datasynth_generators::TrialBalanceEntry],
6026    ) -> rust_decimal::Decimal {
6027        use rust_decimal::Decimal;
6028
6029        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6030        for entry in tb {
6031            let net = entry.debit_balance - entry.credit_balance;
6032            *aggregated.entry(entry.category.clone()).or_default() += net;
6033        }
6034
6035        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6036        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6037        let opex = *aggregated
6038            .get("OperatingExpenses")
6039            .unwrap_or(&Decimal::ZERO);
6040        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6041        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6042
6043        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6044        // other_income is typically negative (credit), other_expenses is typically positive
6045        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6046        let tax_rate = Decimal::new(25, 2); // 0.25
6047        let tax = operating_income * tax_rate;
6048        operating_income - tax
6049    }
6050
6051    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
6052    ///
6053    /// Uses the first two digits of the account code to classify into the categories
6054    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
6055    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
6056    /// OperatingExpenses, OtherIncome, OtherExpenses.
6057    fn category_from_account_code(code: &str) -> String {
6058        let prefix: String = code.chars().take(2).collect();
6059        match prefix.as_str() {
6060            "10" => "Cash",
6061            "11" => "Receivables",
6062            "12" | "13" | "14" => "Inventory",
6063            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
6064            "20" => "Payables",
6065            "21" | "22" | "23" | "24" => "AccruedLiabilities",
6066            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
6067            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
6068            "40" | "41" | "42" | "43" | "44" => "Revenue",
6069            "50" | "51" | "52" => "CostOfSales",
6070            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
6071                "OperatingExpenses"
6072            }
6073            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
6074            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
6075            _ => "OperatingExpenses",
6076        }
6077        .to_string()
6078    }
6079
6080    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6081    fn phase_hr_data(
6082        &mut self,
6083        stats: &mut EnhancedGenerationStatistics,
6084    ) -> SynthResult<HrSnapshot> {
6085        if !self.phase_config.generate_hr {
6086            debug!("Phase 16: Skipped (HR generation disabled)");
6087            return Ok(HrSnapshot::default());
6088        }
6089
6090        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6091
6092        let seed = self.seed;
6093        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6094            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6095        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6096        let company_code = self
6097            .config
6098            .companies
6099            .first()
6100            .map(|c| c.code.as_str())
6101            .unwrap_or("1000");
6102        let currency = self
6103            .config
6104            .companies
6105            .first()
6106            .map(|c| c.currency.as_str())
6107            .unwrap_or("USD");
6108
6109        let employee_ids: Vec<String> = self
6110            .master_data
6111            .employees
6112            .iter()
6113            .map(|e| e.employee_id.clone())
6114            .collect();
6115
6116        if employee_ids.is_empty() {
6117            debug!("Phase 16: Skipped (no employees available)");
6118            return Ok(HrSnapshot::default());
6119        }
6120
6121        // Extract cost-center pool from master data employees for cross-reference
6122        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6123        let cost_center_ids: Vec<String> = self
6124            .master_data
6125            .employees
6126            .iter()
6127            .filter_map(|e| e.cost_center.clone())
6128            .collect::<std::collections::HashSet<_>>()
6129            .into_iter()
6130            .collect();
6131
6132        let mut snapshot = HrSnapshot::default();
6133
6134        // Generate payroll runs (one per month)
6135        if self.config.hr.payroll.enabled {
6136            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6137                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6138
6139            // Look up country pack for payroll deductions and labels
6140            let payroll_pack = self.primary_pack();
6141
6142            // Store the pack on the generator so generate() resolves
6143            // localized deduction rates and labels from it.
6144            payroll_gen.set_country_pack(payroll_pack.clone());
6145
6146            let employees_with_salary: Vec<(
6147                String,
6148                rust_decimal::Decimal,
6149                Option<String>,
6150                Option<String>,
6151            )> = self
6152                .master_data
6153                .employees
6154                .iter()
6155                .map(|e| {
6156                    // Use the employee's actual annual base salary.
6157                    // Fall back to $60,000 / yr if somehow zero.
6158                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6159                        e.base_salary
6160                    } else {
6161                        rust_decimal::Decimal::from(60_000)
6162                    };
6163                    (
6164                        e.employee_id.clone(),
6165                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6166                        e.cost_center.clone(),
6167                        e.department_id.clone(),
6168                    )
6169                })
6170                .collect();
6171
6172            // Use generate_with_changes when employee change history is available
6173            // so that salary adjustments, transfers, etc. are reflected in payroll.
6174            let change_history = &self.master_data.employee_change_history;
6175            let has_changes = !change_history.is_empty();
6176            if has_changes {
6177                debug!(
6178                    "Payroll will incorporate {} employee change events",
6179                    change_history.len()
6180                );
6181            }
6182
6183            for month in 0..self.config.global.period_months {
6184                let period_start = start_date + chrono::Months::new(month);
6185                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6186                let (run, items) = if has_changes {
6187                    payroll_gen.generate_with_changes(
6188                        company_code,
6189                        &employees_with_salary,
6190                        period_start,
6191                        period_end,
6192                        currency,
6193                        change_history,
6194                    )
6195                } else {
6196                    payroll_gen.generate(
6197                        company_code,
6198                        &employees_with_salary,
6199                        period_start,
6200                        period_end,
6201                        currency,
6202                    )
6203                };
6204                snapshot.payroll_runs.push(run);
6205                snapshot.payroll_run_count += 1;
6206                snapshot.payroll_line_item_count += items.len();
6207                snapshot.payroll_line_items.extend(items);
6208            }
6209        }
6210
6211        // Generate time entries
6212        if self.config.hr.time_attendance.enabled {
6213            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6214                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6215            let entries = time_gen.generate(
6216                &employee_ids,
6217                start_date,
6218                end_date,
6219                &self.config.hr.time_attendance,
6220            );
6221            snapshot.time_entry_count = entries.len();
6222            snapshot.time_entries = entries;
6223        }
6224
6225        // Generate expense reports
6226        if self.config.hr.expenses.enabled {
6227            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6228                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6229            expense_gen.set_country_pack(self.primary_pack().clone());
6230            let company_currency = self
6231                .config
6232                .companies
6233                .first()
6234                .map(|c| c.currency.as_str())
6235                .unwrap_or("USD");
6236            let reports = expense_gen.generate_with_currency(
6237                &employee_ids,
6238                start_date,
6239                end_date,
6240                &self.config.hr.expenses,
6241                company_currency,
6242            );
6243            snapshot.expense_report_count = reports.len();
6244            snapshot.expense_reports = reports;
6245        }
6246
6247        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6248        if self.config.hr.payroll.enabled {
6249            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6250            let employee_pairs: Vec<(String, String)> = self
6251                .master_data
6252                .employees
6253                .iter()
6254                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6255                .collect();
6256            let enrollments =
6257                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6258            snapshot.benefit_enrollment_count = enrollments.len();
6259            snapshot.benefit_enrollments = enrollments;
6260        }
6261
6262        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6263        if self.phase_config.generate_hr {
6264            let entity_name = self
6265                .config
6266                .companies
6267                .first()
6268                .map(|c| c.name.as_str())
6269                .unwrap_or("Entity");
6270            let period_months = self.config.global.period_months;
6271            let period_label = {
6272                let y = start_date.year();
6273                let m = start_date.month();
6274                if period_months >= 12 {
6275                    format!("FY{y}")
6276                } else {
6277                    format!("{y}-{m:02}")
6278                }
6279            };
6280            let reporting_date =
6281                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6282
6283            // Compute average annual salary from actual payroll data when available.
6284            // PayrollRun.total_gross covers all employees for one pay period; we sum
6285            // across all runs and divide by employee_count to get per-employee total,
6286            // then annualise for sub-annual periods.
6287            let avg_salary: Option<rust_decimal::Decimal> = {
6288                let employee_count = employee_ids.len();
6289                if self.config.hr.payroll.enabled
6290                    && employee_count > 0
6291                    && !snapshot.payroll_runs.is_empty()
6292                {
6293                    // Sum total gross pay across all payroll runs for this company
6294                    let total_gross: rust_decimal::Decimal = snapshot
6295                        .payroll_runs
6296                        .iter()
6297                        .filter(|r| r.company_code == company_code)
6298                        .map(|r| r.total_gross)
6299                        .sum();
6300                    if total_gross > rust_decimal::Decimal::ZERO {
6301                        // Annualise: total_gross covers `period_months` months of pay
6302                        let annual_total = if period_months > 0 && period_months < 12 {
6303                            total_gross * rust_decimal::Decimal::from(12u32)
6304                                / rust_decimal::Decimal::from(period_months)
6305                        } else {
6306                            total_gross
6307                        };
6308                        Some(
6309                            (annual_total / rust_decimal::Decimal::from(employee_count))
6310                                .round_dp(2),
6311                        )
6312                    } else {
6313                        None
6314                    }
6315                } else {
6316                    None
6317                }
6318            };
6319
6320            let mut pension_gen =
6321                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6322            let pension_snap = pension_gen.generate(
6323                company_code,
6324                entity_name,
6325                &period_label,
6326                reporting_date,
6327                employee_ids.len(),
6328                currency,
6329                avg_salary,
6330                period_months,
6331            );
6332            snapshot.pension_plan_count = pension_snap.plans.len();
6333            snapshot.pension_plans = pension_snap.plans;
6334            snapshot.pension_obligations = pension_snap.obligations;
6335            snapshot.pension_plan_assets = pension_snap.plan_assets;
6336            snapshot.pension_disclosures = pension_snap.disclosures;
6337            // Pension JEs are returned here so they can be added to entries
6338            // in the caller (stored temporarily on snapshot for transfer).
6339            // We embed them in the hr snapshot for simplicity; the orchestrator
6340            // will extract and extend `entries`.
6341            snapshot.pension_journal_entries = pension_snap.journal_entries;
6342        }
6343
6344        // Generate stock-based compensation (ASC 718 / IFRS 2)
6345        if self.phase_config.generate_hr && !employee_ids.is_empty() {
6346            let period_months = self.config.global.period_months;
6347            let period_label = {
6348                let y = start_date.year();
6349                let m = start_date.month();
6350                if period_months >= 12 {
6351                    format!("FY{y}")
6352                } else {
6353                    format!("{y}-{m:02}")
6354                }
6355            };
6356            let reporting_date =
6357                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6358
6359            let mut stock_comp_gen =
6360                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6361            let stock_snap = stock_comp_gen.generate(
6362                company_code,
6363                &employee_ids,
6364                start_date,
6365                &period_label,
6366                reporting_date,
6367                currency,
6368            );
6369            snapshot.stock_grant_count = stock_snap.grants.len();
6370            snapshot.stock_grants = stock_snap.grants;
6371            snapshot.stock_comp_expenses = stock_snap.expenses;
6372            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6373        }
6374
6375        stats.payroll_run_count = snapshot.payroll_run_count;
6376        stats.time_entry_count = snapshot.time_entry_count;
6377        stats.expense_report_count = snapshot.expense_report_count;
6378        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6379        stats.pension_plan_count = snapshot.pension_plan_count;
6380        stats.stock_grant_count = snapshot.stock_grant_count;
6381
6382        info!(
6383            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6384            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6385            snapshot.time_entry_count, snapshot.expense_report_count,
6386            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6387            snapshot.stock_grant_count
6388        );
6389        self.check_resources_with_log("post-hr")?;
6390
6391        Ok(snapshot)
6392    }
6393
6394    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
6395    fn phase_accounting_standards(
6396        &mut self,
6397        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6398        journal_entries: &[JournalEntry],
6399        stats: &mut EnhancedGenerationStatistics,
6400    ) -> SynthResult<AccountingStandardsSnapshot> {
6401        if !self.phase_config.generate_accounting_standards {
6402            debug!("Phase 17: Skipped (accounting standards generation disabled)");
6403            return Ok(AccountingStandardsSnapshot::default());
6404        }
6405        info!("Phase 17: Generating Accounting Standards Data");
6406
6407        let seed = self.seed;
6408        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6409            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6410        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6411        let company_code = self
6412            .config
6413            .companies
6414            .first()
6415            .map(|c| c.code.as_str())
6416            .unwrap_or("1000");
6417        let currency = self
6418            .config
6419            .companies
6420            .first()
6421            .map(|c| c.currency.as_str())
6422            .unwrap_or("USD");
6423
6424        // Convert config framework to standards framework.
6425        // If the user explicitly set a framework in the YAML config, use that.
6426        // Otherwise, fall back to the country pack's accounting.framework field,
6427        // and if that is also absent or unrecognised, default to US GAAP.
6428        let framework = match self.config.accounting_standards.framework {
6429            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6430                datasynth_standards::framework::AccountingFramework::UsGaap
6431            }
6432            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6433                datasynth_standards::framework::AccountingFramework::Ifrs
6434            }
6435            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6436                datasynth_standards::framework::AccountingFramework::DualReporting
6437            }
6438            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6439                datasynth_standards::framework::AccountingFramework::FrenchGaap
6440            }
6441            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6442                datasynth_standards::framework::AccountingFramework::GermanGaap
6443            }
6444            None => {
6445                // Derive framework from the primary company's country pack
6446                let pack = self.primary_pack();
6447                let pack_fw = pack.accounting.framework.as_str();
6448                match pack_fw {
6449                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6450                    "dual_reporting" => {
6451                        datasynth_standards::framework::AccountingFramework::DualReporting
6452                    }
6453                    "french_gaap" => {
6454                        datasynth_standards::framework::AccountingFramework::FrenchGaap
6455                    }
6456                    "german_gaap" | "hgb" => {
6457                        datasynth_standards::framework::AccountingFramework::GermanGaap
6458                    }
6459                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
6460                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6461                }
6462            }
6463        };
6464
6465        let mut snapshot = AccountingStandardsSnapshot::default();
6466
6467        // Revenue recognition
6468        if self.config.accounting_standards.revenue_recognition.enabled {
6469            let customer_ids: Vec<String> = self
6470                .master_data
6471                .customers
6472                .iter()
6473                .map(|c| c.customer_id.clone())
6474                .collect();
6475
6476            if !customer_ids.is_empty() {
6477                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6478                let contracts = rev_gen.generate(
6479                    company_code,
6480                    &customer_ids,
6481                    start_date,
6482                    end_date,
6483                    currency,
6484                    &self.config.accounting_standards.revenue_recognition,
6485                    framework,
6486                );
6487                snapshot.revenue_contract_count = contracts.len();
6488                snapshot.contracts = contracts;
6489            }
6490        }
6491
6492        // Impairment testing
6493        if self.config.accounting_standards.impairment.enabled {
6494            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6495                .master_data
6496                .assets
6497                .iter()
6498                .map(|a| {
6499                    (
6500                        a.asset_id.clone(),
6501                        a.description.clone(),
6502                        a.acquisition_cost,
6503                    )
6504                })
6505                .collect();
6506
6507            if !asset_data.is_empty() {
6508                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6509                let tests = imp_gen.generate(
6510                    company_code,
6511                    &asset_data,
6512                    end_date,
6513                    &self.config.accounting_standards.impairment,
6514                    framework,
6515                );
6516                snapshot.impairment_test_count = tests.len();
6517                snapshot.impairment_tests = tests;
6518            }
6519        }
6520
6521        // Business combinations (IFRS 3 / ASC 805)
6522        if self
6523            .config
6524            .accounting_standards
6525            .business_combinations
6526            .enabled
6527        {
6528            let bc_config = &self.config.accounting_standards.business_combinations;
6529            let framework_str = match framework {
6530                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6531                _ => "US_GAAP",
6532            };
6533            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6534            let bc_snap = bc_gen.generate(
6535                company_code,
6536                currency,
6537                start_date,
6538                end_date,
6539                bc_config.acquisition_count,
6540                framework_str,
6541            );
6542            snapshot.business_combination_count = bc_snap.combinations.len();
6543            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6544            snapshot.business_combinations = bc_snap.combinations;
6545        }
6546
6547        // Expected Credit Loss (IFRS 9 / ASC 326)
6548        if self
6549            .config
6550            .accounting_standards
6551            .expected_credit_loss
6552            .enabled
6553        {
6554            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6555            let framework_str = match framework {
6556                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6557                _ => "ASC_326",
6558            };
6559
6560            // Use AR aging data from the subledger snapshot if available;
6561            // otherwise generate synthetic bucket exposures.
6562            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6563
6564            let mut ecl_gen = EclGenerator::new(seed + 43);
6565
6566            // Collect combined bucket totals across all company AR aging reports.
6567            let bucket_exposures: Vec<(
6568                datasynth_core::models::subledger::ar::AgingBucket,
6569                rust_decimal::Decimal,
6570            )> = if ar_aging_reports.is_empty() {
6571                // No AR aging data — synthesise plausible bucket exposures.
6572                use datasynth_core::models::subledger::ar::AgingBucket;
6573                vec![
6574                    (
6575                        AgingBucket::Current,
6576                        rust_decimal::Decimal::from(500_000_u32),
6577                    ),
6578                    (
6579                        AgingBucket::Days1To30,
6580                        rust_decimal::Decimal::from(120_000_u32),
6581                    ),
6582                    (
6583                        AgingBucket::Days31To60,
6584                        rust_decimal::Decimal::from(45_000_u32),
6585                    ),
6586                    (
6587                        AgingBucket::Days61To90,
6588                        rust_decimal::Decimal::from(15_000_u32),
6589                    ),
6590                    (
6591                        AgingBucket::Over90Days,
6592                        rust_decimal::Decimal::from(8_000_u32),
6593                    ),
6594                ]
6595            } else {
6596                use datasynth_core::models::subledger::ar::AgingBucket;
6597                // Sum bucket totals from all reports.
6598                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6599                    std::collections::HashMap::new();
6600                for report in ar_aging_reports {
6601                    for (bucket, amount) in &report.bucket_totals {
6602                        *totals.entry(*bucket).or_default() += amount;
6603                    }
6604                }
6605                AgingBucket::all()
6606                    .into_iter()
6607                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6608                    .collect()
6609            };
6610
6611            let ecl_snap = ecl_gen.generate(
6612                company_code,
6613                end_date,
6614                &bucket_exposures,
6615                ecl_config,
6616                &period_label,
6617                framework_str,
6618            );
6619
6620            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6621            snapshot.ecl_models = ecl_snap.ecl_models;
6622            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6623            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6624        }
6625
6626        // Provisions and contingencies (IAS 37 / ASC 450)
6627        {
6628            let framework_str = match framework {
6629                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6630                _ => "US_GAAP",
6631            };
6632
6633            // Compute actual revenue from the journal entries generated so far.
6634            // The `journal_entries` slice passed to this phase contains all GL entries
6635            // up to and including Period Close. Fall back to a minimum of 100_000 to
6636            // avoid degenerate zero-based provision amounts on first-period datasets.
6637            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6638                .max(rust_decimal::Decimal::from(100_000_u32));
6639
6640            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6641
6642            let mut prov_gen = ProvisionGenerator::new(seed + 44);
6643            let prov_snap = prov_gen.generate(
6644                company_code,
6645                currency,
6646                revenue_proxy,
6647                end_date,
6648                &period_label,
6649                framework_str,
6650                None, // prior_opening: no carry-forward data in single-period runs
6651            );
6652
6653            snapshot.provision_count = prov_snap.provisions.len();
6654            snapshot.provisions = prov_snap.provisions;
6655            snapshot.provision_movements = prov_snap.movements;
6656            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6657            snapshot.provision_journal_entries = prov_snap.journal_entries;
6658        }
6659
6660        // IAS 21 Functional Currency Translation
6661        // For each company whose functional currency differs from the presentation
6662        // currency, generate a CurrencyTranslationResult with CTA (OCI).
6663        {
6664            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6665
6666            let presentation_currency = self
6667                .config
6668                .global
6669                .presentation_currency
6670                .clone()
6671                .unwrap_or_else(|| self.config.global.group_currency.clone());
6672
6673            // Build a minimal rate table populated with approximate rates from
6674            // the FX model base rates (USD-based) so we can do the translation.
6675            let mut rate_table = FxRateTable::new(&presentation_currency);
6676
6677            // Populate with base rates against USD; if presentation_currency is
6678            // not USD we do a best-effort two-step conversion using the table's
6679            // triangulation support.
6680            let base_rates = base_rates_usd();
6681            for (ccy, rate) in &base_rates {
6682                rate_table.add_rate(FxRate::new(
6683                    ccy,
6684                    "USD",
6685                    RateType::Closing,
6686                    end_date,
6687                    *rate,
6688                    "SYNTHETIC",
6689                ));
6690                // Average rate = 98% of closing (approximation).
6691                // 0.98 = 98/100 = Decimal::new(98, 2)
6692                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6693                rate_table.add_rate(FxRate::new(
6694                    ccy,
6695                    "USD",
6696                    RateType::Average,
6697                    end_date,
6698                    avg,
6699                    "SYNTHETIC",
6700                ));
6701            }
6702
6703            let mut translation_results = Vec::new();
6704            for company in &self.config.companies {
6705                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
6706                // to ensure the translation produces non-trivial CTA amounts.
6707                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6708                    .max(rust_decimal::Decimal::from(100_000_u32));
6709
6710                let func_ccy = company
6711                    .functional_currency
6712                    .clone()
6713                    .unwrap_or_else(|| company.currency.clone());
6714
6715                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6716                    &company.code,
6717                    &func_ccy,
6718                    &presentation_currency,
6719                    &ias21_period_label,
6720                    end_date,
6721                    company_revenue,
6722                    &rate_table,
6723                );
6724                translation_results.push(result);
6725            }
6726
6727            snapshot.currency_translation_count = translation_results.len();
6728            snapshot.currency_translation_results = translation_results;
6729        }
6730
6731        stats.revenue_contract_count = snapshot.revenue_contract_count;
6732        stats.impairment_test_count = snapshot.impairment_test_count;
6733        stats.business_combination_count = snapshot.business_combination_count;
6734        stats.ecl_model_count = snapshot.ecl_model_count;
6735        stats.provision_count = snapshot.provision_count;
6736
6737        info!(
6738            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6739            snapshot.revenue_contract_count,
6740            snapshot.impairment_test_count,
6741            snapshot.business_combination_count,
6742            snapshot.ecl_model_count,
6743            snapshot.provision_count,
6744            snapshot.currency_translation_count
6745        );
6746        self.check_resources_with_log("post-accounting-standards")?;
6747
6748        Ok(snapshot)
6749    }
6750
6751    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
6752    fn phase_manufacturing(
6753        &mut self,
6754        stats: &mut EnhancedGenerationStatistics,
6755    ) -> SynthResult<ManufacturingSnapshot> {
6756        if !self.phase_config.generate_manufacturing {
6757            debug!("Phase 18: Skipped (manufacturing generation disabled)");
6758            return Ok(ManufacturingSnapshot::default());
6759        }
6760        info!("Phase 18: Generating Manufacturing Data");
6761
6762        let seed = self.seed;
6763        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6764            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6765        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6766        let company_code = self
6767            .config
6768            .companies
6769            .first()
6770            .map(|c| c.code.as_str())
6771            .unwrap_or("1000");
6772
6773        let material_data: Vec<(String, String)> = self
6774            .master_data
6775            .materials
6776            .iter()
6777            .map(|m| (m.material_id.clone(), m.description.clone()))
6778            .collect();
6779
6780        if material_data.is_empty() {
6781            debug!("Phase 18: Skipped (no materials available)");
6782            return Ok(ManufacturingSnapshot::default());
6783        }
6784
6785        let mut snapshot = ManufacturingSnapshot::default();
6786
6787        // Generate production orders
6788        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6789        let production_orders = prod_gen.generate(
6790            company_code,
6791            &material_data,
6792            start_date,
6793            end_date,
6794            &self.config.manufacturing.production_orders,
6795            &self.config.manufacturing.costing,
6796            &self.config.manufacturing.routing,
6797        );
6798        snapshot.production_order_count = production_orders.len();
6799
6800        // Generate quality inspections from production orders
6801        let inspection_data: Vec<(String, String, String)> = production_orders
6802            .iter()
6803            .map(|po| {
6804                (
6805                    po.order_id.clone(),
6806                    po.material_id.clone(),
6807                    po.material_description.clone(),
6808                )
6809            })
6810            .collect();
6811
6812        snapshot.production_orders = production_orders;
6813
6814        if !inspection_data.is_empty() {
6815            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6816            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6817            snapshot.quality_inspection_count = inspections.len();
6818            snapshot.quality_inspections = inspections;
6819        }
6820
6821        // Generate cycle counts (one per month)
6822        let storage_locations: Vec<(String, String)> = material_data
6823            .iter()
6824            .enumerate()
6825            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6826            .collect();
6827
6828        let employee_ids: Vec<String> = self
6829            .master_data
6830            .employees
6831            .iter()
6832            .map(|e| e.employee_id.clone())
6833            .collect();
6834        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
6835            .with_employee_pool(employee_ids);
6836        let mut cycle_count_total = 0usize;
6837        for month in 0..self.config.global.period_months {
6838            let count_date = start_date + chrono::Months::new(month);
6839            let items_per_count = storage_locations.len().clamp(10, 50);
6840            let cc = cc_gen.generate(
6841                company_code,
6842                &storage_locations,
6843                count_date,
6844                items_per_count,
6845            );
6846            snapshot.cycle_counts.push(cc);
6847            cycle_count_total += 1;
6848        }
6849        snapshot.cycle_count_count = cycle_count_total;
6850
6851        // Generate BOM components
6852        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
6853        let bom_components = bom_gen.generate(company_code, &material_data);
6854        snapshot.bom_component_count = bom_components.len();
6855        snapshot.bom_components = bom_components;
6856
6857        // Generate inventory movements — link GoodsIssue movements to real production order IDs
6858        let currency = self
6859            .config
6860            .companies
6861            .first()
6862            .map(|c| c.currency.as_str())
6863            .unwrap_or("USD");
6864        let production_order_ids: Vec<String> = snapshot
6865            .production_orders
6866            .iter()
6867            .map(|po| po.order_id.clone())
6868            .collect();
6869        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
6870        let inventory_movements = inv_mov_gen.generate_with_production_orders(
6871            company_code,
6872            &material_data,
6873            start_date,
6874            end_date,
6875            2,
6876            currency,
6877            &production_order_ids,
6878        );
6879        snapshot.inventory_movement_count = inventory_movements.len();
6880        snapshot.inventory_movements = inventory_movements;
6881
6882        stats.production_order_count = snapshot.production_order_count;
6883        stats.quality_inspection_count = snapshot.quality_inspection_count;
6884        stats.cycle_count_count = snapshot.cycle_count_count;
6885        stats.bom_component_count = snapshot.bom_component_count;
6886        stats.inventory_movement_count = snapshot.inventory_movement_count;
6887
6888        info!(
6889            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6890            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6891            snapshot.bom_component_count, snapshot.inventory_movement_count
6892        );
6893        self.check_resources_with_log("post-manufacturing")?;
6894
6895        Ok(snapshot)
6896    }
6897
6898    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
6899    fn phase_sales_kpi_budgets(
6900        &mut self,
6901        coa: &Arc<ChartOfAccounts>,
6902        financial_reporting: &FinancialReportingSnapshot,
6903        stats: &mut EnhancedGenerationStatistics,
6904    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6905        if !self.phase_config.generate_sales_kpi_budgets {
6906            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6907            return Ok(SalesKpiBudgetsSnapshot::default());
6908        }
6909        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6910
6911        let seed = self.seed;
6912        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6913            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6914        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6915        let company_code = self
6916            .config
6917            .companies
6918            .first()
6919            .map(|c| c.code.as_str())
6920            .unwrap_or("1000");
6921
6922        let mut snapshot = SalesKpiBudgetsSnapshot::default();
6923
6924        // Sales Quotes
6925        if self.config.sales_quotes.enabled {
6926            let customer_data: Vec<(String, String)> = self
6927                .master_data
6928                .customers
6929                .iter()
6930                .map(|c| (c.customer_id.clone(), c.name.clone()))
6931                .collect();
6932            let material_data: Vec<(String, String)> = self
6933                .master_data
6934                .materials
6935                .iter()
6936                .map(|m| (m.material_id.clone(), m.description.clone()))
6937                .collect();
6938
6939            if !customer_data.is_empty() && !material_data.is_empty() {
6940                let employee_ids: Vec<String> = self
6941                    .master_data
6942                    .employees
6943                    .iter()
6944                    .map(|e| e.employee_id.clone())
6945                    .collect();
6946                let customer_ids: Vec<String> = self
6947                    .master_data
6948                    .customers
6949                    .iter()
6950                    .map(|c| c.customer_id.clone())
6951                    .collect();
6952                let company_currency = self
6953                    .config
6954                    .companies
6955                    .first()
6956                    .map(|c| c.currency.as_str())
6957                    .unwrap_or("USD");
6958
6959                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
6960                    .with_pools(employee_ids, customer_ids);
6961                let quotes = quote_gen.generate_with_currency(
6962                    company_code,
6963                    &customer_data,
6964                    &material_data,
6965                    start_date,
6966                    end_date,
6967                    &self.config.sales_quotes,
6968                    company_currency,
6969                );
6970                snapshot.sales_quote_count = quotes.len();
6971                snapshot.sales_quotes = quotes;
6972            }
6973        }
6974
6975        // Management KPIs
6976        if self.config.financial_reporting.management_kpis.enabled {
6977            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
6978            let mut kpis = kpi_gen.generate(
6979                company_code,
6980                start_date,
6981                end_date,
6982                &self.config.financial_reporting.management_kpis,
6983            );
6984
6985            // Override financial KPIs with actual data from financial statements
6986            {
6987                use rust_decimal::Decimal;
6988
6989                if let Some(income_stmt) =
6990                    financial_reporting.financial_statements.iter().find(|fs| {
6991                        fs.statement_type == StatementType::IncomeStatement
6992                            && fs.company_code == company_code
6993                    })
6994                {
6995                    // Extract revenue and COGS from income statement line items
6996                    let total_revenue: Decimal = income_stmt
6997                        .line_items
6998                        .iter()
6999                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
7000                        .map(|li| li.amount)
7001                        .sum();
7002                    let total_cogs: Decimal = income_stmt
7003                        .line_items
7004                        .iter()
7005                        .filter(|li| {
7006                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7007                                && !li.is_total
7008                        })
7009                        .map(|li| li.amount.abs())
7010                        .sum();
7011                    let total_opex: Decimal = income_stmt
7012                        .line_items
7013                        .iter()
7014                        .filter(|li| {
7015                            li.section.contains("Expense")
7016                                && !li.is_total
7017                                && !li.section.contains("Cost")
7018                        })
7019                        .map(|li| li.amount.abs())
7020                        .sum();
7021
7022                    if total_revenue > Decimal::ZERO {
7023                        let hundred = Decimal::from(100);
7024                        let gross_margin_pct =
7025                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7026                        let operating_income = total_revenue - total_cogs - total_opex;
7027                        let op_margin_pct =
7028                            (operating_income * hundred / total_revenue).round_dp(2);
7029
7030                        // Override gross margin and operating margin KPIs
7031                        for kpi in &mut kpis {
7032                            if kpi.name == "Gross Margin" {
7033                                kpi.value = gross_margin_pct;
7034                            } else if kpi.name == "Operating Margin" {
7035                                kpi.value = op_margin_pct;
7036                            }
7037                        }
7038                    }
7039                }
7040
7041                // Override Current Ratio from balance sheet
7042                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7043                    fs.statement_type == StatementType::BalanceSheet
7044                        && fs.company_code == company_code
7045                }) {
7046                    let current_assets: Decimal = bs
7047                        .line_items
7048                        .iter()
7049                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7050                        .map(|li| li.amount)
7051                        .sum();
7052                    let current_liabilities: Decimal = bs
7053                        .line_items
7054                        .iter()
7055                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7056                        .map(|li| li.amount.abs())
7057                        .sum();
7058
7059                    if current_liabilities > Decimal::ZERO {
7060                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
7061                        for kpi in &mut kpis {
7062                            if kpi.name == "Current Ratio" {
7063                                kpi.value = current_ratio;
7064                            }
7065                        }
7066                    }
7067                }
7068            }
7069
7070            snapshot.kpi_count = kpis.len();
7071            snapshot.kpis = kpis;
7072        }
7073
7074        // Budgets
7075        if self.config.financial_reporting.budgets.enabled {
7076            let account_data: Vec<(String, String)> = coa
7077                .accounts
7078                .iter()
7079                .map(|a| (a.account_number.clone(), a.short_description.clone()))
7080                .collect();
7081
7082            if !account_data.is_empty() {
7083                let fiscal_year = start_date.year() as u32;
7084                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7085                let budget = budget_gen.generate(
7086                    company_code,
7087                    fiscal_year,
7088                    &account_data,
7089                    &self.config.financial_reporting.budgets,
7090                );
7091                snapshot.budget_line_count = budget.line_items.len();
7092                snapshot.budgets.push(budget);
7093            }
7094        }
7095
7096        stats.sales_quote_count = snapshot.sales_quote_count;
7097        stats.kpi_count = snapshot.kpi_count;
7098        stats.budget_line_count = snapshot.budget_line_count;
7099
7100        info!(
7101            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7102            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7103        );
7104        self.check_resources_with_log("post-sales-kpi-budgets")?;
7105
7106        Ok(snapshot)
7107    }
7108
7109    /// Compute pre-tax income for a single company from actual journal entries.
7110    ///
7111    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
7112    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
7113    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
7114    /// and the period-close engine so that all three use a consistent definition.
7115    fn compute_pre_tax_income(
7116        company_code: &str,
7117        journal_entries: &[JournalEntry],
7118    ) -> rust_decimal::Decimal {
7119        use datasynth_core::accounts::AccountCategory;
7120        use rust_decimal::Decimal;
7121
7122        let mut total_revenue = Decimal::ZERO;
7123        let mut total_expenses = Decimal::ZERO;
7124
7125        for je in journal_entries {
7126            if je.header.company_code != company_code {
7127                continue;
7128            }
7129            for line in &je.lines {
7130                let cat = AccountCategory::from_account(&line.gl_account);
7131                match cat {
7132                    AccountCategory::Revenue => {
7133                        total_revenue += line.credit_amount - line.debit_amount;
7134                    }
7135                    AccountCategory::Cogs
7136                    | AccountCategory::OperatingExpense
7137                    | AccountCategory::OtherIncomeExpense => {
7138                        total_expenses += line.debit_amount - line.credit_amount;
7139                    }
7140                    _ => {}
7141                }
7142            }
7143        }
7144
7145        let pti = (total_revenue - total_expenses).round_dp(2);
7146        if pti == rust_decimal::Decimal::ZERO {
7147            // No income statement activity yet — fall back to a synthetic value so the
7148            // tax provision generator can still produce meaningful output.
7149            rust_decimal::Decimal::from(1_000_000u32)
7150        } else {
7151            pti
7152        }
7153    }
7154
7155    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7156    fn phase_tax_generation(
7157        &mut self,
7158        document_flows: &DocumentFlowSnapshot,
7159        journal_entries: &[JournalEntry],
7160        stats: &mut EnhancedGenerationStatistics,
7161    ) -> SynthResult<TaxSnapshot> {
7162        if !self.phase_config.generate_tax {
7163            debug!("Phase 20: Skipped (tax generation disabled)");
7164            return Ok(TaxSnapshot::default());
7165        }
7166        info!("Phase 20: Generating Tax Data");
7167
7168        let seed = self.seed;
7169        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7170            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7171        let fiscal_year = start_date.year();
7172        let company_code = self
7173            .config
7174            .companies
7175            .first()
7176            .map(|c| c.code.as_str())
7177            .unwrap_or("1000");
7178
7179        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7180            seed + 370,
7181            self.config.tax.clone(),
7182        );
7183
7184        let pack = self.primary_pack().clone();
7185        let (jurisdictions, codes) =
7186            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7187
7188        // Generate tax provisions for each company
7189        let mut provisions = Vec::new();
7190        if self.config.tax.provisions.enabled {
7191            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7192            for company in &self.config.companies {
7193                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7194                let statutory_rate = rust_decimal::Decimal::new(
7195                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7196                    2,
7197                );
7198                let provision = provision_gen.generate(
7199                    &company.code,
7200                    start_date,
7201                    pre_tax_income,
7202                    statutory_rate,
7203                );
7204                provisions.push(provision);
7205            }
7206        }
7207
7208        // Generate tax lines from document invoices
7209        let mut tax_lines = Vec::new();
7210        if !codes.is_empty() {
7211            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7212                datasynth_generators::TaxLineGeneratorConfig::default(),
7213                codes.clone(),
7214                seed + 372,
7215            );
7216
7217            // Tax lines from vendor invoices (input tax)
7218            // Use the first company's country as buyer country
7219            let buyer_country = self
7220                .config
7221                .companies
7222                .first()
7223                .map(|c| c.country.as_str())
7224                .unwrap_or("US");
7225            for vi in &document_flows.vendor_invoices {
7226                let lines = tax_line_gen.generate_for_document(
7227                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
7228                    &vi.header.document_id,
7229                    buyer_country, // seller approx same country
7230                    buyer_country,
7231                    vi.payable_amount,
7232                    vi.header.document_date,
7233                    None,
7234                );
7235                tax_lines.extend(lines);
7236            }
7237
7238            // Tax lines from customer invoices (output tax)
7239            for ci in &document_flows.customer_invoices {
7240                let lines = tax_line_gen.generate_for_document(
7241                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7242                    &ci.header.document_id,
7243                    buyer_country, // seller is the company
7244                    buyer_country,
7245                    ci.total_gross_amount,
7246                    ci.header.document_date,
7247                    None,
7248                );
7249                tax_lines.extend(lines);
7250            }
7251        }
7252
7253        // Generate deferred tax data (IAS 12 / ASC 740) for each company
7254        let deferred_tax = {
7255            let companies: Vec<(&str, &str)> = self
7256                .config
7257                .companies
7258                .iter()
7259                .map(|c| (c.code.as_str(), c.country.as_str()))
7260                .collect();
7261            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7262            deferred_gen.generate(&companies, start_date, journal_entries)
7263        };
7264
7265        // Build a document_id → posting_date map so each tax JE uses its
7266        // source document's date rather than a blanket period-end date.
7267        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7268            std::collections::HashMap::new();
7269        for vi in &document_flows.vendor_invoices {
7270            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7271        }
7272        for ci in &document_flows.customer_invoices {
7273            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7274        }
7275
7276        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
7277        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7278        let tax_posting_journal_entries = if !tax_lines.is_empty() {
7279            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7280                &tax_lines,
7281                company_code,
7282                &doc_dates,
7283                end_date,
7284            );
7285            debug!("Generated {} tax posting JEs", jes.len());
7286            jes
7287        } else {
7288            Vec::new()
7289        };
7290
7291        let snapshot = TaxSnapshot {
7292            jurisdiction_count: jurisdictions.len(),
7293            code_count: codes.len(),
7294            jurisdictions,
7295            codes,
7296            tax_provisions: provisions,
7297            tax_lines,
7298            tax_returns: Vec::new(),
7299            withholding_records: Vec::new(),
7300            tax_anomaly_labels: Vec::new(),
7301            deferred_tax,
7302            tax_posting_journal_entries,
7303        };
7304
7305        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7306        stats.tax_code_count = snapshot.code_count;
7307        stats.tax_provision_count = snapshot.tax_provisions.len();
7308        stats.tax_line_count = snapshot.tax_lines.len();
7309
7310        info!(
7311            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7312            snapshot.jurisdiction_count,
7313            snapshot.code_count,
7314            snapshot.tax_provisions.len(),
7315            snapshot.deferred_tax.temporary_differences.len(),
7316            snapshot.deferred_tax.journal_entries.len(),
7317            snapshot.tax_posting_journal_entries.len(),
7318        );
7319        self.check_resources_with_log("post-tax")?;
7320
7321        Ok(snapshot)
7322    }
7323
7324    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
7325    fn phase_esg_generation(
7326        &mut self,
7327        document_flows: &DocumentFlowSnapshot,
7328        manufacturing: &ManufacturingSnapshot,
7329        stats: &mut EnhancedGenerationStatistics,
7330    ) -> SynthResult<EsgSnapshot> {
7331        if !self.phase_config.generate_esg {
7332            debug!("Phase 21: Skipped (ESG generation disabled)");
7333            return Ok(EsgSnapshot::default());
7334        }
7335        let degradation = self.check_resources()?;
7336        if degradation >= DegradationLevel::Reduced {
7337            debug!(
7338                "Phase skipped due to resource pressure (degradation: {:?})",
7339                degradation
7340            );
7341            return Ok(EsgSnapshot::default());
7342        }
7343        info!("Phase 21: Generating ESG Data");
7344
7345        let seed = self.seed;
7346        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7347            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7348        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7349        let entity_id = self
7350            .config
7351            .companies
7352            .first()
7353            .map(|c| c.code.as_str())
7354            .unwrap_or("1000");
7355
7356        let esg_cfg = &self.config.esg;
7357        let mut snapshot = EsgSnapshot::default();
7358
7359        // Energy consumption (feeds into scope 1 & 2 emissions)
7360        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7361            esg_cfg.environmental.energy.clone(),
7362            seed + 80,
7363        );
7364        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7365
7366        // Water usage
7367        let facility_count = esg_cfg.environmental.energy.facility_count;
7368        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7369        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7370
7371        // Waste
7372        let mut waste_gen = datasynth_generators::WasteGenerator::new(
7373            seed + 82,
7374            esg_cfg.environmental.waste.diversion_target,
7375            facility_count,
7376        );
7377        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7378
7379        // Emissions (scope 1, 2, 3)
7380        let mut emission_gen =
7381            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7382
7383        // Build EnergyInput from energy_records
7384        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7385            .iter()
7386            .map(|e| datasynth_generators::EnergyInput {
7387                facility_id: e.facility_id.clone(),
7388                energy_type: match e.energy_source {
7389                    EnergySourceType::NaturalGas => {
7390                        datasynth_generators::EnergyInputType::NaturalGas
7391                    }
7392                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7393                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7394                    _ => datasynth_generators::EnergyInputType::Electricity,
7395                },
7396                consumption_kwh: e.consumption_kwh,
7397                period: e.period,
7398            })
7399            .collect();
7400
7401        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
7402        if !manufacturing.production_orders.is_empty() {
7403            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7404                &manufacturing.production_orders,
7405                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
7406                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
7407            );
7408            if !mfg_energy.is_empty() {
7409                info!(
7410                    "ESG: {} energy inputs derived from {} production orders",
7411                    mfg_energy.len(),
7412                    manufacturing.production_orders.len(),
7413                );
7414                energy_inputs.extend(mfg_energy);
7415            }
7416        }
7417
7418        let mut emissions = Vec::new();
7419        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7420        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7421
7422        // Scope 3: use vendor spend data from actual payments
7423        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7424            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7425            for payment in &document_flows.payments {
7426                if payment.is_vendor {
7427                    *totals
7428                        .entry(payment.business_partner_id.clone())
7429                        .or_default() += payment.amount;
7430                }
7431            }
7432            totals
7433        };
7434        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7435            .master_data
7436            .vendors
7437            .iter()
7438            .map(|v| {
7439                let spend = vendor_payment_totals
7440                    .get(&v.vendor_id)
7441                    .copied()
7442                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7443                datasynth_generators::VendorSpendInput {
7444                    vendor_id: v.vendor_id.clone(),
7445                    category: format!("{:?}", v.vendor_type).to_lowercase(),
7446                    spend,
7447                    country: v.country.clone(),
7448                }
7449            })
7450            .collect();
7451        if !vendor_spend.is_empty() {
7452            emissions.extend(emission_gen.generate_scope3_purchased_goods(
7453                entity_id,
7454                &vendor_spend,
7455                start_date,
7456                end_date,
7457            ));
7458        }
7459
7460        // Business travel & commuting (scope 3)
7461        let headcount = self.master_data.employees.len() as u32;
7462        if headcount > 0 {
7463            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7464            emissions.extend(emission_gen.generate_scope3_business_travel(
7465                entity_id,
7466                travel_spend,
7467                start_date,
7468            ));
7469            emissions
7470                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7471        }
7472
7473        snapshot.emission_count = emissions.len();
7474        snapshot.emissions = emissions;
7475        snapshot.energy = energy_records;
7476
7477        // Social: Workforce diversity, pay equity, safety
7478        let mut workforce_gen =
7479            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7480        let total_headcount = headcount.max(100);
7481        snapshot.diversity =
7482            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7483        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7484
7485        // v2.4: Derive additional workforce diversity metrics from actual employee data
7486        if !self.master_data.employees.is_empty() {
7487            let hr_diversity = workforce_gen.generate_diversity_from_employees(
7488                entity_id,
7489                &self.master_data.employees,
7490                end_date,
7491            );
7492            if !hr_diversity.is_empty() {
7493                info!(
7494                    "ESG: {} diversity metrics derived from {} actual employees",
7495                    hr_diversity.len(),
7496                    self.master_data.employees.len(),
7497                );
7498                snapshot.diversity.extend(hr_diversity);
7499            }
7500        }
7501
7502        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7503            entity_id,
7504            facility_count,
7505            start_date,
7506            end_date,
7507        );
7508
7509        // Compute safety metrics
7510        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
7511        let safety_metric = workforce_gen.compute_safety_metrics(
7512            entity_id,
7513            &snapshot.safety_incidents,
7514            total_hours,
7515            start_date,
7516        );
7517        snapshot.safety_metrics = vec![safety_metric];
7518
7519        // Governance
7520        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7521            seed + 85,
7522            esg_cfg.governance.board_size,
7523            esg_cfg.governance.independence_target,
7524        );
7525        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7526
7527        // Supplier ESG assessments
7528        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7529            esg_cfg.supply_chain_esg.clone(),
7530            seed + 86,
7531        );
7532        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7533            .master_data
7534            .vendors
7535            .iter()
7536            .map(|v| datasynth_generators::VendorInput {
7537                vendor_id: v.vendor_id.clone(),
7538                country: v.country.clone(),
7539                industry: format!("{:?}", v.vendor_type).to_lowercase(),
7540                quality_score: None,
7541            })
7542            .collect();
7543        snapshot.supplier_assessments =
7544            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7545
7546        // Disclosures
7547        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7548            seed + 87,
7549            esg_cfg.reporting.clone(),
7550            esg_cfg.climate_scenarios.clone(),
7551        );
7552        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7553        snapshot.disclosures = disclosure_gen.generate_disclosures(
7554            entity_id,
7555            &snapshot.materiality,
7556            start_date,
7557            end_date,
7558        );
7559        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7560        snapshot.disclosure_count = snapshot.disclosures.len();
7561
7562        // Anomaly injection
7563        if esg_cfg.anomaly_rate > 0.0 {
7564            let mut anomaly_injector =
7565                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7566            let mut labels = Vec::new();
7567            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7568            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7569            labels.extend(
7570                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7571            );
7572            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7573            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7574            snapshot.anomaly_labels = labels;
7575        }
7576
7577        stats.esg_emission_count = snapshot.emission_count;
7578        stats.esg_disclosure_count = snapshot.disclosure_count;
7579
7580        info!(
7581            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7582            snapshot.emission_count,
7583            snapshot.disclosure_count,
7584            snapshot.supplier_assessments.len()
7585        );
7586        self.check_resources_with_log("post-esg")?;
7587
7588        Ok(snapshot)
7589    }
7590
7591    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
7592    fn phase_treasury_data(
7593        &mut self,
7594        document_flows: &DocumentFlowSnapshot,
7595        subledger: &SubledgerSnapshot,
7596        intercompany: &IntercompanySnapshot,
7597        stats: &mut EnhancedGenerationStatistics,
7598    ) -> SynthResult<TreasurySnapshot> {
7599        if !self.phase_config.generate_treasury {
7600            debug!("Phase 22: Skipped (treasury generation disabled)");
7601            return Ok(TreasurySnapshot::default());
7602        }
7603        let degradation = self.check_resources()?;
7604        if degradation >= DegradationLevel::Reduced {
7605            debug!(
7606                "Phase skipped due to resource pressure (degradation: {:?})",
7607                degradation
7608            );
7609            return Ok(TreasurySnapshot::default());
7610        }
7611        info!("Phase 22: Generating Treasury Data");
7612
7613        let seed = self.seed;
7614        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7615            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7616        let currency = self
7617            .config
7618            .companies
7619            .first()
7620            .map(|c| c.currency.as_str())
7621            .unwrap_or("USD");
7622        let entity_id = self
7623            .config
7624            .companies
7625            .first()
7626            .map(|c| c.code.as_str())
7627            .unwrap_or("1000");
7628
7629        let mut snapshot = TreasurySnapshot::default();
7630
7631        // Generate debt instruments
7632        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7633            self.config.treasury.debt.clone(),
7634            seed + 90,
7635        );
7636        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7637
7638        // Generate hedging instruments (IR swaps for floating-rate debt)
7639        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7640            self.config.treasury.hedging.clone(),
7641            seed + 91,
7642        );
7643        for debt in &snapshot.debt_instruments {
7644            if debt.rate_type == InterestRateType::Variable {
7645                let swap = hedge_gen.generate_ir_swap(
7646                    currency,
7647                    debt.principal,
7648                    debt.origination_date,
7649                    debt.maturity_date,
7650                );
7651                snapshot.hedging_instruments.push(swap);
7652            }
7653        }
7654
7655        // Build FX exposures from foreign-currency payments and generate
7656        // FX forwards + hedge relationship designations via generate() API.
7657        {
7658            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7659            for payment in &document_flows.payments {
7660                if payment.currency != currency {
7661                    let entry = fx_map
7662                        .entry(payment.currency.clone())
7663                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7664                    entry.0 += payment.amount;
7665                    // Use the latest settlement date among grouped payments
7666                    if payment.header.document_date > entry.1 {
7667                        entry.1 = payment.header.document_date;
7668                    }
7669                }
7670            }
7671            if !fx_map.is_empty() {
7672                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7673                    .into_iter()
7674                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
7675                        datasynth_generators::treasury::FxExposure {
7676                            currency_pair: format!("{foreign_ccy}/{currency}"),
7677                            foreign_currency: foreign_ccy,
7678                            net_amount,
7679                            settlement_date,
7680                            description: "AP payment FX exposure".to_string(),
7681                        }
7682                    })
7683                    .collect();
7684                let (fx_instruments, fx_relationships) =
7685                    hedge_gen.generate(start_date, &fx_exposures);
7686                snapshot.hedging_instruments.extend(fx_instruments);
7687                snapshot.hedge_relationships.extend(fx_relationships);
7688            }
7689        }
7690
7691        // Inject anomalies if configured
7692        if self.config.treasury.anomaly_rate > 0.0 {
7693            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7694                seed + 92,
7695                self.config.treasury.anomaly_rate,
7696            );
7697            let mut labels = Vec::new();
7698            labels.extend(
7699                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7700            );
7701            snapshot.treasury_anomaly_labels = labels;
7702        }
7703
7704        // Generate cash positions from payment flows
7705        if self.config.treasury.cash_positioning.enabled {
7706            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7707
7708            // AP payments as outflows
7709            for payment in &document_flows.payments {
7710                cash_flows.push(datasynth_generators::treasury::CashFlow {
7711                    date: payment.header.document_date,
7712                    account_id: format!("{entity_id}-MAIN"),
7713                    amount: payment.amount,
7714                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7715                });
7716            }
7717
7718            // Customer receipts (from O2C chains) as inflows
7719            for chain in &document_flows.o2c_chains {
7720                if let Some(ref receipt) = chain.customer_receipt {
7721                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7722                        date: receipt.header.document_date,
7723                        account_id: format!("{entity_id}-MAIN"),
7724                        amount: receipt.amount,
7725                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7726                    });
7727                }
7728                // Remainder receipts (follow-up to partial payments)
7729                for receipt in &chain.remainder_receipts {
7730                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7731                        date: receipt.header.document_date,
7732                        account_id: format!("{entity_id}-MAIN"),
7733                        amount: receipt.amount,
7734                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7735                    });
7736                }
7737            }
7738
7739            if !cash_flows.is_empty() {
7740                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7741                    self.config.treasury.cash_positioning.clone(),
7742                    seed + 93,
7743                );
7744                let account_id = format!("{entity_id}-MAIN");
7745                snapshot.cash_positions = cash_gen.generate(
7746                    entity_id,
7747                    &account_id,
7748                    currency,
7749                    &cash_flows,
7750                    start_date,
7751                    start_date + chrono::Months::new(self.config.global.period_months),
7752                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
7753                );
7754            }
7755        }
7756
7757        // Generate cash forecasts from AR/AP aging
7758        if self.config.treasury.cash_forecasting.enabled {
7759            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7760
7761            // Build AR aging items from subledger AR invoices
7762            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7763                .ar_invoices
7764                .iter()
7765                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7766                .map(|inv| {
7767                    let days_past_due = if inv.due_date < end_date {
7768                        (end_date - inv.due_date).num_days().max(0) as u32
7769                    } else {
7770                        0
7771                    };
7772                    datasynth_generators::treasury::ArAgingItem {
7773                        expected_date: inv.due_date,
7774                        amount: inv.amount_remaining,
7775                        days_past_due,
7776                        document_id: inv.invoice_number.clone(),
7777                    }
7778                })
7779                .collect();
7780
7781            // Build AP aging items from subledger AP invoices
7782            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7783                .ap_invoices
7784                .iter()
7785                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7786                .map(|inv| datasynth_generators::treasury::ApAgingItem {
7787                    payment_date: inv.due_date,
7788                    amount: inv.amount_remaining,
7789                    document_id: inv.invoice_number.clone(),
7790                })
7791                .collect();
7792
7793            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7794                self.config.treasury.cash_forecasting.clone(),
7795                seed + 94,
7796            );
7797            let forecast = forecast_gen.generate(
7798                entity_id,
7799                currency,
7800                end_date,
7801                &ar_items,
7802                &ap_items,
7803                &[], // scheduled disbursements - empty for now
7804            );
7805            snapshot.cash_forecasts.push(forecast);
7806        }
7807
7808        // Generate cash pools and sweeps
7809        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7810            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7811            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7812                self.config.treasury.cash_pooling.clone(),
7813                seed + 95,
7814            );
7815
7816            // Create a pool from available accounts
7817            let account_ids: Vec<String> = snapshot
7818                .cash_positions
7819                .iter()
7820                .map(|cp| cp.bank_account_id.clone())
7821                .collect::<std::collections::HashSet<_>>()
7822                .into_iter()
7823                .collect();
7824
7825            if let Some(pool) =
7826                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7827            {
7828                // Generate sweeps - build participant balances from last cash position per account
7829                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7830                for cp in &snapshot.cash_positions {
7831                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7832                }
7833
7834                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7835                    latest_balances
7836                        .into_iter()
7837                        .filter(|(id, _)| pool.participant_accounts.contains(id))
7838                        .map(
7839                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
7840                                account_id: id,
7841                                balance,
7842                            },
7843                        )
7844                        .collect();
7845
7846                let sweeps =
7847                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7848                snapshot.cash_pool_sweeps = sweeps;
7849                snapshot.cash_pools.push(pool);
7850            }
7851        }
7852
7853        // Generate bank guarantees
7854        if self.config.treasury.bank_guarantees.enabled {
7855            let vendor_names: Vec<String> = self
7856                .master_data
7857                .vendors
7858                .iter()
7859                .map(|v| v.name.clone())
7860                .collect();
7861            if !vendor_names.is_empty() {
7862                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7863                    self.config.treasury.bank_guarantees.clone(),
7864                    seed + 96,
7865                );
7866                snapshot.bank_guarantees =
7867                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7868            }
7869        }
7870
7871        // Generate netting runs from intercompany matched pairs
7872        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7873            let entity_ids: Vec<String> = self
7874                .config
7875                .companies
7876                .iter()
7877                .map(|c| c.code.clone())
7878                .collect();
7879            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7880                .matched_pairs
7881                .iter()
7882                .map(|mp| {
7883                    (
7884                        mp.seller_company.clone(),
7885                        mp.buyer_company.clone(),
7886                        mp.amount,
7887                    )
7888                })
7889                .collect();
7890            if entity_ids.len() >= 2 {
7891                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7892                    self.config.treasury.netting.clone(),
7893                    seed + 97,
7894                );
7895                snapshot.netting_runs = netting_gen.generate(
7896                    &entity_ids,
7897                    currency,
7898                    start_date,
7899                    self.config.global.period_months,
7900                    &ic_amounts,
7901                );
7902            }
7903        }
7904
7905        // Generate treasury journal entries from the instruments we just created.
7906        {
7907            use datasynth_generators::treasury::TreasuryAccounting;
7908
7909            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7910            let mut treasury_jes = Vec::new();
7911
7912            // Debt interest accrual JEs
7913            if !snapshot.debt_instruments.is_empty() {
7914                let debt_jes =
7915                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
7916                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
7917                treasury_jes.extend(debt_jes);
7918            }
7919
7920            // Hedge mark-to-market JEs
7921            if !snapshot.hedging_instruments.is_empty() {
7922                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
7923                    &snapshot.hedging_instruments,
7924                    &snapshot.hedge_relationships,
7925                    end_date,
7926                    entity_id,
7927                );
7928                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
7929                treasury_jes.extend(hedge_jes);
7930            }
7931
7932            // Cash pool sweep JEs
7933            if !snapshot.cash_pool_sweeps.is_empty() {
7934                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
7935                    &snapshot.cash_pool_sweeps,
7936                    entity_id,
7937                );
7938                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
7939                treasury_jes.extend(sweep_jes);
7940            }
7941
7942            if !treasury_jes.is_empty() {
7943                debug!("Total treasury journal entries: {}", treasury_jes.len());
7944            }
7945            snapshot.journal_entries = treasury_jes;
7946        }
7947
7948        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
7949        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
7950        stats.cash_position_count = snapshot.cash_positions.len();
7951        stats.cash_forecast_count = snapshot.cash_forecasts.len();
7952        stats.cash_pool_count = snapshot.cash_pools.len();
7953
7954        info!(
7955            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
7956            snapshot.debt_instruments.len(),
7957            snapshot.hedging_instruments.len(),
7958            snapshot.cash_positions.len(),
7959            snapshot.cash_forecasts.len(),
7960            snapshot.cash_pools.len(),
7961            snapshot.bank_guarantees.len(),
7962            snapshot.netting_runs.len(),
7963            snapshot.journal_entries.len(),
7964        );
7965        self.check_resources_with_log("post-treasury")?;
7966
7967        Ok(snapshot)
7968    }
7969
7970    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
7971    fn phase_project_accounting(
7972        &mut self,
7973        document_flows: &DocumentFlowSnapshot,
7974        hr: &HrSnapshot,
7975        stats: &mut EnhancedGenerationStatistics,
7976    ) -> SynthResult<ProjectAccountingSnapshot> {
7977        if !self.phase_config.generate_project_accounting {
7978            debug!("Phase 23: Skipped (project accounting disabled)");
7979            return Ok(ProjectAccountingSnapshot::default());
7980        }
7981        let degradation = self.check_resources()?;
7982        if degradation >= DegradationLevel::Reduced {
7983            debug!(
7984                "Phase skipped due to resource pressure (degradation: {:?})",
7985                degradation
7986            );
7987            return Ok(ProjectAccountingSnapshot::default());
7988        }
7989        info!("Phase 23: Generating Project Accounting Data");
7990
7991        let seed = self.seed;
7992        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7993            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7994        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7995        let company_code = self
7996            .config
7997            .companies
7998            .first()
7999            .map(|c| c.code.as_str())
8000            .unwrap_or("1000");
8001
8002        let mut snapshot = ProjectAccountingSnapshot::default();
8003
8004        // Generate projects with WBS hierarchies
8005        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8006            self.config.project_accounting.clone(),
8007            seed + 95,
8008        );
8009        let pool = project_gen.generate(company_code, start_date, end_date);
8010        snapshot.projects = pool.projects.clone();
8011
8012        // Link source documents to projects for cost allocation
8013        {
8014            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8015                Vec::new();
8016
8017            // Time entries
8018            for te in &hr.time_entries {
8019                let total_hours = te.hours_regular + te.hours_overtime;
8020                if total_hours > 0.0 {
8021                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8022                        id: te.entry_id.clone(),
8023                        entity_id: company_code.to_string(),
8024                        date: te.date,
8025                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8026                            .unwrap_or(rust_decimal::Decimal::ZERO),
8027                        source_type: CostSourceType::TimeEntry,
8028                        hours: Some(
8029                            rust_decimal::Decimal::from_f64_retain(total_hours)
8030                                .unwrap_or(rust_decimal::Decimal::ZERO),
8031                        ),
8032                    });
8033                }
8034            }
8035
8036            // Expense reports
8037            for er in &hr.expense_reports {
8038                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8039                    id: er.report_id.clone(),
8040                    entity_id: company_code.to_string(),
8041                    date: er.submission_date,
8042                    amount: er.total_amount,
8043                    source_type: CostSourceType::ExpenseReport,
8044                    hours: None,
8045                });
8046            }
8047
8048            // Purchase orders
8049            for po in &document_flows.purchase_orders {
8050                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8051                    id: po.header.document_id.clone(),
8052                    entity_id: company_code.to_string(),
8053                    date: po.header.document_date,
8054                    amount: po.total_net_amount,
8055                    source_type: CostSourceType::PurchaseOrder,
8056                    hours: None,
8057                });
8058            }
8059
8060            // Vendor invoices
8061            for vi in &document_flows.vendor_invoices {
8062                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8063                    id: vi.header.document_id.clone(),
8064                    entity_id: company_code.to_string(),
8065                    date: vi.header.document_date,
8066                    amount: vi.payable_amount,
8067                    source_type: CostSourceType::VendorInvoice,
8068                    hours: None,
8069                });
8070            }
8071
8072            if !source_docs.is_empty() && !pool.projects.is_empty() {
8073                let mut cost_gen =
8074                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
8075                        self.config.project_accounting.cost_allocation.clone(),
8076                        seed + 99,
8077                    );
8078                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8079            }
8080        }
8081
8082        // Generate change orders
8083        if self.config.project_accounting.change_orders.enabled {
8084            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8085                self.config.project_accounting.change_orders.clone(),
8086                seed + 96,
8087            );
8088            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8089        }
8090
8091        // Generate milestones
8092        if self.config.project_accounting.milestones.enabled {
8093            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8094                self.config.project_accounting.milestones.clone(),
8095                seed + 97,
8096            );
8097            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8098        }
8099
8100        // Generate earned value metrics (needs cost lines, so only if we have projects)
8101        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8102            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8103                self.config.project_accounting.earned_value.clone(),
8104                seed + 98,
8105            );
8106            snapshot.earned_value_metrics =
8107                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8108        }
8109
8110        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
8111        if self.config.project_accounting.revenue_recognition.enabled
8112            && !snapshot.projects.is_empty()
8113            && !snapshot.cost_lines.is_empty()
8114        {
8115            use datasynth_generators::project_accounting::RevenueGenerator;
8116            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8117            let avg_contract_value =
8118                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8119                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8120
8121            // Build contract value tuples: only customer-type projects get revenue recognition.
8122            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8123            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8124                snapshot
8125                    .projects
8126                    .iter()
8127                    .filter(|p| {
8128                        matches!(
8129                            p.project_type,
8130                            datasynth_core::models::ProjectType::Customer
8131                        )
8132                    })
8133                    .map(|p| {
8134                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8135                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8136                        // budget × 1.25 → contract value
8137                        } else {
8138                            avg_contract_value
8139                        };
8140                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8141                        (p.project_id.clone(), cv, etc)
8142                    })
8143                    .collect();
8144
8145            if !contract_values.is_empty() {
8146                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8147                snapshot.revenue_records = rev_gen.generate(
8148                    &snapshot.projects,
8149                    &snapshot.cost_lines,
8150                    &contract_values,
8151                    start_date,
8152                    end_date,
8153                );
8154                debug!(
8155                    "Generated {} revenue recognition records for {} customer projects",
8156                    snapshot.revenue_records.len(),
8157                    contract_values.len()
8158                );
8159            }
8160        }
8161
8162        stats.project_count = snapshot.projects.len();
8163        stats.project_change_order_count = snapshot.change_orders.len();
8164        stats.project_cost_line_count = snapshot.cost_lines.len();
8165
8166        info!(
8167            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8168            snapshot.projects.len(),
8169            snapshot.change_orders.len(),
8170            snapshot.milestones.len(),
8171            snapshot.earned_value_metrics.len()
8172        );
8173        self.check_resources_with_log("post-project-accounting")?;
8174
8175        Ok(snapshot)
8176    }
8177
8178    /// Phase 24: Generate process evolution and organizational events.
8179    fn phase_evolution_events(
8180        &mut self,
8181        stats: &mut EnhancedGenerationStatistics,
8182    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8183        if !self.phase_config.generate_evolution_events {
8184            debug!("Phase 24: Skipped (evolution events disabled)");
8185            return Ok((Vec::new(), Vec::new()));
8186        }
8187        info!("Phase 24: Generating Process Evolution + Organizational Events");
8188
8189        let seed = self.seed;
8190        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8191            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8192        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8193
8194        // Process evolution events
8195        let mut proc_gen =
8196            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8197                seed + 100,
8198            );
8199        let process_events = proc_gen.generate_events(start_date, end_date);
8200
8201        // Organizational events
8202        let company_codes: Vec<String> = self
8203            .config
8204            .companies
8205            .iter()
8206            .map(|c| c.code.clone())
8207            .collect();
8208        let mut org_gen =
8209            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8210                seed + 101,
8211            );
8212        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8213
8214        stats.process_evolution_event_count = process_events.len();
8215        stats.organizational_event_count = org_events.len();
8216
8217        info!(
8218            "Evolution events generated: {} process evolution, {} organizational",
8219            process_events.len(),
8220            org_events.len()
8221        );
8222        self.check_resources_with_log("post-evolution-events")?;
8223
8224        Ok((process_events, org_events))
8225    }
8226
8227    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
8228    /// data recovery, and regulatory changes).
8229    fn phase_disruption_events(
8230        &self,
8231        stats: &mut EnhancedGenerationStatistics,
8232    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8233        if !self.config.organizational_events.enabled {
8234            debug!("Phase 24b: Skipped (organizational events disabled)");
8235            return Ok(Vec::new());
8236        }
8237        info!("Phase 24b: Generating Disruption Events");
8238
8239        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8240            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8241        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8242
8243        let company_codes: Vec<String> = self
8244            .config
8245            .companies
8246            .iter()
8247            .map(|c| c.code.clone())
8248            .collect();
8249
8250        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8251        let events = gen.generate(start_date, end_date, &company_codes);
8252
8253        stats.disruption_event_count = events.len();
8254        info!("Disruption events generated: {} events", events.len());
8255        self.check_resources_with_log("post-disruption-events")?;
8256
8257        Ok(events)
8258    }
8259
8260    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
8261    ///
8262    /// Produces paired examples where each pair contains the original clean JE
8263    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
8264    /// split transaction). Useful for training anomaly detection models with
8265    /// known ground truth.
8266    fn phase_counterfactuals(
8267        &self,
8268        journal_entries: &[JournalEntry],
8269        stats: &mut EnhancedGenerationStatistics,
8270    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8271        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8272            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8273            return Ok(Vec::new());
8274        }
8275        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8276
8277        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8278
8279        let mut gen = CounterfactualGenerator::new(self.seed + 110);
8280
8281        // Rotating set of specs to produce diverse mutation types
8282        let specs = [
8283            CounterfactualSpec::ScaleAmount { factor: 2.5 },
8284            CounterfactualSpec::ShiftDate { days: -14 },
8285            CounterfactualSpec::SelfApprove,
8286            CounterfactualSpec::SplitTransaction { split_count: 3 },
8287        ];
8288
8289        let pairs: Vec<_> = journal_entries
8290            .iter()
8291            .enumerate()
8292            .map(|(i, je)| {
8293                let spec = &specs[i % specs.len()];
8294                gen.generate(je, spec)
8295            })
8296            .collect();
8297
8298        stats.counterfactual_pair_count = pairs.len();
8299        info!(
8300            "Counterfactual pairs generated: {} pairs from {} journal entries",
8301            pairs.len(),
8302            journal_entries.len()
8303        );
8304        self.check_resources_with_log("post-counterfactuals")?;
8305
8306        Ok(pairs)
8307    }
8308
8309    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
8310    ///
8311    /// Uses the anomaly labels (from Phase 8) to determine which documents are
8312    /// fraudulent, then generates probabilistic red flags on all chain documents.
8313    /// Non-fraud documents also receive red flags at a lower rate (false positives)
8314    /// to produce realistic ML training data.
8315    fn phase_red_flags(
8316        &self,
8317        anomaly_labels: &AnomalyLabels,
8318        document_flows: &DocumentFlowSnapshot,
8319        stats: &mut EnhancedGenerationStatistics,
8320    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8321        if !self.config.fraud.enabled {
8322            debug!("Phase 26: Skipped (fraud generation disabled)");
8323            return Ok(Vec::new());
8324        }
8325        info!("Phase 26: Generating Fraud Red-Flag Indicators");
8326
8327        use datasynth_generators::fraud::RedFlagGenerator;
8328
8329        let generator = RedFlagGenerator::new();
8330        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8331
8332        // Build a set of document IDs that are known-fraudulent from anomaly labels.
8333        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8334            .labels
8335            .iter()
8336            .filter(|label| label.anomaly_type.is_intentional())
8337            .map(|label| label.document_id.as_str())
8338            .collect();
8339
8340        let mut flags = Vec::new();
8341
8342        // Iterate P2P chains: use the purchase order document ID as the chain key.
8343        for chain in &document_flows.p2p_chains {
8344            let doc_id = &chain.purchase_order.header.document_id;
8345            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8346            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8347        }
8348
8349        // Iterate O2C chains: use the sales order document ID as the chain key.
8350        for chain in &document_flows.o2c_chains {
8351            let doc_id = &chain.sales_order.header.document_id;
8352            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8353            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8354        }
8355
8356        stats.red_flag_count = flags.len();
8357        info!(
8358            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8359            flags.len(),
8360            document_flows.p2p_chains.len(),
8361            document_flows.o2c_chains.len(),
8362            fraud_doc_ids.len()
8363        );
8364        self.check_resources_with_log("post-red-flags")?;
8365
8366        Ok(flags)
8367    }
8368
8369    /// Phase 26b: Generate collusion rings from employee/vendor pools.
8370    ///
8371    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
8372    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
8373    /// advance them over the simulation period.
8374    fn phase_collusion_rings(
8375        &mut self,
8376        stats: &mut EnhancedGenerationStatistics,
8377    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8378        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8379            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8380            return Ok(Vec::new());
8381        }
8382        info!("Phase 26b: Generating Collusion Rings");
8383
8384        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8385            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8386        let months = self.config.global.period_months;
8387
8388        let employee_ids: Vec<String> = self
8389            .master_data
8390            .employees
8391            .iter()
8392            .map(|e| e.employee_id.clone())
8393            .collect();
8394        let vendor_ids: Vec<String> = self
8395            .master_data
8396            .vendors
8397            .iter()
8398            .map(|v| v.vendor_id.clone())
8399            .collect();
8400
8401        let mut generator =
8402            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8403        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8404
8405        stats.collusion_ring_count = rings.len();
8406        info!(
8407            "Collusion rings generated: {} rings, total members: {}",
8408            rings.len(),
8409            rings
8410                .iter()
8411                .map(datasynth_generators::fraud::CollusionRing::size)
8412                .sum::<usize>()
8413        );
8414        self.check_resources_with_log("post-collusion-rings")?;
8415
8416        Ok(rings)
8417    }
8418
8419    /// Phase 27: Generate bi-temporal version chains for vendor entities.
8420    ///
8421    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
8422    /// master data changes over time, supporting bi-temporal audit queries.
8423    fn phase_temporal_attributes(
8424        &mut self,
8425        stats: &mut EnhancedGenerationStatistics,
8426    ) -> SynthResult<
8427        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8428    > {
8429        if !self.config.temporal_attributes.enabled {
8430            debug!("Phase 27: Skipped (temporal attributes disabled)");
8431            return Ok(Vec::new());
8432        }
8433        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8434
8435        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8436            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8437
8438        // Build a TemporalAttributeConfig from the user's config.
8439        // Since Phase 27 is already gated on temporal_attributes.enabled,
8440        // default to enabling version chains so users get actual mutations.
8441        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8442            || self.config.temporal_attributes.enabled;
8443        let temporal_config = {
8444            let ta = &self.config.temporal_attributes;
8445            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8446                .enabled(ta.enabled)
8447                .closed_probability(ta.valid_time.closed_probability)
8448                .avg_validity_days(ta.valid_time.avg_validity_days)
8449                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8450                .with_version_chains(if generate_version_chains {
8451                    ta.avg_versions_per_entity
8452                } else {
8453                    1.0
8454                })
8455                .build()
8456        };
8457        // Apply backdating settings if configured
8458        let temporal_config = if self
8459            .config
8460            .temporal_attributes
8461            .transaction_time
8462            .allow_backdating
8463        {
8464            let mut c = temporal_config;
8465            c.transaction_time.allow_backdating = true;
8466            c.transaction_time.backdating_probability = self
8467                .config
8468                .temporal_attributes
8469                .transaction_time
8470                .backdating_probability;
8471            c.transaction_time.max_backdate_days = self
8472                .config
8473                .temporal_attributes
8474                .transaction_time
8475                .max_backdate_days;
8476            c
8477        } else {
8478            temporal_config
8479        };
8480        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8481            temporal_config,
8482            self.seed + 130,
8483            start_date,
8484        );
8485
8486        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8487            self.seed + 130,
8488            datasynth_core::GeneratorType::Vendor,
8489        );
8490
8491        let chains: Vec<_> = self
8492            .master_data
8493            .vendors
8494            .iter()
8495            .map(|vendor| {
8496                let id = uuid_factory.next();
8497                gen.generate_version_chain(vendor.clone(), id)
8498            })
8499            .collect();
8500
8501        stats.temporal_version_chain_count = chains.len();
8502        info!("Temporal version chains generated: {} chains", chains.len());
8503        self.check_resources_with_log("post-temporal-attributes")?;
8504
8505        Ok(chains)
8506    }
8507
8508    /// Phase 28: Build entity relationship graph and cross-process links.
8509    ///
8510    /// Part 1 (gated on `relationship_strength.enabled`): builds an
8511    /// `EntityGraph` from master-data vendor/customer entities and
8512    /// journal-entry-derived transaction summaries.
8513    ///
8514    /// Part 2 (gated on `cross_process_links.enabled`): extracts
8515    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
8516    /// generates inventory-movement cross-process links.
8517    fn phase_entity_relationships(
8518        &self,
8519        journal_entries: &[JournalEntry],
8520        document_flows: &DocumentFlowSnapshot,
8521        stats: &mut EnhancedGenerationStatistics,
8522    ) -> SynthResult<(
8523        Option<datasynth_core::models::EntityGraph>,
8524        Vec<datasynth_core::models::CrossProcessLink>,
8525    )> {
8526        use datasynth_generators::relationships::{
8527            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8528            TransactionSummary,
8529        };
8530
8531        let rs_enabled = self.config.relationship_strength.enabled;
8532        let cpl_enabled = self.config.cross_process_links.enabled
8533            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8534
8535        if !rs_enabled && !cpl_enabled {
8536            debug!(
8537                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8538            );
8539            return Ok((None, Vec::new()));
8540        }
8541
8542        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8543
8544        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8545            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8546
8547        let company_code = self
8548            .config
8549            .companies
8550            .first()
8551            .map(|c| c.code.as_str())
8552            .unwrap_or("1000");
8553
8554        // Build the generator with matching config flags
8555        let gen_config = EntityGraphConfig {
8556            enabled: rs_enabled,
8557            cross_process: datasynth_generators::relationships::CrossProcessConfig {
8558                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8559                enable_return_flows: false,
8560                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8561                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8562                // Use higher link rate for small datasets to avoid probabilistic empty results
8563                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8564                    1.0
8565                } else {
8566                    0.30
8567                },
8568                ..Default::default()
8569            },
8570            strength_config: datasynth_generators::relationships::StrengthConfig {
8571                transaction_volume_weight: self
8572                    .config
8573                    .relationship_strength
8574                    .calculation
8575                    .transaction_volume_weight,
8576                transaction_count_weight: self
8577                    .config
8578                    .relationship_strength
8579                    .calculation
8580                    .transaction_count_weight,
8581                duration_weight: self
8582                    .config
8583                    .relationship_strength
8584                    .calculation
8585                    .relationship_duration_weight,
8586                recency_weight: self.config.relationship_strength.calculation.recency_weight,
8587                mutual_connections_weight: self
8588                    .config
8589                    .relationship_strength
8590                    .calculation
8591                    .mutual_connections_weight,
8592                recency_half_life_days: self
8593                    .config
8594                    .relationship_strength
8595                    .calculation
8596                    .recency_half_life_days,
8597            },
8598            ..Default::default()
8599        };
8600
8601        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8602
8603        // --- Part 1: Entity Relationship Graph ---
8604        let entity_graph = if rs_enabled {
8605            // Build EntitySummary lists from master data
8606            let vendor_summaries: Vec<EntitySummary> = self
8607                .master_data
8608                .vendors
8609                .iter()
8610                .map(|v| {
8611                    EntitySummary::new(
8612                        &v.vendor_id,
8613                        &v.name,
8614                        datasynth_core::models::GraphEntityType::Vendor,
8615                        start_date,
8616                    )
8617                })
8618                .collect();
8619
8620            let customer_summaries: Vec<EntitySummary> = self
8621                .master_data
8622                .customers
8623                .iter()
8624                .map(|c| {
8625                    EntitySummary::new(
8626                        &c.customer_id,
8627                        &c.name,
8628                        datasynth_core::models::GraphEntityType::Customer,
8629                        start_date,
8630                    )
8631                })
8632                .collect();
8633
8634            // Build transaction summaries from journal entries.
8635            // Key = (company_code, trading_partner) for entries that have a
8636            // trading partner.  This captures intercompany flows and any JE
8637            // whose line items carry a trading_partner reference.
8638            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8639                std::collections::HashMap::new();
8640
8641            for je in journal_entries {
8642                let cc = je.header.company_code.clone();
8643                let posting_date = je.header.posting_date;
8644                for line in &je.lines {
8645                    if let Some(ref tp) = line.trading_partner {
8646                        let amount = if line.debit_amount > line.credit_amount {
8647                            line.debit_amount
8648                        } else {
8649                            line.credit_amount
8650                        };
8651                        let entry = txn_summaries
8652                            .entry((cc.clone(), tp.clone()))
8653                            .or_insert_with(|| TransactionSummary {
8654                                total_volume: rust_decimal::Decimal::ZERO,
8655                                transaction_count: 0,
8656                                first_transaction_date: posting_date,
8657                                last_transaction_date: posting_date,
8658                                related_entities: std::collections::HashSet::new(),
8659                            });
8660                        entry.total_volume += amount;
8661                        entry.transaction_count += 1;
8662                        if posting_date < entry.first_transaction_date {
8663                            entry.first_transaction_date = posting_date;
8664                        }
8665                        if posting_date > entry.last_transaction_date {
8666                            entry.last_transaction_date = posting_date;
8667                        }
8668                        entry.related_entities.insert(cc.clone());
8669                    }
8670                }
8671            }
8672
8673            // Also extract transaction relationships from document flow chains.
8674            // P2P chains: Company → Vendor relationships
8675            for chain in &document_flows.p2p_chains {
8676                let cc = chain.purchase_order.header.company_code.clone();
8677                let vendor_id = chain.purchase_order.vendor_id.clone();
8678                let po_date = chain.purchase_order.header.document_date;
8679                let amount = chain.purchase_order.total_net_amount;
8680
8681                let entry = txn_summaries
8682                    .entry((cc.clone(), vendor_id))
8683                    .or_insert_with(|| TransactionSummary {
8684                        total_volume: rust_decimal::Decimal::ZERO,
8685                        transaction_count: 0,
8686                        first_transaction_date: po_date,
8687                        last_transaction_date: po_date,
8688                        related_entities: std::collections::HashSet::new(),
8689                    });
8690                entry.total_volume += amount;
8691                entry.transaction_count += 1;
8692                if po_date < entry.first_transaction_date {
8693                    entry.first_transaction_date = po_date;
8694                }
8695                if po_date > entry.last_transaction_date {
8696                    entry.last_transaction_date = po_date;
8697                }
8698                entry.related_entities.insert(cc);
8699            }
8700
8701            // O2C chains: Company → Customer relationships
8702            for chain in &document_flows.o2c_chains {
8703                let cc = chain.sales_order.header.company_code.clone();
8704                let customer_id = chain.sales_order.customer_id.clone();
8705                let so_date = chain.sales_order.header.document_date;
8706                let amount = chain.sales_order.total_net_amount;
8707
8708                let entry = txn_summaries
8709                    .entry((cc.clone(), customer_id))
8710                    .or_insert_with(|| TransactionSummary {
8711                        total_volume: rust_decimal::Decimal::ZERO,
8712                        transaction_count: 0,
8713                        first_transaction_date: so_date,
8714                        last_transaction_date: so_date,
8715                        related_entities: std::collections::HashSet::new(),
8716                    });
8717                entry.total_volume += amount;
8718                entry.transaction_count += 1;
8719                if so_date < entry.first_transaction_date {
8720                    entry.first_transaction_date = so_date;
8721                }
8722                if so_date > entry.last_transaction_date {
8723                    entry.last_transaction_date = so_date;
8724                }
8725                entry.related_entities.insert(cc);
8726            }
8727
8728            let as_of_date = journal_entries
8729                .last()
8730                .map(|je| je.header.posting_date)
8731                .unwrap_or(start_date);
8732
8733            let graph = gen.generate_entity_graph(
8734                company_code,
8735                as_of_date,
8736                &vendor_summaries,
8737                &customer_summaries,
8738                &txn_summaries,
8739            );
8740
8741            info!(
8742                "Entity relationship graph: {} nodes, {} edges",
8743                graph.nodes.len(),
8744                graph.edges.len()
8745            );
8746            stats.entity_relationship_node_count = graph.nodes.len();
8747            stats.entity_relationship_edge_count = graph.edges.len();
8748            Some(graph)
8749        } else {
8750            None
8751        };
8752
8753        // --- Part 2: Cross-Process Links ---
8754        let cross_process_links = if cpl_enabled {
8755            // Build GoodsReceiptRef from P2P chains
8756            let gr_refs: Vec<GoodsReceiptRef> = document_flows
8757                .p2p_chains
8758                .iter()
8759                .flat_map(|chain| {
8760                    let vendor_id = chain.purchase_order.vendor_id.clone();
8761                    let cc = chain.purchase_order.header.company_code.clone();
8762                    chain.goods_receipts.iter().flat_map(move |gr| {
8763                        gr.items.iter().filter_map({
8764                            let doc_id = gr.header.document_id.clone();
8765                            let v_id = vendor_id.clone();
8766                            let company = cc.clone();
8767                            let receipt_date = gr.header.document_date;
8768                            move |item| {
8769                                item.base
8770                                    .material_id
8771                                    .as_ref()
8772                                    .map(|mat_id| GoodsReceiptRef {
8773                                        document_id: doc_id.clone(),
8774                                        material_id: mat_id.clone(),
8775                                        quantity: item.base.quantity,
8776                                        receipt_date,
8777                                        vendor_id: v_id.clone(),
8778                                        company_code: company.clone(),
8779                                    })
8780                            }
8781                        })
8782                    })
8783                })
8784                .collect();
8785
8786            // Build DeliveryRef from O2C chains
8787            let del_refs: Vec<DeliveryRef> = document_flows
8788                .o2c_chains
8789                .iter()
8790                .flat_map(|chain| {
8791                    let customer_id = chain.sales_order.customer_id.clone();
8792                    let cc = chain.sales_order.header.company_code.clone();
8793                    chain.deliveries.iter().flat_map(move |del| {
8794                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8795                        del.items.iter().filter_map({
8796                            let doc_id = del.header.document_id.clone();
8797                            let c_id = customer_id.clone();
8798                            let company = cc.clone();
8799                            move |item| {
8800                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8801                                    document_id: doc_id.clone(),
8802                                    material_id: mat_id.clone(),
8803                                    quantity: item.base.quantity,
8804                                    delivery_date,
8805                                    customer_id: c_id.clone(),
8806                                    company_code: company.clone(),
8807                                })
8808                            }
8809                        })
8810                    })
8811                })
8812                .collect();
8813
8814            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8815            info!("Cross-process links generated: {} links", links.len());
8816            stats.cross_process_link_count = links.len();
8817            links
8818        } else {
8819            Vec::new()
8820        };
8821
8822        self.check_resources_with_log("post-entity-relationships")?;
8823        Ok((entity_graph, cross_process_links))
8824    }
8825
8826    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
8827    fn phase_industry_data(
8828        &self,
8829        stats: &mut EnhancedGenerationStatistics,
8830    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8831        if !self.config.industry_specific.enabled {
8832            return None;
8833        }
8834        info!("Phase 29: Generating industry-specific data");
8835        let output = datasynth_generators::industry::factory::generate_industry_output(
8836            self.config.global.industry,
8837        );
8838        stats.industry_gl_account_count = output.gl_accounts.len();
8839        info!(
8840            "Industry data generated: {} GL accounts for {:?}",
8841            output.gl_accounts.len(),
8842            self.config.global.industry
8843        );
8844        Some(output)
8845    }
8846
8847    /// Phase 3b: Generate opening balances for each company.
8848    fn phase_opening_balances(
8849        &mut self,
8850        coa: &Arc<ChartOfAccounts>,
8851        stats: &mut EnhancedGenerationStatistics,
8852    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8853        if !self.config.balance.generate_opening_balances {
8854            debug!("Phase 3b: Skipped (opening balance generation disabled)");
8855            return Ok(Vec::new());
8856        }
8857        info!("Phase 3b: Generating Opening Balances");
8858
8859        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8860            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8861        let fiscal_year = start_date.year();
8862
8863        let industry = match self.config.global.industry {
8864            IndustrySector::Manufacturing => IndustryType::Manufacturing,
8865            IndustrySector::Retail => IndustryType::Retail,
8866            IndustrySector::FinancialServices => IndustryType::Financial,
8867            IndustrySector::Healthcare => IndustryType::Healthcare,
8868            IndustrySector::Technology => IndustryType::Technology,
8869            _ => IndustryType::Manufacturing,
8870        };
8871
8872        let config = datasynth_generators::OpeningBalanceConfig {
8873            industry,
8874            ..Default::default()
8875        };
8876        let mut gen =
8877            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8878
8879        let mut results = Vec::new();
8880        for company in &self.config.companies {
8881            let spec = OpeningBalanceSpec::new(
8882                company.code.clone(),
8883                start_date,
8884                fiscal_year,
8885                company.currency.clone(),
8886                rust_decimal::Decimal::new(10_000_000, 0),
8887                industry,
8888            );
8889            let ob = gen.generate(&spec, coa, start_date, &company.code);
8890            results.push(ob);
8891        }
8892
8893        stats.opening_balance_count = results.len();
8894        info!("Opening balances generated: {} companies", results.len());
8895        self.check_resources_with_log("post-opening-balances")?;
8896
8897        Ok(results)
8898    }
8899
8900    /// Phase 9b: Reconcile GL control accounts to subledger balances.
8901    fn phase_subledger_reconciliation(
8902        &mut self,
8903        subledger: &SubledgerSnapshot,
8904        entries: &[JournalEntry],
8905        stats: &mut EnhancedGenerationStatistics,
8906    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8907        if !self.config.balance.reconcile_subledgers {
8908            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8909            return Ok(Vec::new());
8910        }
8911        info!("Phase 9b: Reconciling GL to subledger balances");
8912
8913        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8914            .map(|d| d + chrono::Months::new(self.config.global.period_months))
8915            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8916
8917        // Build GL balance map from journal entries using a balance tracker
8918        let tracker_config = BalanceTrackerConfig {
8919            validate_on_each_entry: false,
8920            track_history: false,
8921            fail_on_validation_error: false,
8922            ..Default::default()
8923        };
8924        let recon_currency = self
8925            .config
8926            .companies
8927            .first()
8928            .map(|c| c.currency.clone())
8929            .unwrap_or_else(|| "USD".to_string());
8930        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
8931        let validation_errors = tracker.apply_entries(entries);
8932        if !validation_errors.is_empty() {
8933            warn!(
8934                error_count = validation_errors.len(),
8935                "Balance tracker encountered validation errors during subledger reconciliation"
8936            );
8937            for err in &validation_errors {
8938                debug!("Balance validation error: {:?}", err);
8939            }
8940        }
8941
8942        let mut engine = datasynth_generators::ReconciliationEngine::new(
8943            datasynth_generators::ReconciliationConfig::default(),
8944        );
8945
8946        let mut results = Vec::new();
8947        let company_code = self
8948            .config
8949            .companies
8950            .first()
8951            .map(|c| c.code.as_str())
8952            .unwrap_or("1000");
8953
8954        // Reconcile AR
8955        if !subledger.ar_invoices.is_empty() {
8956            let gl_balance = tracker
8957                .get_account_balance(
8958                    company_code,
8959                    datasynth_core::accounts::control_accounts::AR_CONTROL,
8960                )
8961                .map(|b| b.closing_balance)
8962                .unwrap_or_default();
8963            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
8964            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
8965        }
8966
8967        // Reconcile AP
8968        if !subledger.ap_invoices.is_empty() {
8969            let gl_balance = tracker
8970                .get_account_balance(
8971                    company_code,
8972                    datasynth_core::accounts::control_accounts::AP_CONTROL,
8973                )
8974                .map(|b| b.closing_balance)
8975                .unwrap_or_default();
8976            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
8977            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
8978        }
8979
8980        // Reconcile FA
8981        if !subledger.fa_records.is_empty() {
8982            let gl_asset_balance = tracker
8983                .get_account_balance(
8984                    company_code,
8985                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
8986                )
8987                .map(|b| b.closing_balance)
8988                .unwrap_or_default();
8989            let gl_accum_depr_balance = tracker
8990                .get_account_balance(
8991                    company_code,
8992                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
8993                )
8994                .map(|b| b.closing_balance)
8995                .unwrap_or_default();
8996            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
8997                subledger.fa_records.iter().collect();
8998            let (asset_recon, depr_recon) = engine.reconcile_fa(
8999                company_code,
9000                end_date,
9001                gl_asset_balance,
9002                gl_accum_depr_balance,
9003                &fa_refs,
9004            );
9005            results.push(asset_recon);
9006            results.push(depr_recon);
9007        }
9008
9009        // Reconcile Inventory
9010        if !subledger.inventory_positions.is_empty() {
9011            let gl_balance = tracker
9012                .get_account_balance(
9013                    company_code,
9014                    datasynth_core::accounts::control_accounts::INVENTORY,
9015                )
9016                .map(|b| b.closing_balance)
9017                .unwrap_or_default();
9018            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9019                subledger.inventory_positions.iter().collect();
9020            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9021        }
9022
9023        stats.subledger_reconciliation_count = results.len();
9024        let passed = results.iter().filter(|r| r.is_balanced()).count();
9025        let failed = results.len() - passed;
9026        info!(
9027            "Subledger reconciliation: {} checks, {} passed, {} failed",
9028            results.len(),
9029            passed,
9030            failed
9031        );
9032        self.check_resources_with_log("post-subledger-reconciliation")?;
9033
9034        Ok(results)
9035    }
9036
9037    /// Generate the chart of accounts.
9038    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9039        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9040
9041        let coa_framework = self.resolve_coa_framework();
9042
9043        let mut gen = ChartOfAccountsGenerator::new(
9044            self.config.chart_of_accounts.complexity,
9045            self.config.global.industry,
9046            self.seed,
9047        )
9048        .with_coa_framework(coa_framework);
9049
9050        let coa = Arc::new(gen.generate());
9051        self.coa = Some(Arc::clone(&coa));
9052
9053        if let Some(pb) = pb {
9054            pb.finish_with_message("Chart of Accounts complete");
9055        }
9056
9057        Ok(coa)
9058    }
9059
9060    /// Generate master data entities.
9061    fn generate_master_data(&mut self) -> SynthResult<()> {
9062        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9063            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9064        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9065
9066        let total = self.config.companies.len() as u64 * 5; // 5 entity types
9067        let pb = self.create_progress_bar(total, "Generating Master Data");
9068
9069        // Resolve country pack once for all companies (uses primary company's country)
9070        let pack = self.primary_pack().clone();
9071
9072        // Capture config values needed inside the parallel closure
9073        let vendors_per_company = self.phase_config.vendors_per_company;
9074        let customers_per_company = self.phase_config.customers_per_company;
9075        let materials_per_company = self.phase_config.materials_per_company;
9076        let assets_per_company = self.phase_config.assets_per_company;
9077        let coa_framework = self.resolve_coa_framework();
9078
9079        // Generate all master data in parallel across companies.
9080        // Each company's data is independent, making this embarrassingly parallel.
9081        let per_company_results: Vec<_> = self
9082            .config
9083            .companies
9084            .par_iter()
9085            .enumerate()
9086            .map(|(i, company)| {
9087                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9088                let pack = pack.clone();
9089
9090                // Generate vendors (offset counter so IDs are globally unique across companies)
9091                let mut vendor_gen = VendorGenerator::new(company_seed);
9092                vendor_gen.set_country_pack(pack.clone());
9093                vendor_gen.set_coa_framework(coa_framework);
9094                vendor_gen.set_counter_offset(i * vendors_per_company);
9095                // Wire vendor network config when enabled
9096                if self.config.vendor_network.enabled {
9097                    let vn = &self.config.vendor_network;
9098                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9099                        enabled: true,
9100                        depth: vn.depth,
9101                        tier1_count: datasynth_generators::TierCountConfig::new(
9102                            vn.tier1.min,
9103                            vn.tier1.max,
9104                        ),
9105                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
9106                            vn.tier2_per_parent.min,
9107                            vn.tier2_per_parent.max,
9108                        ),
9109                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
9110                            vn.tier3_per_parent.min,
9111                            vn.tier3_per_parent.max,
9112                        ),
9113                        cluster_distribution: datasynth_generators::ClusterDistribution {
9114                            reliable_strategic: vn.clusters.reliable_strategic,
9115                            standard_operational: vn.clusters.standard_operational,
9116                            transactional: vn.clusters.transactional,
9117                            problematic: vn.clusters.problematic,
9118                        },
9119                        concentration_limits: datasynth_generators::ConcentrationLimits {
9120                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9121                            max_top5: vn.dependencies.top_5_concentration,
9122                        },
9123                        ..datasynth_generators::VendorNetworkConfig::default()
9124                    });
9125                }
9126                let vendor_pool =
9127                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9128
9129                // Generate customers (offset counter so IDs are globally unique across companies)
9130                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9131                customer_gen.set_country_pack(pack.clone());
9132                customer_gen.set_coa_framework(coa_framework);
9133                customer_gen.set_counter_offset(i * customers_per_company);
9134                // Wire customer segmentation config when enabled
9135                if self.config.customer_segmentation.enabled {
9136                    let cs = &self.config.customer_segmentation;
9137                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9138                        enabled: true,
9139                        segment_distribution: datasynth_generators::SegmentDistribution {
9140                            enterprise: cs.value_segments.enterprise.customer_share,
9141                            mid_market: cs.value_segments.mid_market.customer_share,
9142                            smb: cs.value_segments.smb.customer_share,
9143                            consumer: cs.value_segments.consumer.customer_share,
9144                        },
9145                        referral_config: datasynth_generators::ReferralConfig {
9146                            enabled: cs.networks.referrals.enabled,
9147                            referral_rate: cs.networks.referrals.referral_rate,
9148                            ..Default::default()
9149                        },
9150                        hierarchy_config: datasynth_generators::HierarchyConfig {
9151                            enabled: cs.networks.corporate_hierarchies.enabled,
9152                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9153                            ..Default::default()
9154                        },
9155                        ..Default::default()
9156                    };
9157                    customer_gen.set_segmentation_config(seg_cfg);
9158                }
9159                let customer_pool = customer_gen.generate_customer_pool(
9160                    customers_per_company,
9161                    &company.code,
9162                    start_date,
9163                );
9164
9165                // Generate materials (offset counter so IDs are globally unique across companies)
9166                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9167                material_gen.set_country_pack(pack.clone());
9168                material_gen.set_counter_offset(i * materials_per_company);
9169                let material_pool = material_gen.generate_material_pool(
9170                    materials_per_company,
9171                    &company.code,
9172                    start_date,
9173                );
9174
9175                // Generate fixed assets
9176                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9177                let asset_pool = asset_gen.generate_asset_pool(
9178                    assets_per_company,
9179                    &company.code,
9180                    (start_date, end_date),
9181                );
9182
9183                // Generate employees
9184                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9185                employee_gen.set_country_pack(pack);
9186                let employee_pool =
9187                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9188
9189                // Generate employee change history (2-5 events per employee)
9190                let employee_change_history =
9191                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9192
9193                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9194                let employee_ids: Vec<String> = employee_pool
9195                    .employees
9196                    .iter()
9197                    .map(|e| e.employee_id.clone())
9198                    .collect();
9199                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9200                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9201
9202                (
9203                    vendor_pool.vendors,
9204                    customer_pool.customers,
9205                    material_pool.materials,
9206                    asset_pool.assets,
9207                    employee_pool.employees,
9208                    employee_change_history,
9209                    cost_centers,
9210                )
9211            })
9212            .collect();
9213
9214        // Aggregate results from all companies
9215        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9216            per_company_results
9217        {
9218            self.master_data.vendors.extend(vendors);
9219            self.master_data.customers.extend(customers);
9220            self.master_data.materials.extend(materials);
9221            self.master_data.assets.extend(assets);
9222            self.master_data.employees.extend(employees);
9223            self.master_data.cost_centers.extend(cost_centers);
9224            self.master_data
9225                .employee_change_history
9226                .extend(change_history);
9227        }
9228
9229        if let Some(pb) = &pb {
9230            pb.inc(total);
9231        }
9232        if let Some(pb) = pb {
9233            pb.finish_with_message("Master data generation complete");
9234        }
9235
9236        Ok(())
9237    }
9238
9239    /// Generate document flows (P2P and O2C).
9240    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9241        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9242            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9243
9244        // Generate P2P chains
9245        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
9246        let months = (self.config.global.period_months as usize).max(1);
9247        let p2p_count = self
9248            .phase_config
9249            .p2p_chains
9250            .min(self.master_data.vendors.len() * 2 * months);
9251        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9252
9253        // Convert P2P config from schema to generator config
9254        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9255        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9256        p2p_gen.set_country_pack(self.primary_pack().clone());
9257
9258        for i in 0..p2p_count {
9259            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9260            let materials: Vec<&Material> = self
9261                .master_data
9262                .materials
9263                .iter()
9264                .skip(i % self.master_data.materials.len().max(1))
9265                .take(2.min(self.master_data.materials.len()))
9266                .collect();
9267
9268            if materials.is_empty() {
9269                continue;
9270            }
9271
9272            let company = &self.config.companies[i % self.config.companies.len()];
9273            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9274            let fiscal_period = po_date.month() as u8;
9275            let created_by = if self.master_data.employees.is_empty() {
9276                "SYSTEM"
9277            } else {
9278                self.master_data.employees[i % self.master_data.employees.len()]
9279                    .user_id
9280                    .as_str()
9281            };
9282
9283            let chain = p2p_gen.generate_chain(
9284                &company.code,
9285                vendor,
9286                &materials,
9287                po_date,
9288                start_date.year() as u16,
9289                fiscal_period,
9290                created_by,
9291            );
9292
9293            // Flatten documents
9294            flows.purchase_orders.push(chain.purchase_order.clone());
9295            flows.goods_receipts.extend(chain.goods_receipts.clone());
9296            if let Some(vi) = &chain.vendor_invoice {
9297                flows.vendor_invoices.push(vi.clone());
9298            }
9299            if let Some(payment) = &chain.payment {
9300                flows.payments.push(payment.clone());
9301            }
9302            for remainder in &chain.remainder_payments {
9303                flows.payments.push(remainder.clone());
9304            }
9305            flows.p2p_chains.push(chain);
9306
9307            if let Some(pb) = &pb {
9308                pb.inc(1);
9309            }
9310        }
9311
9312        if let Some(pb) = pb {
9313            pb.finish_with_message("P2P document flows complete");
9314        }
9315
9316        // Generate O2C chains
9317        // Cap at ~2 SOs per customer per month to keep order volume realistic
9318        let o2c_count = self
9319            .phase_config
9320            .o2c_chains
9321            .min(self.master_data.customers.len() * 2 * months);
9322        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9323
9324        // Convert O2C config from schema to generator config
9325        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9326        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9327        o2c_gen.set_country_pack(self.primary_pack().clone());
9328
9329        for i in 0..o2c_count {
9330            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9331            let materials: Vec<&Material> = self
9332                .master_data
9333                .materials
9334                .iter()
9335                .skip(i % self.master_data.materials.len().max(1))
9336                .take(2.min(self.master_data.materials.len()))
9337                .collect();
9338
9339            if materials.is_empty() {
9340                continue;
9341            }
9342
9343            let company = &self.config.companies[i % self.config.companies.len()];
9344            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9345            let fiscal_period = so_date.month() as u8;
9346            let created_by = if self.master_data.employees.is_empty() {
9347                "SYSTEM"
9348            } else {
9349                self.master_data.employees[i % self.master_data.employees.len()]
9350                    .user_id
9351                    .as_str()
9352            };
9353
9354            let chain = o2c_gen.generate_chain(
9355                &company.code,
9356                customer,
9357                &materials,
9358                so_date,
9359                start_date.year() as u16,
9360                fiscal_period,
9361                created_by,
9362            );
9363
9364            // Flatten documents
9365            flows.sales_orders.push(chain.sales_order.clone());
9366            flows.deliveries.extend(chain.deliveries.clone());
9367            if let Some(ci) = &chain.customer_invoice {
9368                flows.customer_invoices.push(ci.clone());
9369            }
9370            if let Some(receipt) = &chain.customer_receipt {
9371                flows.payments.push(receipt.clone());
9372            }
9373            // Extract remainder receipts (follow-up to partial payments)
9374            for receipt in &chain.remainder_receipts {
9375                flows.payments.push(receipt.clone());
9376            }
9377            flows.o2c_chains.push(chain);
9378
9379            if let Some(pb) = &pb {
9380                pb.inc(1);
9381            }
9382        }
9383
9384        if let Some(pb) = pb {
9385            pb.finish_with_message("O2C document flows complete");
9386        }
9387
9388        // Collect all document cross-references from document headers.
9389        // Each document embeds references to its predecessor(s) via add_reference(); here we
9390        // denormalise them into a flat list for the document_references.json output file.
9391        {
9392            let mut refs = Vec::new();
9393            for doc in &flows.purchase_orders {
9394                refs.extend(doc.header.document_references.iter().cloned());
9395            }
9396            for doc in &flows.goods_receipts {
9397                refs.extend(doc.header.document_references.iter().cloned());
9398            }
9399            for doc in &flows.vendor_invoices {
9400                refs.extend(doc.header.document_references.iter().cloned());
9401            }
9402            for doc in &flows.sales_orders {
9403                refs.extend(doc.header.document_references.iter().cloned());
9404            }
9405            for doc in &flows.deliveries {
9406                refs.extend(doc.header.document_references.iter().cloned());
9407            }
9408            for doc in &flows.customer_invoices {
9409                refs.extend(doc.header.document_references.iter().cloned());
9410            }
9411            for doc in &flows.payments {
9412                refs.extend(doc.header.document_references.iter().cloned());
9413            }
9414            debug!(
9415                "Collected {} document cross-references from document headers",
9416                refs.len()
9417            );
9418            flows.document_references = refs;
9419        }
9420
9421        Ok(())
9422    }
9423
9424    /// Generate journal entries using parallel generation across multiple cores.
9425    fn generate_journal_entries(
9426        &mut self,
9427        coa: &Arc<ChartOfAccounts>,
9428    ) -> SynthResult<Vec<JournalEntry>> {
9429        use datasynth_core::traits::ParallelGenerator;
9430
9431        let total = self.calculate_total_transactions();
9432        let pb = self.create_progress_bar(total, "Generating Journal Entries");
9433
9434        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9435            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9436        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9437
9438        let company_codes: Vec<String> = self
9439            .config
9440            .companies
9441            .iter()
9442            .map(|c| c.code.clone())
9443            .collect();
9444
9445        let generator = JournalEntryGenerator::new_with_params(
9446            self.config.transactions.clone(),
9447            Arc::clone(coa),
9448            company_codes,
9449            start_date,
9450            end_date,
9451            self.seed,
9452        );
9453
9454        // Connect generated master data to ensure JEs reference real entities
9455        // Enable persona-based error injection for realistic human behavior
9456        // Pass fraud configuration for fraud injection
9457        let je_pack = self.primary_pack();
9458
9459        let mut generator = generator
9460            .with_master_data(
9461                &self.master_data.vendors,
9462                &self.master_data.customers,
9463                &self.master_data.materials,
9464            )
9465            .with_country_pack_names(je_pack)
9466            .with_country_pack_temporal(
9467                self.config.temporal_patterns.clone(),
9468                self.seed + 200,
9469                je_pack,
9470            )
9471            .with_persona_errors(true)
9472            .with_fraud_config(self.config.fraud.clone());
9473
9474        // Apply temporal drift if configured
9475        if self.config.temporal.enabled {
9476            let drift_config = self.config.temporal.to_core_config();
9477            generator = generator.with_drift_config(drift_config, self.seed + 100);
9478        }
9479
9480        // Check memory limit at start
9481        self.check_memory_limit()?;
9482
9483        // Determine parallelism: use available cores, but cap at total entries
9484        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9485
9486        // Use parallel generation for datasets with 10K+ entries.
9487        // Below this threshold, the statistical properties of a single-seeded
9488        // generator (e.g. Benford compliance) are better preserved.
9489        let entries = if total >= 10_000 && num_threads > 1 {
9490            // Parallel path: split the generator across cores and generate in parallel.
9491            // Each sub-generator gets a unique seed for deterministic, independent generation.
9492            let sub_generators = generator.split(num_threads);
9493            let entries_per_thread = total as usize / num_threads;
9494            let remainder = total as usize % num_threads;
9495
9496            let batches: Vec<Vec<JournalEntry>> = sub_generators
9497                .into_par_iter()
9498                .enumerate()
9499                .map(|(i, mut gen)| {
9500                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9501                    gen.generate_batch(count)
9502                })
9503                .collect();
9504
9505            // Merge all batches into a single Vec
9506            let entries = JournalEntryGenerator::merge_results(batches);
9507
9508            if let Some(pb) = &pb {
9509                pb.inc(total);
9510            }
9511            entries
9512        } else {
9513            // Sequential path for small datasets (< 1000 entries)
9514            let mut entries = Vec::with_capacity(total as usize);
9515            for _ in 0..total {
9516                let entry = generator.generate();
9517                entries.push(entry);
9518                if let Some(pb) = &pb {
9519                    pb.inc(1);
9520                }
9521            }
9522            entries
9523        };
9524
9525        if let Some(pb) = pb {
9526            pb.finish_with_message("Journal entries complete");
9527        }
9528
9529        Ok(entries)
9530    }
9531
9532    /// Generate journal entries from document flows.
9533    ///
9534    /// This creates proper GL entries for each document in the P2P and O2C flows,
9535    /// ensuring that document activity is reflected in the general ledger.
9536    fn generate_jes_from_document_flows(
9537        &mut self,
9538        flows: &DocumentFlowSnapshot,
9539    ) -> SynthResult<Vec<JournalEntry>> {
9540        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9541        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9542
9543        let je_config = match self.resolve_coa_framework() {
9544            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9545            CoAFramework::GermanSkr04 => {
9546                let fa = datasynth_core::FrameworkAccounts::german_gaap();
9547                DocumentFlowJeConfig::from(&fa)
9548            }
9549            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9550        };
9551
9552        let populate_fec = je_config.populate_fec_fields;
9553        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9554
9555        // Build auxiliary account lookup from vendor/customer master data so that
9556        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
9557        // PCG "4010001") instead of raw partner IDs.
9558        if populate_fec {
9559            let mut aux_lookup = std::collections::HashMap::new();
9560            for vendor in &self.master_data.vendors {
9561                if let Some(ref aux) = vendor.auxiliary_gl_account {
9562                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9563                }
9564            }
9565            for customer in &self.master_data.customers {
9566                if let Some(ref aux) = customer.auxiliary_gl_account {
9567                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9568                }
9569            }
9570            if !aux_lookup.is_empty() {
9571                generator.set_auxiliary_account_lookup(aux_lookup);
9572            }
9573        }
9574
9575        let mut entries = Vec::new();
9576
9577        // Generate JEs from P2P chains
9578        for chain in &flows.p2p_chains {
9579            let chain_entries = generator.generate_from_p2p_chain(chain);
9580            entries.extend(chain_entries);
9581            if let Some(pb) = &pb {
9582                pb.inc(1);
9583            }
9584        }
9585
9586        // Generate JEs from O2C chains
9587        for chain in &flows.o2c_chains {
9588            let chain_entries = generator.generate_from_o2c_chain(chain);
9589            entries.extend(chain_entries);
9590            if let Some(pb) = &pb {
9591                pb.inc(1);
9592            }
9593        }
9594
9595        if let Some(pb) = pb {
9596            pb.finish_with_message(format!(
9597                "Generated {} JEs from document flows",
9598                entries.len()
9599            ));
9600        }
9601
9602        Ok(entries)
9603    }
9604
9605    /// Generate journal entries from payroll runs.
9606    ///
9607    /// Creates one JE per payroll run:
9608    /// - DR Salaries & Wages (6100) for gross pay
9609    /// - CR Payroll Clearing (9100) for gross pay
9610    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9611        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9612
9613        let mut jes = Vec::with_capacity(payroll_runs.len());
9614
9615        for run in payroll_runs {
9616            let mut je = JournalEntry::new_simple(
9617                format!("JE-PAYROLL-{}", run.payroll_id),
9618                run.company_code.clone(),
9619                run.run_date,
9620                format!("Payroll {}", run.payroll_id),
9621            );
9622
9623            // Debit Salaries & Wages for gross pay
9624            je.add_line(JournalEntryLine {
9625                line_number: 1,
9626                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9627                debit_amount: run.total_gross,
9628                reference: Some(run.payroll_id.clone()),
9629                text: Some(format!(
9630                    "Payroll {} ({} employees)",
9631                    run.payroll_id, run.employee_count
9632                )),
9633                ..Default::default()
9634            });
9635
9636            // Credit Payroll Clearing for gross pay
9637            je.add_line(JournalEntryLine {
9638                line_number: 2,
9639                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9640                credit_amount: run.total_gross,
9641                reference: Some(run.payroll_id.clone()),
9642                ..Default::default()
9643            });
9644
9645            jes.push(je);
9646        }
9647
9648        jes
9649    }
9650
9651    /// Link document flows to subledger records.
9652    ///
9653    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
9654    /// ensuring subledger data is coherent with document flow data.
9655    fn link_document_flows_to_subledgers(
9656        &mut self,
9657        flows: &DocumentFlowSnapshot,
9658    ) -> SynthResult<SubledgerSnapshot> {
9659        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9660        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9661
9662        // Build vendor/customer name maps from master data for realistic subledger names
9663        let vendor_names: std::collections::HashMap<String, String> = self
9664            .master_data
9665            .vendors
9666            .iter()
9667            .map(|v| (v.vendor_id.clone(), v.name.clone()))
9668            .collect();
9669        let customer_names: std::collections::HashMap<String, String> = self
9670            .master_data
9671            .customers
9672            .iter()
9673            .map(|c| (c.customer_id.clone(), c.name.clone()))
9674            .collect();
9675
9676        let mut linker = DocumentFlowLinker::new()
9677            .with_vendor_names(vendor_names)
9678            .with_customer_names(customer_names);
9679
9680        // Convert vendor invoices to AP invoices
9681        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9682        if let Some(pb) = &pb {
9683            pb.inc(flows.vendor_invoices.len() as u64);
9684        }
9685
9686        // Convert customer invoices to AR invoices
9687        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9688        if let Some(pb) = &pb {
9689            pb.inc(flows.customer_invoices.len() as u64);
9690        }
9691
9692        if let Some(pb) = pb {
9693            pb.finish_with_message(format!(
9694                "Linked {} AP and {} AR invoices",
9695                ap_invoices.len(),
9696                ar_invoices.len()
9697            ));
9698        }
9699
9700        Ok(SubledgerSnapshot {
9701            ap_invoices,
9702            ar_invoices,
9703            fa_records: Vec::new(),
9704            inventory_positions: Vec::new(),
9705            inventory_movements: Vec::new(),
9706            // Aging reports are computed after payment settlement in phase_document_flows.
9707            ar_aging_reports: Vec::new(),
9708            ap_aging_reports: Vec::new(),
9709            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
9710            depreciation_runs: Vec::new(),
9711            inventory_valuations: Vec::new(),
9712            // Dunning runs and letters are populated in phase_document_flows after AR aging.
9713            dunning_runs: Vec::new(),
9714            dunning_letters: Vec::new(),
9715        })
9716    }
9717
9718    /// Generate OCPM events from document flows.
9719    ///
9720    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
9721    /// capturing the object-centric process perspective.
9722    #[allow(clippy::too_many_arguments)]
9723    fn generate_ocpm_events(
9724        &mut self,
9725        flows: &DocumentFlowSnapshot,
9726        sourcing: &SourcingSnapshot,
9727        hr: &HrSnapshot,
9728        manufacturing: &ManufacturingSnapshot,
9729        banking: &BankingSnapshot,
9730        audit: &AuditSnapshot,
9731        financial_reporting: &FinancialReportingSnapshot,
9732    ) -> SynthResult<OcpmSnapshot> {
9733        let total_chains = flows.p2p_chains.len()
9734            + flows.o2c_chains.len()
9735            + sourcing.sourcing_projects.len()
9736            + hr.payroll_runs.len()
9737            + manufacturing.production_orders.len()
9738            + banking.customers.len()
9739            + audit.engagements.len()
9740            + financial_reporting.bank_reconciliations.len();
9741        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9742
9743        // Create OCPM event log with standard types
9744        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9745        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9746
9747        // Configure the OCPM generator
9748        let ocpm_config = OcpmGeneratorConfig {
9749            generate_p2p: true,
9750            generate_o2c: true,
9751            generate_s2c: !sourcing.sourcing_projects.is_empty(),
9752            generate_h2r: !hr.payroll_runs.is_empty(),
9753            generate_mfg: !manufacturing.production_orders.is_empty(),
9754            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9755            generate_bank: !banking.customers.is_empty(),
9756            generate_audit: !audit.engagements.is_empty(),
9757            happy_path_rate: 0.75,
9758            exception_path_rate: 0.20,
9759            error_path_rate: 0.05,
9760            add_duration_variability: true,
9761            duration_std_dev_factor: 0.3,
9762        };
9763        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9764        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9765
9766        // Get available users for resource assignment
9767        let available_users: Vec<String> = self
9768            .master_data
9769            .employees
9770            .iter()
9771            .take(20)
9772            .map(|e| e.user_id.clone())
9773            .collect();
9774
9775        // Deterministic base date from config (avoids Utc::now() non-determinism)
9776        let fallback_date =
9777            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9778        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9779            .unwrap_or(fallback_date);
9780        let base_midnight = base_date
9781            .and_hms_opt(0, 0, 0)
9782            .expect("midnight is always valid");
9783        let base_datetime =
9784            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9785
9786        // Helper closure to add case results to event log
9787        let add_result = |event_log: &mut OcpmEventLog,
9788                          result: datasynth_ocpm::CaseGenerationResult| {
9789            for event in result.events {
9790                event_log.add_event(event);
9791            }
9792            for object in result.objects {
9793                event_log.add_object(object);
9794            }
9795            for relationship in result.relationships {
9796                event_log.add_relationship(relationship);
9797            }
9798            for corr in result.correlation_events {
9799                event_log.add_correlation_event(corr);
9800            }
9801            event_log.add_case(result.case_trace);
9802        };
9803
9804        // Generate events from P2P chains
9805        for chain in &flows.p2p_chains {
9806            let po = &chain.purchase_order;
9807            let documents = P2pDocuments::new(
9808                &po.header.document_id,
9809                &po.vendor_id,
9810                &po.header.company_code,
9811                po.total_net_amount,
9812                &po.header.currency,
9813                &ocpm_uuid_factory,
9814            )
9815            .with_goods_receipt(
9816                chain
9817                    .goods_receipts
9818                    .first()
9819                    .map(|gr| gr.header.document_id.as_str())
9820                    .unwrap_or(""),
9821                &ocpm_uuid_factory,
9822            )
9823            .with_invoice(
9824                chain
9825                    .vendor_invoice
9826                    .as_ref()
9827                    .map(|vi| vi.header.document_id.as_str())
9828                    .unwrap_or(""),
9829                &ocpm_uuid_factory,
9830            )
9831            .with_payment(
9832                chain
9833                    .payment
9834                    .as_ref()
9835                    .map(|p| p.header.document_id.as_str())
9836                    .unwrap_or(""),
9837                &ocpm_uuid_factory,
9838            );
9839
9840            let start_time =
9841                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9842            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9843            add_result(&mut event_log, result);
9844
9845            if let Some(pb) = &pb {
9846                pb.inc(1);
9847            }
9848        }
9849
9850        // Generate events from O2C chains
9851        for chain in &flows.o2c_chains {
9852            let so = &chain.sales_order;
9853            let documents = O2cDocuments::new(
9854                &so.header.document_id,
9855                &so.customer_id,
9856                &so.header.company_code,
9857                so.total_net_amount,
9858                &so.header.currency,
9859                &ocpm_uuid_factory,
9860            )
9861            .with_delivery(
9862                chain
9863                    .deliveries
9864                    .first()
9865                    .map(|d| d.header.document_id.as_str())
9866                    .unwrap_or(""),
9867                &ocpm_uuid_factory,
9868            )
9869            .with_invoice(
9870                chain
9871                    .customer_invoice
9872                    .as_ref()
9873                    .map(|ci| ci.header.document_id.as_str())
9874                    .unwrap_or(""),
9875                &ocpm_uuid_factory,
9876            )
9877            .with_receipt(
9878                chain
9879                    .customer_receipt
9880                    .as_ref()
9881                    .map(|r| r.header.document_id.as_str())
9882                    .unwrap_or(""),
9883                &ocpm_uuid_factory,
9884            );
9885
9886            let start_time =
9887                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9888            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9889            add_result(&mut event_log, result);
9890
9891            if let Some(pb) = &pb {
9892                pb.inc(1);
9893            }
9894        }
9895
9896        // Generate events from S2C sourcing projects
9897        for project in &sourcing.sourcing_projects {
9898            // Find vendor from contracts or qualifications
9899            let vendor_id = sourcing
9900                .contracts
9901                .iter()
9902                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9903                .map(|c| c.vendor_id.clone())
9904                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9905                .or_else(|| {
9906                    self.master_data
9907                        .vendors
9908                        .first()
9909                        .map(|v| v.vendor_id.clone())
9910                })
9911                .unwrap_or_else(|| "V000".to_string());
9912            let mut docs = S2cDocuments::new(
9913                &project.project_id,
9914                &vendor_id,
9915                &project.company_code,
9916                project.estimated_annual_spend,
9917                &ocpm_uuid_factory,
9918            );
9919            // Link RFx if available
9920            if let Some(rfx) = sourcing
9921                .rfx_events
9922                .iter()
9923                .find(|r| r.sourcing_project_id == project.project_id)
9924            {
9925                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
9926                // Link winning bid (status == Accepted)
9927                if let Some(bid) = sourcing.bids.iter().find(|b| {
9928                    b.rfx_id == rfx.rfx_id
9929                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
9930                }) {
9931                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
9932                }
9933            }
9934            // Link contract
9935            if let Some(contract) = sourcing
9936                .contracts
9937                .iter()
9938                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9939            {
9940                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
9941            }
9942            let start_time = base_datetime - chrono::Duration::days(90);
9943            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
9944            add_result(&mut event_log, result);
9945
9946            if let Some(pb) = &pb {
9947                pb.inc(1);
9948            }
9949        }
9950
9951        // Generate events from H2R payroll runs
9952        for run in &hr.payroll_runs {
9953            // Use first matching payroll line item's employee, or fallback
9954            let employee_id = hr
9955                .payroll_line_items
9956                .iter()
9957                .find(|li| li.payroll_id == run.payroll_id)
9958                .map(|li| li.employee_id.as_str())
9959                .unwrap_or("EMP000");
9960            let docs = H2rDocuments::new(
9961                &run.payroll_id,
9962                employee_id,
9963                &run.company_code,
9964                run.total_gross,
9965                &ocpm_uuid_factory,
9966            )
9967            .with_time_entries(
9968                hr.time_entries
9969                    .iter()
9970                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
9971                    .take(5)
9972                    .map(|t| t.entry_id.as_str())
9973                    .collect(),
9974            );
9975            let start_time = base_datetime - chrono::Duration::days(30);
9976            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
9977            add_result(&mut event_log, result);
9978
9979            if let Some(pb) = &pb {
9980                pb.inc(1);
9981            }
9982        }
9983
9984        // Generate events from MFG production orders
9985        for order in &manufacturing.production_orders {
9986            let mut docs = MfgDocuments::new(
9987                &order.order_id,
9988                &order.material_id,
9989                &order.company_code,
9990                order.planned_quantity,
9991                &ocpm_uuid_factory,
9992            )
9993            .with_operations(
9994                order
9995                    .operations
9996                    .iter()
9997                    .map(|o| format!("OP-{:04}", o.operation_number))
9998                    .collect::<Vec<_>>()
9999                    .iter()
10000                    .map(std::string::String::as_str)
10001                    .collect(),
10002            );
10003            // Link quality inspection if available (via reference_id matching order_id)
10004            if let Some(insp) = manufacturing
10005                .quality_inspections
10006                .iter()
10007                .find(|i| i.reference_id == order.order_id)
10008            {
10009                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10010            }
10011            // Link cycle count if available (match by material_id in items)
10012            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10013                cc.items
10014                    .iter()
10015                    .any(|item| item.material_id == order.material_id)
10016            }) {
10017                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10018            }
10019            let start_time = base_datetime - chrono::Duration::days(60);
10020            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10021            add_result(&mut event_log, result);
10022
10023            if let Some(pb) = &pb {
10024                pb.inc(1);
10025            }
10026        }
10027
10028        // Generate events from Banking customers
10029        for customer in &banking.customers {
10030            let customer_id_str = customer.customer_id.to_string();
10031            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10032            // Link accounts (primary_owner_id matches customer_id)
10033            if let Some(account) = banking
10034                .accounts
10035                .iter()
10036                .find(|a| a.primary_owner_id == customer.customer_id)
10037            {
10038                let account_id_str = account.account_id.to_string();
10039                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10040                // Link transactions for this account
10041                let txn_strs: Vec<String> = banking
10042                    .transactions
10043                    .iter()
10044                    .filter(|t| t.account_id == account.account_id)
10045                    .take(10)
10046                    .map(|t| t.transaction_id.to_string())
10047                    .collect();
10048                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10049                let txn_amounts: Vec<rust_decimal::Decimal> = banking
10050                    .transactions
10051                    .iter()
10052                    .filter(|t| t.account_id == account.account_id)
10053                    .take(10)
10054                    .map(|t| t.amount)
10055                    .collect();
10056                if !txn_ids.is_empty() {
10057                    docs = docs.with_transactions(txn_ids, txn_amounts);
10058                }
10059            }
10060            let start_time = base_datetime - chrono::Duration::days(180);
10061            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10062            add_result(&mut event_log, result);
10063
10064            if let Some(pb) = &pb {
10065                pb.inc(1);
10066            }
10067        }
10068
10069        // Generate events from Audit engagements
10070        for engagement in &audit.engagements {
10071            let engagement_id_str = engagement.engagement_id.to_string();
10072            let docs = AuditDocuments::new(
10073                &engagement_id_str,
10074                &engagement.client_entity_id,
10075                &ocpm_uuid_factory,
10076            )
10077            .with_workpapers(
10078                audit
10079                    .workpapers
10080                    .iter()
10081                    .filter(|w| w.engagement_id == engagement.engagement_id)
10082                    .take(10)
10083                    .map(|w| w.workpaper_id.to_string())
10084                    .collect::<Vec<_>>()
10085                    .iter()
10086                    .map(std::string::String::as_str)
10087                    .collect(),
10088            )
10089            .with_evidence(
10090                audit
10091                    .evidence
10092                    .iter()
10093                    .filter(|e| e.engagement_id == engagement.engagement_id)
10094                    .take(10)
10095                    .map(|e| e.evidence_id.to_string())
10096                    .collect::<Vec<_>>()
10097                    .iter()
10098                    .map(std::string::String::as_str)
10099                    .collect(),
10100            )
10101            .with_risks(
10102                audit
10103                    .risk_assessments
10104                    .iter()
10105                    .filter(|r| r.engagement_id == engagement.engagement_id)
10106                    .take(5)
10107                    .map(|r| r.risk_id.to_string())
10108                    .collect::<Vec<_>>()
10109                    .iter()
10110                    .map(std::string::String::as_str)
10111                    .collect(),
10112            )
10113            .with_findings(
10114                audit
10115                    .findings
10116                    .iter()
10117                    .filter(|f| f.engagement_id == engagement.engagement_id)
10118                    .take(5)
10119                    .map(|f| f.finding_id.to_string())
10120                    .collect::<Vec<_>>()
10121                    .iter()
10122                    .map(std::string::String::as_str)
10123                    .collect(),
10124            )
10125            .with_judgments(
10126                audit
10127                    .judgments
10128                    .iter()
10129                    .filter(|j| j.engagement_id == engagement.engagement_id)
10130                    .take(5)
10131                    .map(|j| j.judgment_id.to_string())
10132                    .collect::<Vec<_>>()
10133                    .iter()
10134                    .map(std::string::String::as_str)
10135                    .collect(),
10136            );
10137            let start_time = base_datetime - chrono::Duration::days(120);
10138            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10139            add_result(&mut event_log, result);
10140
10141            if let Some(pb) = &pb {
10142                pb.inc(1);
10143            }
10144        }
10145
10146        // Generate events from Bank Reconciliations
10147        for recon in &financial_reporting.bank_reconciliations {
10148            let docs = BankReconDocuments::new(
10149                &recon.reconciliation_id,
10150                &recon.bank_account_id,
10151                &recon.company_code,
10152                recon.bank_ending_balance,
10153                &ocpm_uuid_factory,
10154            )
10155            .with_statement_lines(
10156                recon
10157                    .statement_lines
10158                    .iter()
10159                    .take(20)
10160                    .map(|l| l.line_id.as_str())
10161                    .collect(),
10162            )
10163            .with_reconciling_items(
10164                recon
10165                    .reconciling_items
10166                    .iter()
10167                    .take(10)
10168                    .map(|i| i.item_id.as_str())
10169                    .collect(),
10170            );
10171            let start_time = base_datetime - chrono::Duration::days(30);
10172            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10173            add_result(&mut event_log, result);
10174
10175            if let Some(pb) = &pb {
10176                pb.inc(1);
10177            }
10178        }
10179
10180        // Compute process variants
10181        event_log.compute_variants();
10182
10183        let summary = event_log.summary();
10184
10185        if let Some(pb) = pb {
10186            pb.finish_with_message(format!(
10187                "Generated {} OCPM events, {} objects",
10188                summary.event_count, summary.object_count
10189            ));
10190        }
10191
10192        Ok(OcpmSnapshot {
10193            event_count: summary.event_count,
10194            object_count: summary.object_count,
10195            case_count: summary.case_count,
10196            event_log: Some(event_log),
10197        })
10198    }
10199
10200    /// Inject anomalies into journal entries.
10201    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10202        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10203
10204        // Read anomaly rates from config instead of using hardcoded values.
10205        // Priority: anomaly_injection config > fraud config > default 0.02
10206        let total_rate = if self.config.anomaly_injection.enabled {
10207            self.config.anomaly_injection.rates.total_rate
10208        } else if self.config.fraud.enabled {
10209            self.config.fraud.fraud_rate
10210        } else {
10211            0.02
10212        };
10213
10214        let fraud_rate = if self.config.anomaly_injection.enabled {
10215            self.config.anomaly_injection.rates.fraud_rate
10216        } else {
10217            AnomalyRateConfig::default().fraud_rate
10218        };
10219
10220        let error_rate = if self.config.anomaly_injection.enabled {
10221            self.config.anomaly_injection.rates.error_rate
10222        } else {
10223            AnomalyRateConfig::default().error_rate
10224        };
10225
10226        let process_issue_rate = if self.config.anomaly_injection.enabled {
10227            self.config.anomaly_injection.rates.process_rate
10228        } else {
10229            AnomalyRateConfig::default().process_issue_rate
10230        };
10231
10232        let anomaly_config = AnomalyInjectorConfig {
10233            rates: AnomalyRateConfig {
10234                total_rate,
10235                fraud_rate,
10236                error_rate,
10237                process_issue_rate,
10238                ..Default::default()
10239            },
10240            seed: self.seed + 5000,
10241            ..Default::default()
10242        };
10243
10244        let mut injector = AnomalyInjector::new(anomaly_config);
10245        let result = injector.process_entries(entries);
10246
10247        if let Some(pb) = &pb {
10248            pb.inc(entries.len() as u64);
10249            pb.finish_with_message("Anomaly injection complete");
10250        }
10251
10252        let mut by_type = HashMap::new();
10253        for label in &result.labels {
10254            *by_type
10255                .entry(format!("{:?}", label.anomaly_type))
10256                .or_insert(0) += 1;
10257        }
10258
10259        Ok(AnomalyLabels {
10260            labels: result.labels,
10261            summary: Some(result.summary),
10262            by_type,
10263        })
10264    }
10265
10266    /// Validate journal entries using running balance tracker.
10267    ///
10268    /// Applies all entries to the balance tracker and validates:
10269    /// - Each entry is internally balanced (debits = credits)
10270    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
10271    ///
10272    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
10273    /// excluded from balance validation as they may be intentionally unbalanced.
10274    fn validate_journal_entries(
10275        &mut self,
10276        entries: &[JournalEntry],
10277    ) -> SynthResult<BalanceValidationResult> {
10278        // Filter out entries with human errors as they may be intentionally unbalanced
10279        let clean_entries: Vec<&JournalEntry> = entries
10280            .iter()
10281            .filter(|e| {
10282                e.header
10283                    .header_text
10284                    .as_ref()
10285                    .map(|t| !t.contains("[HUMAN_ERROR:"))
10286                    .unwrap_or(true)
10287            })
10288            .collect();
10289
10290        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10291
10292        // Configure tracker to not fail on errors (collect them instead)
10293        let config = BalanceTrackerConfig {
10294            validate_on_each_entry: false,   // We'll validate at the end
10295            track_history: false,            // Skip history for performance
10296            fail_on_validation_error: false, // Collect errors, don't fail
10297            ..Default::default()
10298        };
10299        let validation_currency = self
10300            .config
10301            .companies
10302            .first()
10303            .map(|c| c.currency.clone())
10304            .unwrap_or_else(|| "USD".to_string());
10305
10306        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10307
10308        // Apply clean entries (without human errors)
10309        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10310        let errors = tracker.apply_entries(&clean_refs);
10311
10312        if let Some(pb) = &pb {
10313            pb.inc(entries.len() as u64);
10314        }
10315
10316        // Check if any entries were unbalanced
10317        // Note: When fail_on_validation_error is false, errors are stored in tracker
10318        let has_unbalanced = tracker
10319            .get_validation_errors()
10320            .iter()
10321            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10322
10323        // Validate balance sheet for each company
10324        // Include both returned errors and collected validation errors
10325        let mut all_errors = errors;
10326        all_errors.extend(tracker.get_validation_errors().iter().cloned());
10327        let company_codes: Vec<String> = self
10328            .config
10329            .companies
10330            .iter()
10331            .map(|c| c.code.clone())
10332            .collect();
10333
10334        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10335            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10336            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10337
10338        for company_code in &company_codes {
10339            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10340                all_errors.push(e);
10341            }
10342        }
10343
10344        // Get statistics after all mutable operations are done
10345        let stats = tracker.get_statistics();
10346
10347        // Determine if balanced overall
10348        let is_balanced = all_errors.is_empty();
10349
10350        if let Some(pb) = pb {
10351            let msg = if is_balanced {
10352                "Balance validation passed"
10353            } else {
10354                "Balance validation completed with errors"
10355            };
10356            pb.finish_with_message(msg);
10357        }
10358
10359        Ok(BalanceValidationResult {
10360            validated: true,
10361            is_balanced,
10362            entries_processed: stats.entries_processed,
10363            total_debits: stats.total_debits,
10364            total_credits: stats.total_credits,
10365            accounts_tracked: stats.accounts_tracked,
10366            companies_tracked: stats.companies_tracked,
10367            validation_errors: all_errors,
10368            has_unbalanced_entries: has_unbalanced,
10369        })
10370    }
10371
10372    /// Inject data quality variations into journal entries.
10373    ///
10374    /// Applies typos, missing values, and format variations to make
10375    /// the synthetic data more realistic for testing data cleaning pipelines.
10376    fn inject_data_quality(
10377        &mut self,
10378        entries: &mut [JournalEntry],
10379    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10380        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10381
10382        // Build config from user-specified schema settings when data_quality is enabled;
10383        // otherwise fall back to the low-rate minimal() preset.
10384        let config = if self.config.data_quality.enabled {
10385            let dq = &self.config.data_quality;
10386            DataQualityConfig {
10387                enable_missing_values: dq.missing_values.enabled,
10388                missing_values: datasynth_generators::MissingValueConfig {
10389                    global_rate: dq.effective_missing_rate(),
10390                    ..Default::default()
10391                },
10392                enable_format_variations: dq.format_variations.enabled,
10393                format_variations: datasynth_generators::FormatVariationConfig {
10394                    date_variation_rate: dq.format_variations.dates.rate,
10395                    amount_variation_rate: dq.format_variations.amounts.rate,
10396                    identifier_variation_rate: dq.format_variations.identifiers.rate,
10397                    ..Default::default()
10398                },
10399                enable_duplicates: dq.duplicates.enabled,
10400                duplicates: datasynth_generators::DuplicateConfig {
10401                    duplicate_rate: dq.effective_duplicate_rate(),
10402                    ..Default::default()
10403                },
10404                enable_typos: dq.typos.enabled,
10405                typos: datasynth_generators::TypoConfig {
10406                    char_error_rate: dq.effective_typo_rate(),
10407                    ..Default::default()
10408                },
10409                enable_encoding_issues: dq.encoding_issues.enabled,
10410                encoding_issue_rate: dq.encoding_issues.rate,
10411                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
10412                track_statistics: true,
10413            }
10414        } else {
10415            DataQualityConfig::minimal()
10416        };
10417        let mut injector = DataQualityInjector::new(config);
10418
10419        // Wire country pack for locale-aware format baselines
10420        injector.set_country_pack(self.primary_pack().clone());
10421
10422        // Build context for missing value decisions
10423        let context = HashMap::new();
10424
10425        for entry in entries.iter_mut() {
10426            // Process header_text field (common target for typos)
10427            if let Some(text) = &entry.header.header_text {
10428                let processed = injector.process_text_field(
10429                    "header_text",
10430                    text,
10431                    &entry.header.document_id.to_string(),
10432                    &context,
10433                );
10434                match processed {
10435                    Some(new_text) if new_text != *text => {
10436                        entry.header.header_text = Some(new_text);
10437                    }
10438                    None => {
10439                        entry.header.header_text = None; // Missing value
10440                    }
10441                    _ => {}
10442                }
10443            }
10444
10445            // Process reference field
10446            if let Some(ref_text) = &entry.header.reference {
10447                let processed = injector.process_text_field(
10448                    "reference",
10449                    ref_text,
10450                    &entry.header.document_id.to_string(),
10451                    &context,
10452                );
10453                match processed {
10454                    Some(new_text) if new_text != *ref_text => {
10455                        entry.header.reference = Some(new_text);
10456                    }
10457                    None => {
10458                        entry.header.reference = None;
10459                    }
10460                    _ => {}
10461                }
10462            }
10463
10464            // Process user_persona field (potential for typos in user IDs)
10465            let user_persona = entry.header.user_persona.clone();
10466            if let Some(processed) = injector.process_text_field(
10467                "user_persona",
10468                &user_persona,
10469                &entry.header.document_id.to_string(),
10470                &context,
10471            ) {
10472                if processed != user_persona {
10473                    entry.header.user_persona = processed;
10474                }
10475            }
10476
10477            // Process line items
10478            for line in &mut entry.lines {
10479                // Process line description if present
10480                if let Some(ref text) = line.line_text {
10481                    let processed = injector.process_text_field(
10482                        "line_text",
10483                        text,
10484                        &entry.header.document_id.to_string(),
10485                        &context,
10486                    );
10487                    match processed {
10488                        Some(new_text) if new_text != *text => {
10489                            line.line_text = Some(new_text);
10490                        }
10491                        None => {
10492                            line.line_text = None;
10493                        }
10494                        _ => {}
10495                    }
10496                }
10497
10498                // Process cost_center if present
10499                if let Some(cc) = &line.cost_center {
10500                    let processed = injector.process_text_field(
10501                        "cost_center",
10502                        cc,
10503                        &entry.header.document_id.to_string(),
10504                        &context,
10505                    );
10506                    match processed {
10507                        Some(new_cc) if new_cc != *cc => {
10508                            line.cost_center = Some(new_cc);
10509                        }
10510                        None => {
10511                            line.cost_center = None;
10512                        }
10513                        _ => {}
10514                    }
10515                }
10516            }
10517
10518            if let Some(pb) = &pb {
10519                pb.inc(1);
10520            }
10521        }
10522
10523        if let Some(pb) = pb {
10524            pb.finish_with_message("Data quality injection complete");
10525        }
10526
10527        let quality_issues = injector.issues().to_vec();
10528        Ok((injector.stats().clone(), quality_issues))
10529    }
10530
10531    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
10532    ///
10533    /// Creates complete audit documentation for each company in the configuration,
10534    /// following ISA standards:
10535    /// - ISA 210/220: Engagement acceptance and terms
10536    /// - ISA 230: Audit documentation (workpapers)
10537    /// - ISA 265: Control deficiencies (findings)
10538    /// - ISA 315/330: Risk assessment and response
10539    /// - ISA 500: Audit evidence
10540    /// - ISA 200: Professional judgment
10541    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10542        // Check if FSM-driven audit generation is enabled
10543        let use_fsm = self
10544            .config
10545            .audit
10546            .fsm
10547            .as_ref()
10548            .map(|f| f.enabled)
10549            .unwrap_or(false);
10550
10551        if use_fsm {
10552            return self.generate_audit_data_with_fsm(entries);
10553        }
10554
10555        // --- Legacy (non-FSM) audit generation follows ---
10556        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10557            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10558        let fiscal_year = start_date.year() as u16;
10559        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10560
10561        // Calculate rough total revenue from entries for materiality
10562        let total_revenue: rust_decimal::Decimal = entries
10563            .iter()
10564            .flat_map(|e| e.lines.iter())
10565            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10566            .map(|l| l.credit_amount)
10567            .sum();
10568
10569        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
10570        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10571
10572        let mut snapshot = AuditSnapshot::default();
10573
10574        // Initialize generators
10575        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10576        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10577        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10578        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10579        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10580        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10581        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10582        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10583        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10584        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10585        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10586        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10587
10588        // Get list of accounts from CoA for risk assessment
10589        let accounts: Vec<String> = self
10590            .coa
10591            .as_ref()
10592            .map(|coa| {
10593                coa.get_postable_accounts()
10594                    .iter()
10595                    .map(|acc| acc.account_code().to_string())
10596                    .collect()
10597            })
10598            .unwrap_or_default();
10599
10600        // Generate engagements for each company
10601        for (i, company) in self.config.companies.iter().enumerate() {
10602            // Calculate company-specific revenue (proportional to volume weight)
10603            let company_revenue = total_revenue
10604                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10605
10606            // Generate engagements for this company
10607            let engagements_for_company =
10608                self.phase_config.audit_engagements / self.config.companies.len().max(1);
10609            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10610                1
10611            } else {
10612                0
10613            };
10614
10615            for _eng_idx in 0..(engagements_for_company + extra) {
10616                // Generate the engagement
10617                let mut engagement = engagement_gen.generate_engagement(
10618                    &company.code,
10619                    &company.name,
10620                    fiscal_year,
10621                    period_end,
10622                    company_revenue,
10623                    None, // Use default engagement type
10624                );
10625
10626                // Replace synthetic team IDs with real employee IDs from master data
10627                if !self.master_data.employees.is_empty() {
10628                    let emp_count = self.master_data.employees.len();
10629                    // Use employee IDs deterministically based on engagement index
10630                    let base = (i * 10 + _eng_idx) % emp_count;
10631                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10632                        .employee_id
10633                        .clone();
10634                    engagement.engagement_manager_id = self.master_data.employees
10635                        [(base + 1) % emp_count]
10636                        .employee_id
10637                        .clone();
10638                    let real_team: Vec<String> = engagement
10639                        .team_member_ids
10640                        .iter()
10641                        .enumerate()
10642                        .map(|(j, _)| {
10643                            self.master_data.employees[(base + 2 + j) % emp_count]
10644                                .employee_id
10645                                .clone()
10646                        })
10647                        .collect();
10648                    engagement.team_member_ids = real_team;
10649                }
10650
10651                if let Some(pb) = &pb {
10652                    pb.inc(1);
10653                }
10654
10655                // Get team members from the engagement
10656                let team_members: Vec<String> = engagement.team_member_ids.clone();
10657
10658                // Generate workpapers for the engagement
10659                let workpapers =
10660                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10661
10662                for wp in &workpapers {
10663                    if let Some(pb) = &pb {
10664                        pb.inc(1);
10665                    }
10666
10667                    // Generate evidence for each workpaper
10668                    let evidence = evidence_gen.generate_evidence_for_workpaper(
10669                        wp,
10670                        &team_members,
10671                        wp.preparer_date,
10672                    );
10673
10674                    for _ in &evidence {
10675                        if let Some(pb) = &pb {
10676                            pb.inc(1);
10677                        }
10678                    }
10679
10680                    snapshot.evidence.extend(evidence);
10681                }
10682
10683                // Generate risk assessments for the engagement
10684                let risks =
10685                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10686
10687                for _ in &risks {
10688                    if let Some(pb) = &pb {
10689                        pb.inc(1);
10690                    }
10691                }
10692                snapshot.risk_assessments.extend(risks);
10693
10694                // Generate findings for the engagement
10695                let findings = finding_gen.generate_findings_for_engagement(
10696                    &engagement,
10697                    &workpapers,
10698                    &team_members,
10699                );
10700
10701                for _ in &findings {
10702                    if let Some(pb) = &pb {
10703                        pb.inc(1);
10704                    }
10705                }
10706                snapshot.findings.extend(findings);
10707
10708                // Generate professional judgments for the engagement
10709                let judgments =
10710                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10711
10712                for _ in &judgments {
10713                    if let Some(pb) = &pb {
10714                        pb.inc(1);
10715                    }
10716                }
10717                snapshot.judgments.extend(judgments);
10718
10719                // ISA 505: External confirmations and responses
10720                let (confs, resps) =
10721                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10722                snapshot.confirmations.extend(confs);
10723                snapshot.confirmation_responses.extend(resps);
10724
10725                // ISA 330: Procedure steps per workpaper
10726                let team_pairs: Vec<(String, String)> = team_members
10727                    .iter()
10728                    .map(|id| {
10729                        let name = self
10730                            .master_data
10731                            .employees
10732                            .iter()
10733                            .find(|e| e.employee_id == *id)
10734                            .map(|e| e.display_name.clone())
10735                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10736                        (id.clone(), name)
10737                    })
10738                    .collect();
10739                for wp in &workpapers {
10740                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10741                    snapshot.procedure_steps.extend(steps);
10742                }
10743
10744                // ISA 530: Samples per workpaper
10745                for wp in &workpapers {
10746                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10747                        snapshot.samples.push(sample);
10748                    }
10749                }
10750
10751                // ISA 520: Analytical procedures
10752                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10753                snapshot.analytical_results.extend(analytical);
10754
10755                // ISA 610: Internal audit function and reports
10756                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10757                snapshot.ia_functions.push(ia_func);
10758                snapshot.ia_reports.extend(ia_reports);
10759
10760                // ISA 550: Related parties and transactions
10761                let vendor_names: Vec<String> = self
10762                    .master_data
10763                    .vendors
10764                    .iter()
10765                    .map(|v| v.name.clone())
10766                    .collect();
10767                let customer_names: Vec<String> = self
10768                    .master_data
10769                    .customers
10770                    .iter()
10771                    .map(|c| c.name.clone())
10772                    .collect();
10773                let (parties, rp_txns) =
10774                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10775                snapshot.related_parties.extend(parties);
10776                snapshot.related_party_transactions.extend(rp_txns);
10777
10778                // Add workpapers after findings since findings need them
10779                snapshot.workpapers.extend(workpapers);
10780
10781                // Generate audit scope record for this engagement (one per engagement)
10782                {
10783                    let scope_id = format!(
10784                        "SCOPE-{}-{}",
10785                        engagement.engagement_id.simple(),
10786                        &engagement.client_entity_id
10787                    );
10788                    let scope = datasynth_core::models::audit::AuditScope::new(
10789                        scope_id.clone(),
10790                        engagement.engagement_id.to_string(),
10791                        engagement.client_entity_id.clone(),
10792                        engagement.materiality,
10793                    );
10794                    // Wire scope_id back to engagement
10795                    let mut eng = engagement;
10796                    eng.scope_id = Some(scope_id);
10797                    snapshot.audit_scopes.push(scope);
10798                    snapshot.engagements.push(eng);
10799                }
10800            }
10801        }
10802
10803        // ----------------------------------------------------------------
10804        // ISA 600: Group audit — component auditors, plan, instructions, reports
10805        // ----------------------------------------------------------------
10806        if self.config.companies.len() > 1 {
10807            // Use materiality from the first engagement if available, otherwise
10808            // derive a reasonable figure from total revenue.
10809            let group_materiality = snapshot
10810                .engagements
10811                .first()
10812                .map(|e| e.materiality)
10813                .unwrap_or_else(|| {
10814                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10815                    total_revenue * pct
10816                });
10817
10818            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10819            let group_engagement_id = snapshot
10820                .engagements
10821                .first()
10822                .map(|e| e.engagement_id.to_string())
10823                .unwrap_or_else(|| "GROUP-ENG".to_string());
10824
10825            let component_snapshot = component_gen.generate(
10826                &self.config.companies,
10827                group_materiality,
10828                &group_engagement_id,
10829                period_end,
10830            );
10831
10832            snapshot.component_auditors = component_snapshot.component_auditors;
10833            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10834            snapshot.component_instructions = component_snapshot.component_instructions;
10835            snapshot.component_reports = component_snapshot.component_reports;
10836
10837            info!(
10838                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10839                snapshot.component_auditors.len(),
10840                snapshot.component_instructions.len(),
10841                snapshot.component_reports.len(),
10842            );
10843        }
10844
10845        // ----------------------------------------------------------------
10846        // ISA 210: Engagement letters — one per engagement
10847        // ----------------------------------------------------------------
10848        {
10849            let applicable_framework = self
10850                .config
10851                .accounting_standards
10852                .framework
10853                .as_ref()
10854                .map(|f| format!("{f:?}"))
10855                .unwrap_or_else(|| "IFRS".to_string());
10856
10857            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10858            let entity_count = self.config.companies.len();
10859
10860            for engagement in &snapshot.engagements {
10861                let company = self
10862                    .config
10863                    .companies
10864                    .iter()
10865                    .find(|c| c.code == engagement.client_entity_id);
10866                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10867                let letter_date = engagement.planning_start;
10868                let letter = letter_gen.generate(
10869                    &engagement.engagement_id.to_string(),
10870                    &engagement.client_name,
10871                    entity_count,
10872                    engagement.period_end_date,
10873                    currency,
10874                    &applicable_framework,
10875                    letter_date,
10876                );
10877                snapshot.engagement_letters.push(letter);
10878            }
10879
10880            info!(
10881                "ISA 210 engagement letters: {} generated",
10882                snapshot.engagement_letters.len()
10883            );
10884        }
10885
10886        // ----------------------------------------------------------------
10887        // ISA 560 / IAS 10: Subsequent events
10888        // ----------------------------------------------------------------
10889        {
10890            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10891            let entity_codes: Vec<String> = self
10892                .config
10893                .companies
10894                .iter()
10895                .map(|c| c.code.clone())
10896                .collect();
10897            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10898            info!(
10899                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10900                subsequent.len(),
10901                subsequent
10902                    .iter()
10903                    .filter(|e| matches!(
10904                        e.classification,
10905                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10906                    ))
10907                    .count(),
10908                subsequent
10909                    .iter()
10910                    .filter(|e| matches!(
10911                        e.classification,
10912                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10913                    ))
10914                    .count(),
10915            );
10916            snapshot.subsequent_events = subsequent;
10917        }
10918
10919        // ----------------------------------------------------------------
10920        // ISA 402: Service organization controls
10921        // ----------------------------------------------------------------
10922        {
10923            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
10924            let entity_codes: Vec<String> = self
10925                .config
10926                .companies
10927                .iter()
10928                .map(|c| c.code.clone())
10929                .collect();
10930            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
10931            info!(
10932                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
10933                soc_snapshot.service_organizations.len(),
10934                soc_snapshot.soc_reports.len(),
10935                soc_snapshot.user_entity_controls.len(),
10936            );
10937            snapshot.service_organizations = soc_snapshot.service_organizations;
10938            snapshot.soc_reports = soc_snapshot.soc_reports;
10939            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
10940        }
10941
10942        // ----------------------------------------------------------------
10943        // ISA 570: Going concern assessments
10944        // ----------------------------------------------------------------
10945        {
10946            use datasynth_generators::audit::going_concern_generator::{
10947                GoingConcernGenerator, GoingConcernInput,
10948            };
10949            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
10950            let entity_codes: Vec<String> = self
10951                .config
10952                .companies
10953                .iter()
10954                .map(|c| c.code.clone())
10955                .collect();
10956            // Assessment date = period end + 75 days (typical sign-off window).
10957            let assessment_date = period_end + chrono::Duration::days(75);
10958            let period_label = format!("FY{}", period_end.year());
10959
10960            // Build financial inputs from actual journal entries.
10961            //
10962            // We derive approximate P&L, working capital, and operating cash flow
10963            // by aggregating GL account balances from the journal entry population.
10964            // Account ranges used (standard chart):
10965            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
10966            //   Expenses:        6xxx (debit-normal)
10967            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
10968            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
10969            //   Operating CF:    net income adjusted for D&A (rough proxy)
10970            let gc_inputs: Vec<GoingConcernInput> = self
10971                .config
10972                .companies
10973                .iter()
10974                .map(|company| {
10975                    let code = &company.code;
10976                    let mut revenue = rust_decimal::Decimal::ZERO;
10977                    let mut expenses = rust_decimal::Decimal::ZERO;
10978                    let mut current_assets = rust_decimal::Decimal::ZERO;
10979                    let mut current_liabs = rust_decimal::Decimal::ZERO;
10980                    let mut total_debt = rust_decimal::Decimal::ZERO;
10981
10982                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
10983                        for line in &je.lines {
10984                            let acct = line.gl_account.as_str();
10985                            let net = line.debit_amount - line.credit_amount;
10986                            if acct.starts_with('4') {
10987                                // Revenue accounts: credit-normal, so negative net = revenue earned
10988                                revenue -= net;
10989                            } else if acct.starts_with('6') {
10990                                // Expense accounts: debit-normal
10991                                expenses += net;
10992                            }
10993                            // Balance sheet accounts for working capital
10994                            if acct.starts_with('1') {
10995                                // Current asset accounts (1000–1499)
10996                                if let Ok(n) = acct.parse::<u32>() {
10997                                    if (1000..=1499).contains(&n) {
10998                                        current_assets += net;
10999                                    }
11000                                }
11001                            } else if acct.starts_with('2') {
11002                                if let Ok(n) = acct.parse::<u32>() {
11003                                    if (2000..=2499).contains(&n) {
11004                                        // Current liabilities
11005                                        current_liabs -= net; // credit-normal
11006                                    } else if (2500..=2999).contains(&n) {
11007                                        // Long-term debt
11008                                        total_debt -= net;
11009                                    }
11010                                }
11011                            }
11012                        }
11013                    }
11014
11015                    let net_income = revenue - expenses;
11016                    let working_capital = current_assets - current_liabs;
11017                    // Rough operating CF proxy: net income (full accrual CF calculation
11018                    // is done separately in the cash flow statement generator)
11019                    let operating_cash_flow = net_income;
11020
11021                    GoingConcernInput {
11022                        entity_code: code.clone(),
11023                        net_income,
11024                        working_capital,
11025                        operating_cash_flow,
11026                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11027                        assessment_date,
11028                    }
11029                })
11030                .collect();
11031
11032            let assessments = if gc_inputs.is_empty() {
11033                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11034            } else {
11035                gc_gen.generate_for_entities_with_inputs(
11036                    &entity_codes,
11037                    &gc_inputs,
11038                    assessment_date,
11039                    &period_label,
11040                )
11041            };
11042            info!(
11043                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11044                assessments.len(),
11045                assessments.iter().filter(|a| matches!(
11046                    a.auditor_conclusion,
11047                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11048                )).count(),
11049                assessments.iter().filter(|a| matches!(
11050                    a.auditor_conclusion,
11051                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11052                )).count(),
11053                assessments.iter().filter(|a| matches!(
11054                    a.auditor_conclusion,
11055                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11056                )).count(),
11057            );
11058            snapshot.going_concern_assessments = assessments;
11059        }
11060
11061        // ----------------------------------------------------------------
11062        // ISA 540: Accounting estimates
11063        // ----------------------------------------------------------------
11064        {
11065            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11066            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11067            let entity_codes: Vec<String> = self
11068                .config
11069                .companies
11070                .iter()
11071                .map(|c| c.code.clone())
11072                .collect();
11073            let estimates = est_gen.generate_for_entities(&entity_codes);
11074            info!(
11075                "ISA 540 accounting estimates: {} estimates across {} entities \
11076                 ({} with retrospective reviews, {} with auditor point estimates)",
11077                estimates.len(),
11078                entity_codes.len(),
11079                estimates
11080                    .iter()
11081                    .filter(|e| e.retrospective_review.is_some())
11082                    .count(),
11083                estimates
11084                    .iter()
11085                    .filter(|e| e.auditor_point_estimate.is_some())
11086                    .count(),
11087            );
11088            snapshot.accounting_estimates = estimates;
11089        }
11090
11091        // ----------------------------------------------------------------
11092        // ISA 700/701/705/706: Audit opinions (one per engagement)
11093        // ----------------------------------------------------------------
11094        {
11095            use datasynth_generators::audit::audit_opinion_generator::{
11096                AuditOpinionGenerator, AuditOpinionInput,
11097            };
11098
11099            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11100
11101            // Build inputs — one per engagement, linking findings and going concern.
11102            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11103                .engagements
11104                .iter()
11105                .map(|eng| {
11106                    // Collect findings for this engagement.
11107                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11108                        .findings
11109                        .iter()
11110                        .filter(|f| f.engagement_id == eng.engagement_id)
11111                        .cloned()
11112                        .collect();
11113
11114                    // Going concern for this entity.
11115                    let gc = snapshot
11116                        .going_concern_assessments
11117                        .iter()
11118                        .find(|g| g.entity_code == eng.client_entity_id)
11119                        .cloned();
11120
11121                    // Component reports relevant to this engagement.
11122                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11123                        snapshot.component_reports.clone();
11124
11125                    let auditor = self
11126                        .master_data
11127                        .employees
11128                        .first()
11129                        .map(|e| e.display_name.clone())
11130                        .unwrap_or_else(|| "Global Audit LLP".into());
11131
11132                    let partner = self
11133                        .master_data
11134                        .employees
11135                        .get(1)
11136                        .map(|e| e.display_name.clone())
11137                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
11138
11139                    AuditOpinionInput {
11140                        entity_code: eng.client_entity_id.clone(),
11141                        entity_name: eng.client_name.clone(),
11142                        engagement_id: eng.engagement_id,
11143                        period_end: eng.period_end_date,
11144                        findings: eng_findings,
11145                        going_concern: gc,
11146                        component_reports: comp_reports,
11147                        // Mark as US-listed when audit standards include PCAOB.
11148                        is_us_listed: {
11149                            let fw = &self.config.audit_standards.isa_compliance.framework;
11150                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11151                        },
11152                        auditor_name: auditor,
11153                        engagement_partner: partner,
11154                    }
11155                })
11156                .collect();
11157
11158            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11159
11160            for go in &generated_opinions {
11161                snapshot
11162                    .key_audit_matters
11163                    .extend(go.key_audit_matters.clone());
11164            }
11165            snapshot.audit_opinions = generated_opinions
11166                .into_iter()
11167                .map(|go| go.opinion)
11168                .collect();
11169
11170            info!(
11171                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11172                snapshot.audit_opinions.len(),
11173                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11174                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11175                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11176                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11177            );
11178        }
11179
11180        // ----------------------------------------------------------------
11181        // SOX 302 / 404 assessments
11182        // ----------------------------------------------------------------
11183        {
11184            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11185
11186            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11187
11188            for (i, company) in self.config.companies.iter().enumerate() {
11189                // Collect findings for this company's engagements.
11190                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11191                    .engagements
11192                    .iter()
11193                    .filter(|e| e.client_entity_id == company.code)
11194                    .map(|e| e.engagement_id)
11195                    .collect();
11196
11197                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11198                    .findings
11199                    .iter()
11200                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11201                    .cloned()
11202                    .collect();
11203
11204                // Derive executive names from employee list.
11205                let emp_count = self.master_data.employees.len();
11206                let ceo_name = if emp_count > 0 {
11207                    self.master_data.employees[i % emp_count]
11208                        .display_name
11209                        .clone()
11210                } else {
11211                    format!("CEO of {}", company.name)
11212                };
11213                let cfo_name = if emp_count > 1 {
11214                    self.master_data.employees[(i + 1) % emp_count]
11215                        .display_name
11216                        .clone()
11217                } else {
11218                    format!("CFO of {}", company.name)
11219                };
11220
11221                // Use engagement materiality if available.
11222                let materiality = snapshot
11223                    .engagements
11224                    .iter()
11225                    .find(|e| e.client_entity_id == company.code)
11226                    .map(|e| e.materiality)
11227                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11228
11229                let input = SoxGeneratorInput {
11230                    company_code: company.code.clone(),
11231                    company_name: company.name.clone(),
11232                    fiscal_year,
11233                    period_end,
11234                    findings: company_findings,
11235                    ceo_name,
11236                    cfo_name,
11237                    materiality_threshold: materiality,
11238                    revenue_percent: rust_decimal::Decimal::from(100),
11239                    assets_percent: rust_decimal::Decimal::from(100),
11240                    significant_accounts: vec![
11241                        "Revenue".into(),
11242                        "Accounts Receivable".into(),
11243                        "Inventory".into(),
11244                        "Fixed Assets".into(),
11245                        "Accounts Payable".into(),
11246                    ],
11247                };
11248
11249                let (certs, assessment) = sox_gen.generate(&input);
11250                snapshot.sox_302_certifications.extend(certs);
11251                snapshot.sox_404_assessments.push(assessment);
11252            }
11253
11254            info!(
11255                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11256                snapshot.sox_302_certifications.len(),
11257                snapshot.sox_404_assessments.len(),
11258                snapshot
11259                    .sox_404_assessments
11260                    .iter()
11261                    .filter(|a| a.icfr_effective)
11262                    .count(),
11263                snapshot
11264                    .sox_404_assessments
11265                    .iter()
11266                    .filter(|a| !a.icfr_effective)
11267                    .count(),
11268            );
11269        }
11270
11271        // ----------------------------------------------------------------
11272        // ISA 320: Materiality calculations (one per entity)
11273        // ----------------------------------------------------------------
11274        {
11275            use datasynth_generators::audit::materiality_generator::{
11276                MaterialityGenerator, MaterialityInput,
11277            };
11278
11279            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11280
11281            // Compute per-company financials from JEs.
11282            // Asset accounts start with '1', revenue with '4',
11283            // expense accounts with '5' or '6'.
11284            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11285
11286            for company in &self.config.companies {
11287                let company_code = company.code.clone();
11288
11289                // Revenue: credit-side entries on 4xxx accounts
11290                let company_revenue: rust_decimal::Decimal = entries
11291                    .iter()
11292                    .filter(|e| e.company_code() == company_code)
11293                    .flat_map(|e| e.lines.iter())
11294                    .filter(|l| l.account_code.starts_with('4'))
11295                    .map(|l| l.credit_amount)
11296                    .sum();
11297
11298                // Total assets: debit balances on 1xxx accounts
11299                let total_assets: rust_decimal::Decimal = entries
11300                    .iter()
11301                    .filter(|e| e.company_code() == company_code)
11302                    .flat_map(|e| e.lines.iter())
11303                    .filter(|l| l.account_code.starts_with('1'))
11304                    .map(|l| l.debit_amount)
11305                    .sum();
11306
11307                // Expenses: debit-side entries on 5xxx/6xxx accounts
11308                let total_expenses: rust_decimal::Decimal = entries
11309                    .iter()
11310                    .filter(|e| e.company_code() == company_code)
11311                    .flat_map(|e| e.lines.iter())
11312                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11313                    .map(|l| l.debit_amount)
11314                    .sum();
11315
11316                // Equity: credit balances on 3xxx accounts
11317                let equity: rust_decimal::Decimal = entries
11318                    .iter()
11319                    .filter(|e| e.company_code() == company_code)
11320                    .flat_map(|e| e.lines.iter())
11321                    .filter(|l| l.account_code.starts_with('3'))
11322                    .map(|l| l.credit_amount)
11323                    .sum();
11324
11325                let pretax_income = company_revenue - total_expenses;
11326
11327                // If no company-specific data, fall back to proportional share
11328                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11329                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
11330                        .unwrap_or(rust_decimal::Decimal::ONE);
11331                    (
11332                        total_revenue * w,
11333                        total_revenue * w * rust_decimal::Decimal::from(3),
11334                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
11335                        total_revenue * w * rust_decimal::Decimal::from(2),
11336                    )
11337                } else {
11338                    (company_revenue, total_assets, pretax_income, equity)
11339                };
11340
11341                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
11342
11343                materiality_inputs.push(MaterialityInput {
11344                    entity_code: company_code,
11345                    period: format!("FY{}", fiscal_year),
11346                    revenue: rev,
11347                    pretax_income: pti,
11348                    total_assets: assets,
11349                    equity: eq,
11350                    gross_profit,
11351                });
11352            }
11353
11354            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11355
11356            info!(
11357                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11358                 {} total assets, {} equity benchmarks)",
11359                snapshot.materiality_calculations.len(),
11360                snapshot
11361                    .materiality_calculations
11362                    .iter()
11363                    .filter(|m| matches!(
11364                        m.benchmark,
11365                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11366                    ))
11367                    .count(),
11368                snapshot
11369                    .materiality_calculations
11370                    .iter()
11371                    .filter(|m| matches!(
11372                        m.benchmark,
11373                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11374                    ))
11375                    .count(),
11376                snapshot
11377                    .materiality_calculations
11378                    .iter()
11379                    .filter(|m| matches!(
11380                        m.benchmark,
11381                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11382                    ))
11383                    .count(),
11384                snapshot
11385                    .materiality_calculations
11386                    .iter()
11387                    .filter(|m| matches!(
11388                        m.benchmark,
11389                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11390                    ))
11391                    .count(),
11392            );
11393        }
11394
11395        // ----------------------------------------------------------------
11396        // ISA 315: Combined Risk Assessments (per entity, per account area)
11397        // ----------------------------------------------------------------
11398        {
11399            use datasynth_generators::audit::cra_generator::CraGenerator;
11400
11401            let mut cra_gen = CraGenerator::new(self.seed + 8315);
11402
11403            // Build entity → scope_id map from already-generated scopes
11404            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11405                .audit_scopes
11406                .iter()
11407                .map(|s| (s.entity_code.clone(), s.id.clone()))
11408                .collect();
11409
11410            for company in &self.config.companies {
11411                let cras = cra_gen.generate_for_entity(&company.code, None);
11412                let scope_id = entity_scope_map.get(&company.code).cloned();
11413                let cras_with_scope: Vec<_> = cras
11414                    .into_iter()
11415                    .map(|mut cra| {
11416                        cra.scope_id = scope_id.clone();
11417                        cra
11418                    })
11419                    .collect();
11420                snapshot.combined_risk_assessments.extend(cras_with_scope);
11421            }
11422
11423            let significant_count = snapshot
11424                .combined_risk_assessments
11425                .iter()
11426                .filter(|c| c.significant_risk)
11427                .count();
11428            let high_cra_count = snapshot
11429                .combined_risk_assessments
11430                .iter()
11431                .filter(|c| {
11432                    matches!(
11433                        c.combined_risk,
11434                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11435                    )
11436                })
11437                .count();
11438
11439            info!(
11440                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11441                snapshot.combined_risk_assessments.len(),
11442                significant_count,
11443                high_cra_count,
11444            );
11445        }
11446
11447        // ----------------------------------------------------------------
11448        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
11449        // ----------------------------------------------------------------
11450        {
11451            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11452
11453            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11454
11455            // Group CRAs by entity and use per-entity tolerable error from materiality
11456            for company in &self.config.companies {
11457                let entity_code = company.code.clone();
11458
11459                // Find tolerable error for this entity (= performance materiality)
11460                let tolerable_error = snapshot
11461                    .materiality_calculations
11462                    .iter()
11463                    .find(|m| m.entity_code == entity_code)
11464                    .map(|m| m.tolerable_error);
11465
11466                // Collect CRAs for this entity
11467                let entity_cras: Vec<_> = snapshot
11468                    .combined_risk_assessments
11469                    .iter()
11470                    .filter(|c| c.entity_code == entity_code)
11471                    .cloned()
11472                    .collect();
11473
11474                if !entity_cras.is_empty() {
11475                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11476                    snapshot.sampling_plans.extend(plans);
11477                    snapshot.sampled_items.extend(items);
11478                }
11479            }
11480
11481            let misstatement_count = snapshot
11482                .sampled_items
11483                .iter()
11484                .filter(|i| i.misstatement_found)
11485                .count();
11486
11487            info!(
11488                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11489                snapshot.sampling_plans.len(),
11490                snapshot.sampled_items.len(),
11491                misstatement_count,
11492            );
11493        }
11494
11495        // ----------------------------------------------------------------
11496        // ISA 315: Significant Classes of Transactions (SCOTS)
11497        // ----------------------------------------------------------------
11498        {
11499            use datasynth_generators::audit::scots_generator::{
11500                ScotsGenerator, ScotsGeneratorConfig,
11501            };
11502
11503            let ic_enabled = self.config.intercompany.enabled;
11504
11505            let config = ScotsGeneratorConfig {
11506                intercompany_enabled: ic_enabled,
11507                ..ScotsGeneratorConfig::default()
11508            };
11509            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11510
11511            for company in &self.config.companies {
11512                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11513                snapshot
11514                    .significant_transaction_classes
11515                    .extend(entity_scots);
11516            }
11517
11518            let estimation_count = snapshot
11519                .significant_transaction_classes
11520                .iter()
11521                .filter(|s| {
11522                    matches!(
11523                        s.transaction_type,
11524                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11525                    )
11526                })
11527                .count();
11528
11529            info!(
11530                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11531                snapshot.significant_transaction_classes.len(),
11532                estimation_count,
11533            );
11534        }
11535
11536        // ----------------------------------------------------------------
11537        // ISA 520: Unusual Item Markers
11538        // ----------------------------------------------------------------
11539        {
11540            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11541
11542            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11543            let entity_codes: Vec<String> = self
11544                .config
11545                .companies
11546                .iter()
11547                .map(|c| c.code.clone())
11548                .collect();
11549            let unusual_flags =
11550                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11551            info!(
11552                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11553                unusual_flags.len(),
11554                unusual_flags
11555                    .iter()
11556                    .filter(|f| matches!(
11557                        f.severity,
11558                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11559                    ))
11560                    .count(),
11561                unusual_flags
11562                    .iter()
11563                    .filter(|f| matches!(
11564                        f.severity,
11565                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11566                    ))
11567                    .count(),
11568                unusual_flags
11569                    .iter()
11570                    .filter(|f| matches!(
11571                        f.severity,
11572                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11573                    ))
11574                    .count(),
11575            );
11576            snapshot.unusual_items = unusual_flags;
11577        }
11578
11579        // ----------------------------------------------------------------
11580        // ISA 520: Analytical Relationships
11581        // ----------------------------------------------------------------
11582        {
11583            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11584
11585            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11586            let entity_codes: Vec<String> = self
11587                .config
11588                .companies
11589                .iter()
11590                .map(|c| c.code.clone())
11591                .collect();
11592            let current_period_label = format!("FY{fiscal_year}");
11593            let prior_period_label = format!("FY{}", fiscal_year - 1);
11594            let analytical_rels = ar_gen.generate_for_entities(
11595                &entity_codes,
11596                entries,
11597                &current_period_label,
11598                &prior_period_label,
11599            );
11600            let out_of_range = analytical_rels
11601                .iter()
11602                .filter(|r| !r.within_expected_range)
11603                .count();
11604            info!(
11605                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11606                analytical_rels.len(),
11607                out_of_range,
11608            );
11609            snapshot.analytical_relationships = analytical_rels;
11610        }
11611
11612        if let Some(pb) = pb {
11613            pb.finish_with_message(format!(
11614                "Audit data: {} engagements, {} workpapers, {} evidence, \
11615                 {} confirmations, {} procedure steps, {} samples, \
11616                 {} analytical, {} IA funcs, {} related parties, \
11617                 {} component auditors, {} letters, {} subsequent events, \
11618                 {} service orgs, {} going concern, {} accounting estimates, \
11619                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11620                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11621                 {} unusual items, {} analytical relationships",
11622                snapshot.engagements.len(),
11623                snapshot.workpapers.len(),
11624                snapshot.evidence.len(),
11625                snapshot.confirmations.len(),
11626                snapshot.procedure_steps.len(),
11627                snapshot.samples.len(),
11628                snapshot.analytical_results.len(),
11629                snapshot.ia_functions.len(),
11630                snapshot.related_parties.len(),
11631                snapshot.component_auditors.len(),
11632                snapshot.engagement_letters.len(),
11633                snapshot.subsequent_events.len(),
11634                snapshot.service_organizations.len(),
11635                snapshot.going_concern_assessments.len(),
11636                snapshot.accounting_estimates.len(),
11637                snapshot.audit_opinions.len(),
11638                snapshot.key_audit_matters.len(),
11639                snapshot.sox_302_certifications.len(),
11640                snapshot.sox_404_assessments.len(),
11641                snapshot.materiality_calculations.len(),
11642                snapshot.combined_risk_assessments.len(),
11643                snapshot.sampling_plans.len(),
11644                snapshot.significant_transaction_classes.len(),
11645                snapshot.unusual_items.len(),
11646                snapshot.analytical_relationships.len(),
11647            ));
11648        }
11649
11650        // ----------------------------------------------------------------
11651        // PCAOB-ISA cross-reference mappings
11652        // ----------------------------------------------------------------
11653        // Always include the standard PCAOB-ISA mappings when audit generation is
11654        // enabled. These are static reference data (no randomness required) so we
11655        // call standard_mappings() directly.
11656        {
11657            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11658            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11659            debug!(
11660                "PCAOB-ISA mappings generated: {} mappings",
11661                snapshot.isa_pcaob_mappings.len()
11662            );
11663        }
11664
11665        // ----------------------------------------------------------------
11666        // ISA standard reference entries
11667        // ----------------------------------------------------------------
11668        // Emit flat ISA standard reference data (number, title, series) so
11669        // consumers get a machine-readable listing of all 34 ISA standards in
11670        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
11671        {
11672            use datasynth_standards::audit::isa_reference::IsaStandard;
11673            snapshot.isa_mappings = IsaStandard::standard_entries();
11674            debug!(
11675                "ISA standard entries generated: {} standards",
11676                snapshot.isa_mappings.len()
11677            );
11678        }
11679
11680        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
11681        // For each RPT, find the chronologically closest JE for the engagement's entity.
11682        {
11683            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11684                .engagements
11685                .iter()
11686                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11687                .collect();
11688
11689            for rpt in &mut snapshot.related_party_transactions {
11690                if rpt.journal_entry_id.is_some() {
11691                    continue; // already set
11692                }
11693                let entity = engagement_by_id
11694                    .get(&rpt.engagement_id.to_string())
11695                    .copied()
11696                    .unwrap_or("");
11697
11698                // Find closest JE by date in the entity's company
11699                let best_je = entries
11700                    .iter()
11701                    .filter(|je| je.header.company_code == entity)
11702                    .min_by_key(|je| {
11703                        (je.header.posting_date - rpt.transaction_date)
11704                            .num_days()
11705                            .abs()
11706                    });
11707
11708                if let Some(je) = best_je {
11709                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
11710                }
11711            }
11712
11713            let linked = snapshot
11714                .related_party_transactions
11715                .iter()
11716                .filter(|t| t.journal_entry_id.is_some())
11717                .count();
11718            debug!(
11719                "Linked {}/{} related party transactions to journal entries",
11720                linked,
11721                snapshot.related_party_transactions.len()
11722            );
11723        }
11724
11725        Ok(snapshot)
11726    }
11727
11728    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
11729    ///
11730    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
11731    /// from the current orchestrator state, runs the FSM engine, and maps the
11732    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
11733    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
11734    fn generate_audit_data_with_fsm(
11735        &mut self,
11736        entries: &[JournalEntry],
11737    ) -> SynthResult<AuditSnapshot> {
11738        use datasynth_audit_fsm::{
11739            context::EngagementContext,
11740            engine::AuditFsmEngine,
11741            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11742        };
11743        use rand::SeedableRng;
11744        use rand_chacha::ChaCha8Rng;
11745
11746        info!("Audit FSM: generating audit data via FSM engine");
11747
11748        let fsm_config = self
11749            .config
11750            .audit
11751            .fsm
11752            .as_ref()
11753            .expect("FSM config must be present when FSM is enabled");
11754
11755        // 1. Load blueprint from config string.
11756        let bwp = match fsm_config.blueprint.as_str() {
11757            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11758            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11759            _ => {
11760                warn!(
11761                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11762                    fsm_config.blueprint
11763                );
11764                BlueprintWithPreconditions::load_builtin_fsa()
11765            }
11766        }
11767        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11768
11769        // 2. Load overlay from config string.
11770        let overlay = match fsm_config.overlay.as_str() {
11771            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11772            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11773            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11774            _ => {
11775                warn!(
11776                    "Unknown FSM overlay '{}', falling back to builtin:default",
11777                    fsm_config.overlay
11778                );
11779                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11780            }
11781        }
11782        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11783
11784        // 3. Build EngagementContext from orchestrator state.
11785        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11786            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11787        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11788
11789        // Determine the engagement entity early so we can filter JEs.
11790        let company = self.config.companies.first();
11791        let company_code = company
11792            .map(|c| c.code.clone())
11793            .unwrap_or_else(|| "UNKNOWN".to_string());
11794        let company_name = company
11795            .map(|c| c.name.clone())
11796            .unwrap_or_else(|| "Unknown Company".to_string());
11797        let currency = company
11798            .map(|c| c.currency.clone())
11799            .unwrap_or_else(|| "USD".to_string());
11800
11801        // Filter JEs to the engagement entity for single-company coherence.
11802        let entity_entries: Vec<_> = entries
11803            .iter()
11804            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11805            .cloned()
11806            .collect();
11807        let entries = &entity_entries; // Shadow the parameter for remaining usage
11808
11809        // Financial aggregates from journal entries.
11810        let total_revenue: rust_decimal::Decimal = entries
11811            .iter()
11812            .flat_map(|e| e.lines.iter())
11813            .filter(|l| l.account_code.starts_with('4'))
11814            .map(|l| l.credit_amount - l.debit_amount)
11815            .sum();
11816
11817        let total_assets: rust_decimal::Decimal = entries
11818            .iter()
11819            .flat_map(|e| e.lines.iter())
11820            .filter(|l| l.account_code.starts_with('1'))
11821            .map(|l| l.debit_amount - l.credit_amount)
11822            .sum();
11823
11824        let total_expenses: rust_decimal::Decimal = entries
11825            .iter()
11826            .flat_map(|e| e.lines.iter())
11827            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11828            .map(|l| l.debit_amount)
11829            .sum();
11830
11831        let equity: rust_decimal::Decimal = entries
11832            .iter()
11833            .flat_map(|e| e.lines.iter())
11834            .filter(|l| l.account_code.starts_with('3'))
11835            .map(|l| l.credit_amount - l.debit_amount)
11836            .sum();
11837
11838        let total_debt: rust_decimal::Decimal = entries
11839            .iter()
11840            .flat_map(|e| e.lines.iter())
11841            .filter(|l| l.account_code.starts_with('2'))
11842            .map(|l| l.credit_amount - l.debit_amount)
11843            .sum();
11844
11845        let pretax_income = total_revenue - total_expenses;
11846
11847        let cogs: rust_decimal::Decimal = entries
11848            .iter()
11849            .flat_map(|e| e.lines.iter())
11850            .filter(|l| l.account_code.starts_with('5'))
11851            .map(|l| l.debit_amount)
11852            .sum();
11853        let gross_profit = total_revenue - cogs;
11854
11855        let current_assets: rust_decimal::Decimal = entries
11856            .iter()
11857            .flat_map(|e| e.lines.iter())
11858            .filter(|l| {
11859                l.account_code.starts_with("10")
11860                    || l.account_code.starts_with("11")
11861                    || l.account_code.starts_with("12")
11862                    || l.account_code.starts_with("13")
11863            })
11864            .map(|l| l.debit_amount - l.credit_amount)
11865            .sum();
11866        let current_liabilities: rust_decimal::Decimal = entries
11867            .iter()
11868            .flat_map(|e| e.lines.iter())
11869            .filter(|l| {
11870                l.account_code.starts_with("20")
11871                    || l.account_code.starts_with("21")
11872                    || l.account_code.starts_with("22")
11873            })
11874            .map(|l| l.credit_amount - l.debit_amount)
11875            .sum();
11876        let working_capital = current_assets - current_liabilities;
11877
11878        let depreciation: rust_decimal::Decimal = entries
11879            .iter()
11880            .flat_map(|e| e.lines.iter())
11881            .filter(|l| l.account_code.starts_with("60"))
11882            .map(|l| l.debit_amount)
11883            .sum();
11884        let operating_cash_flow = pretax_income + depreciation;
11885
11886        // GL accounts for reference data.
11887        let accounts: Vec<String> = self
11888            .coa
11889            .as_ref()
11890            .map(|coa| {
11891                coa.get_postable_accounts()
11892                    .iter()
11893                    .map(|acc| acc.account_code().to_string())
11894                    .collect()
11895            })
11896            .unwrap_or_default();
11897
11898        // Team member IDs and display names from master data.
11899        let team_member_ids: Vec<String> = self
11900            .master_data
11901            .employees
11902            .iter()
11903            .take(8) // Cap team size
11904            .map(|e| e.employee_id.clone())
11905            .collect();
11906        let team_member_pairs: Vec<(String, String)> = self
11907            .master_data
11908            .employees
11909            .iter()
11910            .take(8)
11911            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11912            .collect();
11913
11914        let vendor_names: Vec<String> = self
11915            .master_data
11916            .vendors
11917            .iter()
11918            .map(|v| v.name.clone())
11919            .collect();
11920        let customer_names: Vec<String> = self
11921            .master_data
11922            .customers
11923            .iter()
11924            .map(|c| c.name.clone())
11925            .collect();
11926
11927        let entity_codes: Vec<String> = self
11928            .config
11929            .companies
11930            .iter()
11931            .map(|c| c.code.clone())
11932            .collect();
11933
11934        // Journal entry IDs for evidence tracing (sample up to 50).
11935        let journal_entry_ids: Vec<String> = entries
11936            .iter()
11937            .take(50)
11938            .map(|e| e.header.document_id.to_string())
11939            .collect();
11940
11941        // Account balances for risk weighting (aggregate debit - credit per account).
11942        let mut account_balances = std::collections::HashMap::<String, f64>::new();
11943        for entry in entries {
11944            for line in &entry.lines {
11945                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
11946                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
11947                *account_balances
11948                    .entry(line.account_code.clone())
11949                    .or_insert(0.0) += debit_f64 - credit_f64;
11950            }
11951        }
11952
11953        // Internal control IDs and anomaly refs are populated by the
11954        // caller when available; here we default to empty because the
11955        // orchestrator state may not have generated controls/anomalies
11956        // yet at this point in the pipeline.
11957        let control_ids: Vec<String> = Vec::new();
11958        let anomaly_refs: Vec<String> = Vec::new();
11959
11960        let mut context = EngagementContext {
11961            company_code,
11962            company_name,
11963            fiscal_year: start_date.year(),
11964            currency,
11965            total_revenue,
11966            total_assets,
11967            engagement_start: start_date,
11968            report_date: period_end,
11969            pretax_income,
11970            equity,
11971            gross_profit,
11972            working_capital,
11973            operating_cash_flow,
11974            total_debt,
11975            team_member_ids,
11976            team_member_pairs,
11977            accounts,
11978            vendor_names,
11979            customer_names,
11980            journal_entry_ids,
11981            account_balances,
11982            control_ids,
11983            anomaly_refs,
11984            journal_entries: entries.to_vec(),
11985            is_us_listed: false,
11986            entity_codes,
11987            auditor_firm_name: "DataSynth Audit LLP".into(),
11988            accounting_framework: self
11989                .config
11990                .accounting_standards
11991                .framework
11992                .map(|f| match f {
11993                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
11994                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
11995                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
11996                        "French GAAP"
11997                    }
11998                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
11999                        "German GAAP"
12000                    }
12001                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12002                        "Dual Reporting"
12003                    }
12004                })
12005                .unwrap_or("IFRS")
12006                .into(),
12007        };
12008
12009        // 4. Create and run the FSM engine.
12010        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12011        let rng = ChaCha8Rng::seed_from_u64(seed);
12012        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12013
12014        let mut result = engine
12015            .run_engagement(&context)
12016            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12017
12018        info!(
12019            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12020             {} phases completed, duration {:.1}h",
12021            result.event_log.len(),
12022            result.artifacts.total_artifacts(),
12023            result.anomalies.len(),
12024            result.phases_completed.len(),
12025            result.total_duration_hours,
12026        );
12027
12028        // 4b. Populate financial data in the artifact bag for downstream consumers.
12029        let tb_entity = context.company_code.clone();
12030        let tb_fy = context.fiscal_year;
12031        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12032        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12033            entries,
12034            &tb_entity,
12035            tb_fy,
12036            self.coa.as_ref().map(|c| c.as_ref()),
12037        );
12038
12039        // 5. Map ArtifactBag fields to AuditSnapshot.
12040        let bag = result.artifacts;
12041        let mut snapshot = AuditSnapshot {
12042            engagements: bag.engagements,
12043            engagement_letters: bag.engagement_letters,
12044            materiality_calculations: bag.materiality_calculations,
12045            risk_assessments: bag.risk_assessments,
12046            combined_risk_assessments: bag.combined_risk_assessments,
12047            workpapers: bag.workpapers,
12048            evidence: bag.evidence,
12049            findings: bag.findings,
12050            judgments: bag.judgments,
12051            sampling_plans: bag.sampling_plans,
12052            sampled_items: bag.sampled_items,
12053            analytical_results: bag.analytical_results,
12054            going_concern_assessments: bag.going_concern_assessments,
12055            subsequent_events: bag.subsequent_events,
12056            audit_opinions: bag.audit_opinions,
12057            key_audit_matters: bag.key_audit_matters,
12058            procedure_steps: bag.procedure_steps,
12059            samples: bag.samples,
12060            confirmations: bag.confirmations,
12061            confirmation_responses: bag.confirmation_responses,
12062            // Store the event trail for downstream export.
12063            fsm_event_trail: Some(result.event_log),
12064            // Fields not produced by the FSM engine remain at their defaults.
12065            ..Default::default()
12066        };
12067
12068        // 6. Add static reference data (same as legacy path).
12069        {
12070            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12071            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12072        }
12073        {
12074            use datasynth_standards::audit::isa_reference::IsaStandard;
12075            snapshot.isa_mappings = IsaStandard::standard_entries();
12076        }
12077
12078        info!(
12079            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12080             {} risk assessments, {} findings, {} materiality calcs",
12081            snapshot.engagements.len(),
12082            snapshot.workpapers.len(),
12083            snapshot.evidence.len(),
12084            snapshot.risk_assessments.len(),
12085            snapshot.findings.len(),
12086            snapshot.materiality_calculations.len(),
12087        );
12088
12089        Ok(snapshot)
12090    }
12091
12092    /// Export journal entries as graph data for ML training and network reconstruction.
12093    ///
12094    /// Builds a transaction graph where:
12095    /// - Nodes are GL accounts
12096    /// - Edges are money flows from credit to debit accounts
12097    /// - Edge attributes include amount, date, business process, anomaly flags
12098    fn export_graphs(
12099        &mut self,
12100        entries: &[JournalEntry],
12101        _coa: &Arc<ChartOfAccounts>,
12102        stats: &mut EnhancedGenerationStatistics,
12103    ) -> SynthResult<GraphExportSnapshot> {
12104        let pb = self.create_progress_bar(100, "Exporting Graphs");
12105
12106        let mut snapshot = GraphExportSnapshot::default();
12107
12108        // Get output directory
12109        let output_dir = self
12110            .output_path
12111            .clone()
12112            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12113        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12114
12115        // Process each graph type configuration
12116        for graph_type in &self.config.graph_export.graph_types {
12117            if let Some(pb) = &pb {
12118                pb.inc(10);
12119            }
12120
12121            // Build transaction graph
12122            let graph_config = TransactionGraphConfig {
12123                include_vendors: false,
12124                include_customers: false,
12125                create_debit_credit_edges: true,
12126                include_document_nodes: graph_type.include_document_nodes,
12127                min_edge_weight: graph_type.min_edge_weight,
12128                aggregate_parallel_edges: graph_type.aggregate_edges,
12129                framework: None,
12130            };
12131
12132            let mut builder = TransactionGraphBuilder::new(graph_config);
12133            builder.add_journal_entries(entries);
12134            let graph = builder.build();
12135
12136            // Update stats
12137            stats.graph_node_count += graph.node_count();
12138            stats.graph_edge_count += graph.edge_count();
12139
12140            if let Some(pb) = &pb {
12141                pb.inc(40);
12142            }
12143
12144            // Export to each configured format
12145            for format in &self.config.graph_export.formats {
12146                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12147
12148                // Create output directory
12149                if let Err(e) = std::fs::create_dir_all(&format_dir) {
12150                    warn!("Failed to create graph output directory: {}", e);
12151                    continue;
12152                }
12153
12154                match format {
12155                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12156                        let pyg_config = PyGExportConfig {
12157                            common: datasynth_graph::CommonExportConfig {
12158                                export_node_features: true,
12159                                export_edge_features: true,
12160                                export_node_labels: true,
12161                                export_edge_labels: true,
12162                                export_masks: true,
12163                                train_ratio: self.config.graph_export.train_ratio,
12164                                val_ratio: self.config.graph_export.validation_ratio,
12165                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12166                            },
12167                            one_hot_categoricals: false,
12168                        };
12169
12170                        let exporter = PyGExporter::new(pyg_config);
12171                        match exporter.export(&graph, &format_dir) {
12172                            Ok(metadata) => {
12173                                snapshot.exports.insert(
12174                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
12175                                    GraphExportInfo {
12176                                        name: graph_type.name.clone(),
12177                                        format: "pytorch_geometric".to_string(),
12178                                        output_path: format_dir.clone(),
12179                                        node_count: metadata.num_nodes,
12180                                        edge_count: metadata.num_edges,
12181                                    },
12182                                );
12183                                snapshot.graph_count += 1;
12184                            }
12185                            Err(e) => {
12186                                warn!("Failed to export PyTorch Geometric graph: {}", e);
12187                            }
12188                        }
12189                    }
12190                    datasynth_config::schema::GraphExportFormat::Neo4j => {
12191                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12192
12193                        let neo4j_config = Neo4jExportConfig {
12194                            export_node_properties: true,
12195                            export_edge_properties: true,
12196                            export_features: true,
12197                            generate_cypher: true,
12198                            generate_admin_import: true,
12199                            database_name: "synth".to_string(),
12200                            cypher_batch_size: 1000,
12201                        };
12202
12203                        let exporter = Neo4jExporter::new(neo4j_config);
12204                        match exporter.export(&graph, &format_dir) {
12205                            Ok(metadata) => {
12206                                snapshot.exports.insert(
12207                                    format!("{}_{}", graph_type.name, "neo4j"),
12208                                    GraphExportInfo {
12209                                        name: graph_type.name.clone(),
12210                                        format: "neo4j".to_string(),
12211                                        output_path: format_dir.clone(),
12212                                        node_count: metadata.num_nodes,
12213                                        edge_count: metadata.num_edges,
12214                                    },
12215                                );
12216                                snapshot.graph_count += 1;
12217                            }
12218                            Err(e) => {
12219                                warn!("Failed to export Neo4j graph: {}", e);
12220                            }
12221                        }
12222                    }
12223                    datasynth_config::schema::GraphExportFormat::Dgl => {
12224                        use datasynth_graph::{DGLExportConfig, DGLExporter};
12225
12226                        let dgl_config = DGLExportConfig {
12227                            common: datasynth_graph::CommonExportConfig {
12228                                export_node_features: true,
12229                                export_edge_features: true,
12230                                export_node_labels: true,
12231                                export_edge_labels: true,
12232                                export_masks: true,
12233                                train_ratio: self.config.graph_export.train_ratio,
12234                                val_ratio: self.config.graph_export.validation_ratio,
12235                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12236                            },
12237                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
12238                            include_pickle_script: true, // DGL ecosystem standard helper
12239                        };
12240
12241                        let exporter = DGLExporter::new(dgl_config);
12242                        match exporter.export(&graph, &format_dir) {
12243                            Ok(metadata) => {
12244                                snapshot.exports.insert(
12245                                    format!("{}_{}", graph_type.name, "dgl"),
12246                                    GraphExportInfo {
12247                                        name: graph_type.name.clone(),
12248                                        format: "dgl".to_string(),
12249                                        output_path: format_dir.clone(),
12250                                        node_count: metadata.common.num_nodes,
12251                                        edge_count: metadata.common.num_edges,
12252                                    },
12253                                );
12254                                snapshot.graph_count += 1;
12255                            }
12256                            Err(e) => {
12257                                warn!("Failed to export DGL graph: {}", e);
12258                            }
12259                        }
12260                    }
12261                    datasynth_config::schema::GraphExportFormat::RustGraph => {
12262                        use datasynth_graph::{
12263                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12264                        };
12265
12266                        let rustgraph_config = RustGraphExportConfig {
12267                            include_features: true,
12268                            include_temporal: true,
12269                            include_labels: true,
12270                            source_name: "datasynth".to_string(),
12271                            batch_id: None,
12272                            output_format: RustGraphOutputFormat::JsonLines,
12273                            export_node_properties: true,
12274                            export_edge_properties: true,
12275                            pretty_print: false,
12276                        };
12277
12278                        let exporter = RustGraphExporter::new(rustgraph_config);
12279                        match exporter.export(&graph, &format_dir) {
12280                            Ok(metadata) => {
12281                                snapshot.exports.insert(
12282                                    format!("{}_{}", graph_type.name, "rustgraph"),
12283                                    GraphExportInfo {
12284                                        name: graph_type.name.clone(),
12285                                        format: "rustgraph".to_string(),
12286                                        output_path: format_dir.clone(),
12287                                        node_count: metadata.num_nodes,
12288                                        edge_count: metadata.num_edges,
12289                                    },
12290                                );
12291                                snapshot.graph_count += 1;
12292                            }
12293                            Err(e) => {
12294                                warn!("Failed to export RustGraph: {}", e);
12295                            }
12296                        }
12297                    }
12298                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12299                        // Hypergraph export is handled separately in Phase 10b
12300                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12301                    }
12302                }
12303            }
12304
12305            if let Some(pb) = &pb {
12306                pb.inc(40);
12307            }
12308        }
12309
12310        stats.graph_export_count = snapshot.graph_count;
12311        snapshot.exported = snapshot.graph_count > 0;
12312
12313        if let Some(pb) = pb {
12314            pb.finish_with_message(format!(
12315                "Graphs exported: {} graphs ({} nodes, {} edges)",
12316                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12317            ));
12318        }
12319
12320        Ok(snapshot)
12321    }
12322
12323    /// Build additional graph types (banking, approval, entity) when relevant data
12324    /// is available. These run as a late phase because the data they need (banking
12325    /// snapshot, intercompany snapshot) is only generated after the main graph
12326    /// export phase.
12327    fn build_additional_graphs(
12328        &self,
12329        banking: &BankingSnapshot,
12330        intercompany: &IntercompanySnapshot,
12331        entries: &[JournalEntry],
12332        stats: &mut EnhancedGenerationStatistics,
12333    ) {
12334        let output_dir = self
12335            .output_path
12336            .clone()
12337            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12338        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12339
12340        // Banking graph: build when banking customers and transactions exist
12341        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12342            info!("Phase 10c: Building banking network graph");
12343            let config = BankingGraphConfig::default();
12344            let mut builder = BankingGraphBuilder::new(config);
12345            builder.add_customers(&banking.customers);
12346            builder.add_accounts(&banking.accounts, &banking.customers);
12347            builder.add_transactions(&banking.transactions);
12348            let graph = builder.build();
12349
12350            let node_count = graph.node_count();
12351            let edge_count = graph.edge_count();
12352            stats.graph_node_count += node_count;
12353            stats.graph_edge_count += edge_count;
12354
12355            // Export as PyG if configured
12356            for format in &self.config.graph_export.formats {
12357                if matches!(
12358                    format,
12359                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12360                ) {
12361                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12362                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12363                        warn!("Failed to create banking graph output dir: {}", e);
12364                        continue;
12365                    }
12366                    let pyg_config = PyGExportConfig::default();
12367                    let exporter = PyGExporter::new(pyg_config);
12368                    if let Err(e) = exporter.export(&graph, &format_dir) {
12369                        warn!("Failed to export banking graph as PyG: {}", e);
12370                    } else {
12371                        info!(
12372                            "Banking network graph exported: {} nodes, {} edges",
12373                            node_count, edge_count
12374                        );
12375                    }
12376                }
12377            }
12378        }
12379
12380        // Approval graph: build from journal entry approval workflows
12381        let approval_entries: Vec<_> = entries
12382            .iter()
12383            .filter(|je| je.header.approval_workflow.is_some())
12384            .collect();
12385
12386        if !approval_entries.is_empty() {
12387            info!(
12388                "Phase 10c: Building approval network graph ({} entries with approvals)",
12389                approval_entries.len()
12390            );
12391            let config = ApprovalGraphConfig::default();
12392            let mut builder = ApprovalGraphBuilder::new(config);
12393
12394            for je in &approval_entries {
12395                if let Some(ref wf) = je.header.approval_workflow {
12396                    for action in &wf.actions {
12397                        let record = datasynth_core::models::ApprovalRecord {
12398                            approval_id: format!(
12399                                "APR-{}-{}",
12400                                je.header.document_id, action.approval_level
12401                            ),
12402                            document_number: je.header.document_id.to_string(),
12403                            document_type: "JE".to_string(),
12404                            company_code: je.company_code().to_string(),
12405                            requester_id: wf.preparer_id.clone(),
12406                            requester_name: Some(wf.preparer_name.clone()),
12407                            approver_id: action.actor_id.clone(),
12408                            approver_name: action.actor_name.clone(),
12409                            approval_date: je.posting_date(),
12410                            action: format!("{:?}", action.action),
12411                            amount: wf.amount,
12412                            approval_limit: None,
12413                            comments: action.comments.clone(),
12414                            delegation_from: None,
12415                            is_auto_approved: false,
12416                        };
12417                        builder.add_approval(&record);
12418                    }
12419                }
12420            }
12421
12422            let graph = builder.build();
12423            let node_count = graph.node_count();
12424            let edge_count = graph.edge_count();
12425            stats.graph_node_count += node_count;
12426            stats.graph_edge_count += edge_count;
12427
12428            // Export as PyG if configured
12429            for format in &self.config.graph_export.formats {
12430                if matches!(
12431                    format,
12432                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12433                ) {
12434                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12435                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12436                        warn!("Failed to create approval graph output dir: {}", e);
12437                        continue;
12438                    }
12439                    let pyg_config = PyGExportConfig::default();
12440                    let exporter = PyGExporter::new(pyg_config);
12441                    if let Err(e) = exporter.export(&graph, &format_dir) {
12442                        warn!("Failed to export approval graph as PyG: {}", e);
12443                    } else {
12444                        info!(
12445                            "Approval network graph exported: {} nodes, {} edges",
12446                            node_count, edge_count
12447                        );
12448                    }
12449                }
12450            }
12451        }
12452
12453        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
12454        if self.config.companies.len() >= 2 {
12455            info!(
12456                "Phase 10c: Building entity relationship graph ({} companies)",
12457                self.config.companies.len()
12458            );
12459
12460            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12461                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12462
12463            // Map CompanyConfig → Company objects
12464            let parent_code = &self.config.companies[0].code;
12465            let mut companies: Vec<datasynth_core::models::Company> =
12466                Vec::with_capacity(self.config.companies.len());
12467
12468            // First company is the parent
12469            let first = &self.config.companies[0];
12470            companies.push(datasynth_core::models::Company::parent(
12471                &first.code,
12472                &first.name,
12473                &first.country,
12474                &first.currency,
12475            ));
12476
12477            // Remaining companies are subsidiaries (100% owned by parent)
12478            for cc in self.config.companies.iter().skip(1) {
12479                companies.push(datasynth_core::models::Company::subsidiary(
12480                    &cc.code,
12481                    &cc.name,
12482                    &cc.country,
12483                    &cc.currency,
12484                    parent_code,
12485                    rust_decimal::Decimal::from(100),
12486                ));
12487            }
12488
12489            // Build IntercompanyRelationship records (same logic as phase_intercompany)
12490            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12491                self.config
12492                    .companies
12493                    .iter()
12494                    .skip(1)
12495                    .enumerate()
12496                    .map(|(i, cc)| {
12497                        let mut rel =
12498                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
12499                                format!("REL{:03}", i + 1),
12500                                parent_code.clone(),
12501                                cc.code.clone(),
12502                                rust_decimal::Decimal::from(100),
12503                                start_date,
12504                            );
12505                        rel.functional_currency = cc.currency.clone();
12506                        rel
12507                    })
12508                    .collect();
12509
12510            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12511            builder.add_companies(&companies);
12512            builder.add_ownership_relationships(&relationships);
12513
12514            // Thread IC matched-pair transaction edges into the entity graph
12515            for pair in &intercompany.matched_pairs {
12516                builder.add_intercompany_edge(
12517                    &pair.seller_company,
12518                    &pair.buyer_company,
12519                    pair.amount,
12520                    &format!("{:?}", pair.transaction_type),
12521                );
12522            }
12523
12524            let graph = builder.build();
12525            let node_count = graph.node_count();
12526            let edge_count = graph.edge_count();
12527            stats.graph_node_count += node_count;
12528            stats.graph_edge_count += edge_count;
12529
12530            // Export as PyG if configured
12531            for format in &self.config.graph_export.formats {
12532                if matches!(
12533                    format,
12534                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12535                ) {
12536                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12537                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12538                        warn!("Failed to create entity graph output dir: {}", e);
12539                        continue;
12540                    }
12541                    let pyg_config = PyGExportConfig::default();
12542                    let exporter = PyGExporter::new(pyg_config);
12543                    if let Err(e) = exporter.export(&graph, &format_dir) {
12544                        warn!("Failed to export entity graph as PyG: {}", e);
12545                    } else {
12546                        info!(
12547                            "Entity relationship graph exported: {} nodes, {} edges",
12548                            node_count, edge_count
12549                        );
12550                    }
12551                }
12552            }
12553        } else {
12554            debug!(
12555                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12556                self.config.companies.len()
12557            );
12558        }
12559    }
12560
12561    /// Export a multi-layer hypergraph for RustGraph integration.
12562    ///
12563    /// Builds a 3-layer hypergraph:
12564    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
12565    /// - Layer 2: Process Events (all process family document flows + OCPM events)
12566    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
12567    #[allow(clippy::too_many_arguments)]
12568    fn export_hypergraph(
12569        &self,
12570        coa: &Arc<ChartOfAccounts>,
12571        entries: &[JournalEntry],
12572        document_flows: &DocumentFlowSnapshot,
12573        sourcing: &SourcingSnapshot,
12574        hr: &HrSnapshot,
12575        manufacturing: &ManufacturingSnapshot,
12576        banking: &BankingSnapshot,
12577        audit: &AuditSnapshot,
12578        financial_reporting: &FinancialReportingSnapshot,
12579        ocpm: &OcpmSnapshot,
12580        compliance: &ComplianceRegulationsSnapshot,
12581        stats: &mut EnhancedGenerationStatistics,
12582    ) -> SynthResult<HypergraphExportInfo> {
12583        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12584        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12585        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12586        use datasynth_graph::models::hypergraph::AggregationStrategy;
12587
12588        let hg_settings = &self.config.graph_export.hypergraph;
12589
12590        // Parse aggregation strategy from config string
12591        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12592            "truncate" => AggregationStrategy::Truncate,
12593            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12594            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12595            "importance_sample" => AggregationStrategy::ImportanceSample,
12596            _ => AggregationStrategy::PoolByCounterparty,
12597        };
12598
12599        let builder_config = HypergraphConfig {
12600            max_nodes: hg_settings.max_nodes,
12601            aggregation_strategy,
12602            include_coso: hg_settings.governance_layer.include_coso,
12603            include_controls: hg_settings.governance_layer.include_controls,
12604            include_sox: hg_settings.governance_layer.include_sox,
12605            include_vendors: hg_settings.governance_layer.include_vendors,
12606            include_customers: hg_settings.governance_layer.include_customers,
12607            include_employees: hg_settings.governance_layer.include_employees,
12608            include_p2p: hg_settings.process_layer.include_p2p,
12609            include_o2c: hg_settings.process_layer.include_o2c,
12610            include_s2c: hg_settings.process_layer.include_s2c,
12611            include_h2r: hg_settings.process_layer.include_h2r,
12612            include_mfg: hg_settings.process_layer.include_mfg,
12613            include_bank: hg_settings.process_layer.include_bank,
12614            include_audit: hg_settings.process_layer.include_audit,
12615            include_r2r: hg_settings.process_layer.include_r2r,
12616            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12617            docs_per_counterparty_threshold: hg_settings
12618                .process_layer
12619                .docs_per_counterparty_threshold,
12620            include_accounts: hg_settings.accounting_layer.include_accounts,
12621            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12622            include_cross_layer_edges: hg_settings.cross_layer.enabled,
12623            include_compliance: self.config.compliance_regulations.enabled,
12624            include_tax: true,
12625            include_treasury: true,
12626            include_esg: true,
12627            include_project: true,
12628            include_intercompany: true,
12629            include_temporal_events: true,
12630        };
12631
12632        let mut builder = HypergraphBuilder::new(builder_config);
12633
12634        // Layer 1: Governance & Controls
12635        builder.add_coso_framework();
12636
12637        // Add controls if available (generated during JE generation)
12638        // Controls are generated per-company; we use the standard set
12639        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12640            let controls = InternalControl::standard_controls();
12641            builder.add_controls(&controls);
12642        }
12643
12644        // Add master data
12645        builder.add_vendors(&self.master_data.vendors);
12646        builder.add_customers(&self.master_data.customers);
12647        builder.add_employees(&self.master_data.employees);
12648
12649        // Layer 2: Process Events (all process families)
12650        builder.add_p2p_documents(
12651            &document_flows.purchase_orders,
12652            &document_flows.goods_receipts,
12653            &document_flows.vendor_invoices,
12654            &document_flows.payments,
12655        );
12656        builder.add_o2c_documents(
12657            &document_flows.sales_orders,
12658            &document_flows.deliveries,
12659            &document_flows.customer_invoices,
12660        );
12661        builder.add_s2c_documents(
12662            &sourcing.sourcing_projects,
12663            &sourcing.qualifications,
12664            &sourcing.rfx_events,
12665            &sourcing.bids,
12666            &sourcing.bid_evaluations,
12667            &sourcing.contracts,
12668        );
12669        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12670        builder.add_mfg_documents(
12671            &manufacturing.production_orders,
12672            &manufacturing.quality_inspections,
12673            &manufacturing.cycle_counts,
12674        );
12675        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12676        builder.add_audit_documents(
12677            &audit.engagements,
12678            &audit.workpapers,
12679            &audit.findings,
12680            &audit.evidence,
12681            &audit.risk_assessments,
12682            &audit.judgments,
12683            &audit.materiality_calculations,
12684            &audit.audit_opinions,
12685            &audit.going_concern_assessments,
12686        );
12687        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12688
12689        // OCPM events as hyperedges
12690        if let Some(ref event_log) = ocpm.event_log {
12691            builder.add_ocpm_events(event_log);
12692        }
12693
12694        // Compliance regulations as cross-layer nodes
12695        if self.config.compliance_regulations.enabled
12696            && hg_settings.governance_layer.include_controls
12697        {
12698            // Reconstruct ComplianceStandard objects from the registry
12699            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12700            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12701                .standard_records
12702                .iter()
12703                .filter_map(|r| {
12704                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12705                    registry.get(&sid).cloned()
12706                })
12707                .collect();
12708
12709            builder.add_compliance_regulations(
12710                &standards,
12711                &compliance.findings,
12712                &compliance.filings,
12713            );
12714        }
12715
12716        // Layer 3: Accounting Network
12717        builder.add_accounts(coa);
12718        builder.add_journal_entries_as_hyperedges(entries);
12719
12720        // Build the hypergraph
12721        let hypergraph = builder.build();
12722
12723        // Export
12724        let output_dir = self
12725            .output_path
12726            .clone()
12727            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12728        let hg_dir = output_dir
12729            .join(&self.config.graph_export.output_subdirectory)
12730            .join(&hg_settings.output_subdirectory);
12731
12732        // Branch on output format
12733        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12734            "unified" => {
12735                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12736                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12737                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12738                })?;
12739                (
12740                    metadata.num_nodes,
12741                    metadata.num_edges,
12742                    metadata.num_hyperedges,
12743                )
12744            }
12745            _ => {
12746                // "native" or any unrecognized format → use existing exporter
12747                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12748                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12749                    SynthError::generation(format!("Hypergraph export failed: {e}"))
12750                })?;
12751                (
12752                    metadata.num_nodes,
12753                    metadata.num_edges,
12754                    metadata.num_hyperedges,
12755                )
12756            }
12757        };
12758
12759        // Stream to RustGraph ingest endpoint if configured
12760        #[cfg(feature = "streaming")]
12761        if let Some(ref target_url) = hg_settings.stream_target {
12762            use crate::stream_client::{StreamClient, StreamConfig};
12763            use std::io::Write as _;
12764
12765            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12766            let stream_config = StreamConfig {
12767                target_url: target_url.clone(),
12768                batch_size: hg_settings.stream_batch_size,
12769                api_key,
12770                ..StreamConfig::default()
12771            };
12772
12773            match StreamClient::new(stream_config) {
12774                Ok(mut client) => {
12775                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12776                    match exporter.export_to_writer(&hypergraph, &mut client) {
12777                        Ok(_) => {
12778                            if let Err(e) = client.flush() {
12779                                warn!("Failed to flush stream client: {}", e);
12780                            } else {
12781                                info!("Streamed {} records to {}", client.total_sent(), target_url);
12782                            }
12783                        }
12784                        Err(e) => {
12785                            warn!("Streaming export failed: {}", e);
12786                        }
12787                    }
12788                }
12789                Err(e) => {
12790                    warn!("Failed to create stream client: {}", e);
12791                }
12792            }
12793        }
12794
12795        // Update stats
12796        stats.graph_node_count += num_nodes;
12797        stats.graph_edge_count += num_edges;
12798        stats.graph_export_count += 1;
12799
12800        Ok(HypergraphExportInfo {
12801            node_count: num_nodes,
12802            edge_count: num_edges,
12803            hyperedge_count: num_hyperedges,
12804            output_path: hg_dir,
12805        })
12806    }
12807
12808    /// Generate banking KYC/AML data.
12809    ///
12810    /// Creates banking customers, accounts, and transactions with AML typology injection.
12811    /// Uses the BankingOrchestrator from synth-banking crate.
12812    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12813        let pb = self.create_progress_bar(100, "Generating Banking Data");
12814
12815        // Build the banking orchestrator from config
12816        let orchestrator = BankingOrchestratorBuilder::new()
12817            .config(self.config.banking.clone())
12818            .seed(self.seed + 9000)
12819            .country_pack(self.primary_pack().clone())
12820            .build();
12821
12822        if let Some(pb) = &pb {
12823            pb.inc(10);
12824        }
12825
12826        // Generate the banking data
12827        let result = orchestrator.generate();
12828
12829        if let Some(pb) = &pb {
12830            pb.inc(90);
12831            pb.finish_with_message(format!(
12832                "Banking: {} customers, {} transactions",
12833                result.customers.len(),
12834                result.transactions.len()
12835            ));
12836        }
12837
12838        // Cross-reference banking customers with core master data so that
12839        // banking customer names align with the enterprise customer list.
12840        // We rotate through core customers, overlaying their name and country
12841        // onto the generated banking customers where possible.
12842        let mut banking_customers = result.customers;
12843        let core_customers = &self.master_data.customers;
12844        if !core_customers.is_empty() {
12845            for (i, bc) in banking_customers.iter_mut().enumerate() {
12846                let core = &core_customers[i % core_customers.len()];
12847                bc.name = CustomerName::business(&core.name);
12848                bc.residence_country = core.country.clone();
12849                bc.enterprise_customer_id = Some(core.customer_id.clone());
12850            }
12851            debug!(
12852                "Cross-referenced {} banking customers with {} core customers",
12853                banking_customers.len(),
12854                core_customers.len()
12855            );
12856        }
12857
12858        Ok(BankingSnapshot {
12859            customers: banking_customers,
12860            accounts: result.accounts,
12861            transactions: result.transactions,
12862            transaction_labels: result.transaction_labels,
12863            customer_labels: result.customer_labels,
12864            account_labels: result.account_labels,
12865            relationship_labels: result.relationship_labels,
12866            narratives: result.narratives,
12867            suspicious_count: result.stats.suspicious_count,
12868            scenario_count: result.scenarios.len(),
12869        })
12870    }
12871
12872    /// Calculate total transactions to generate.
12873    fn calculate_total_transactions(&self) -> u64 {
12874        let months = self.config.global.period_months as f64;
12875        self.config
12876            .companies
12877            .iter()
12878            .map(|c| {
12879                let annual = c.annual_transaction_volume.count() as f64;
12880                let weighted = annual * c.volume_weight;
12881                (weighted * months / 12.0) as u64
12882            })
12883            .sum()
12884    }
12885
12886    /// Create a progress bar if progress display is enabled.
12887    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12888        if !self.phase_config.show_progress {
12889            return None;
12890        }
12891
12892        let pb = if let Some(mp) = &self.multi_progress {
12893            mp.add(ProgressBar::new(total))
12894        } else {
12895            ProgressBar::new(total)
12896        };
12897
12898        pb.set_style(
12899            ProgressStyle::default_bar()
12900                .template(&format!(
12901                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12902                ))
12903                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12904                .progress_chars("#>-"),
12905        );
12906
12907        Some(pb)
12908    }
12909
12910    /// Get the generated chart of accounts.
12911    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12912        self.coa.clone()
12913    }
12914
    /// Get the generated master data.
    ///
    /// Returns a shared borrow of the orchestrator's master data snapshot; the
    /// snapshot is not cloned.
    pub fn get_master_data(&self) -> &MasterDataSnapshot {
        &self.master_data
    }
12919
12920    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
12921    fn phase_compliance_regulations(
12922        &mut self,
12923        _stats: &mut EnhancedGenerationStatistics,
12924    ) -> SynthResult<ComplianceRegulationsSnapshot> {
12925        if !self.phase_config.generate_compliance_regulations {
12926            return Ok(ComplianceRegulationsSnapshot::default());
12927        }
12928
12929        info!("Phase: Generating Compliance Regulations Data");
12930
12931        let cr_config = &self.config.compliance_regulations;
12932
12933        // Determine jurisdictions: from config or inferred from companies
12934        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
12935            self.config
12936                .companies
12937                .iter()
12938                .map(|c| c.country.clone())
12939                .collect::<std::collections::HashSet<_>>()
12940                .into_iter()
12941                .collect()
12942        } else {
12943            cr_config.jurisdictions.clone()
12944        };
12945
12946        // Determine reference date
12947        let fallback_date =
12948            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
12949        let reference_date = cr_config
12950            .reference_date
12951            .as_ref()
12952            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
12953            .unwrap_or_else(|| {
12954                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12955                    .unwrap_or(fallback_date)
12956            });
12957
12958        // Generate standards registry data
12959        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
12960        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
12961        let cross_reference_records = reg_gen.generate_cross_reference_records();
12962        let jurisdiction_records =
12963            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
12964
12965        info!(
12966            "  Standards: {} records, {} cross-references, {} jurisdictions",
12967            standard_records.len(),
12968            cross_reference_records.len(),
12969            jurisdiction_records.len()
12970        );
12971
12972        // Generate audit procedures (if enabled)
12973        let audit_procedures = if cr_config.audit_procedures.enabled {
12974            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
12975                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
12976                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
12977                confidence_level: cr_config.audit_procedures.confidence_level,
12978                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
12979            };
12980            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
12981                self.seed + 9000,
12982                proc_config,
12983            );
12984            let registry = reg_gen.registry();
12985            let mut all_procs = Vec::new();
12986            for jurisdiction in &jurisdictions {
12987                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
12988                all_procs.extend(procs);
12989            }
12990            info!("  Audit procedures: {}", all_procs.len());
12991            all_procs
12992        } else {
12993            Vec::new()
12994        };
12995
12996        // Generate compliance findings (if enabled)
12997        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
12998            let finding_config =
12999                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13000                    finding_rate: cr_config.findings.finding_rate,
13001                    material_weakness_rate: cr_config.findings.material_weakness_rate,
13002                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13003                    generate_remediation: cr_config.findings.generate_remediation,
13004                };
13005            let mut finding_gen =
13006                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13007                    self.seed + 9100,
13008                    finding_config,
13009                );
13010            let mut all_findings = Vec::new();
13011            for company in &self.config.companies {
13012                let company_findings =
13013                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13014                all_findings.extend(company_findings);
13015            }
13016            info!("  Compliance findings: {}", all_findings.len());
13017            all_findings
13018        } else {
13019            Vec::new()
13020        };
13021
13022        // Generate regulatory filings (if enabled)
13023        let filings = if cr_config.filings.enabled {
13024            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13025                filing_types: cr_config.filings.filing_types.clone(),
13026                generate_status_progression: cr_config.filings.generate_status_progression,
13027            };
13028            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13029                self.seed + 9200,
13030                filing_config,
13031            );
13032            let company_codes: Vec<String> = self
13033                .config
13034                .companies
13035                .iter()
13036                .map(|c| c.code.clone())
13037                .collect();
13038            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13039                .unwrap_or(fallback_date);
13040            let filings = filing_gen.generate_filings(
13041                &company_codes,
13042                &jurisdictions,
13043                start_date,
13044                self.config.global.period_months,
13045            );
13046            info!("  Regulatory filings: {}", filings.len());
13047            filings
13048        } else {
13049            Vec::new()
13050        };
13051
13052        // Build compliance graph (if enabled)
13053        let compliance_graph = if cr_config.graph.enabled {
13054            let graph_config = datasynth_graph::ComplianceGraphConfig {
13055                include_standard_nodes: cr_config.graph.include_compliance_nodes,
13056                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13057                include_cross_references: cr_config.graph.include_cross_references,
13058                include_supersession_edges: cr_config.graph.include_supersession_edges,
13059                include_account_links: cr_config.graph.include_account_links,
13060                include_control_links: cr_config.graph.include_control_links,
13061                include_company_links: cr_config.graph.include_company_links,
13062            };
13063            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13064
13065            // Add standard nodes
13066            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13067                .iter()
13068                .map(|r| datasynth_graph::StandardNodeInput {
13069                    standard_id: r.standard_id.clone(),
13070                    title: r.title.clone(),
13071                    category: r.category.clone(),
13072                    domain: r.domain.clone(),
13073                    is_active: r.is_active,
13074                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
13075                    applicable_account_types: r.applicable_account_types.clone(),
13076                    applicable_processes: r.applicable_processes.clone(),
13077                })
13078                .collect();
13079            builder.add_standards(&standard_inputs);
13080
13081            // Add jurisdiction nodes
13082            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13083                jurisdiction_records
13084                    .iter()
13085                    .map(|r| datasynth_graph::JurisdictionNodeInput {
13086                        country_code: r.country_code.clone(),
13087                        country_name: r.country_name.clone(),
13088                        framework: r.accounting_framework.clone(),
13089                        standard_count: r.standard_count,
13090                        tax_rate: r.statutory_tax_rate,
13091                    })
13092                    .collect();
13093            builder.add_jurisdictions(&jurisdiction_inputs);
13094
13095            // Add cross-reference edges
13096            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13097                cross_reference_records
13098                    .iter()
13099                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13100                        from_standard: r.from_standard.clone(),
13101                        to_standard: r.to_standard.clone(),
13102                        relationship: r.relationship.clone(),
13103                        convergence_level: r.convergence_level,
13104                    })
13105                    .collect();
13106            builder.add_cross_references(&xref_inputs);
13107
13108            // Add jurisdiction→standard mappings
13109            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13110                .iter()
13111                .map(|r| datasynth_graph::JurisdictionMappingInput {
13112                    country_code: r.jurisdiction.clone(),
13113                    standard_id: r.standard_id.clone(),
13114                })
13115                .collect();
13116            builder.add_jurisdiction_mappings(&mapping_inputs);
13117
13118            // Add procedure nodes
13119            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13120                .iter()
13121                .map(|p| datasynth_graph::ProcedureNodeInput {
13122                    procedure_id: p.procedure_id.clone(),
13123                    standard_id: p.standard_id.clone(),
13124                    procedure_type: p.procedure_type.clone(),
13125                    sample_size: p.sample_size,
13126                    confidence_level: p.confidence_level,
13127                })
13128                .collect();
13129            builder.add_procedures(&proc_inputs);
13130
13131            // Add finding nodes
13132            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13133                .iter()
13134                .map(|f| datasynth_graph::FindingNodeInput {
13135                    finding_id: f.finding_id.to_string(),
13136                    standard_id: f
13137                        .related_standards
13138                        .first()
13139                        .map(|s| s.as_str().to_string())
13140                        .unwrap_or_default(),
13141                    severity: f.severity.to_string(),
13142                    deficiency_level: f.deficiency_level.to_string(),
13143                    severity_score: f.deficiency_level.severity_score(),
13144                    control_id: f.control_id.clone(),
13145                    affected_accounts: f.affected_accounts.clone(),
13146                })
13147                .collect();
13148            builder.add_findings(&finding_inputs);
13149
13150            // Cross-domain: link standards to accounts from chart of accounts
13151            if cr_config.graph.include_account_links {
13152                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13153                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13154                for std_record in &standard_records {
13155                    if let Some(std_obj) =
13156                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
13157                            &std_record.standard_id,
13158                        ))
13159                    {
13160                        for acct_type in &std_obj.applicable_account_types {
13161                            account_links.push(datasynth_graph::AccountLinkInput {
13162                                standard_id: std_record.standard_id.clone(),
13163                                account_code: acct_type.clone(),
13164                                account_name: acct_type.clone(),
13165                            });
13166                        }
13167                    }
13168                }
13169                builder.add_account_links(&account_links);
13170            }
13171
13172            // Cross-domain: link standards to internal controls
13173            if cr_config.graph.include_control_links {
13174                let mut control_links = Vec::new();
13175                // SOX/PCAOB standards link to all controls
13176                let sox_like_ids: Vec<String> = standard_records
13177                    .iter()
13178                    .filter(|r| {
13179                        r.standard_id.starts_with("SOX")
13180                            || r.standard_id.starts_with("PCAOB-AS-2201")
13181                    })
13182                    .map(|r| r.standard_id.clone())
13183                    .collect();
13184                // Get control IDs from config (C001-C060 standard controls)
13185                let control_ids = [
13186                    ("C001", "Cash Controls"),
13187                    ("C002", "Large Transaction Approval"),
13188                    ("C010", "PO Approval"),
13189                    ("C011", "Three-Way Match"),
13190                    ("C020", "Revenue Recognition"),
13191                    ("C021", "Credit Check"),
13192                    ("C030", "Manual JE Approval"),
13193                    ("C031", "Period Close Review"),
13194                    ("C032", "Account Reconciliation"),
13195                    ("C040", "Payroll Processing"),
13196                    ("C050", "Fixed Asset Capitalization"),
13197                    ("C060", "Intercompany Elimination"),
13198                ];
13199                for sox_id in &sox_like_ids {
13200                    for (ctrl_id, ctrl_name) in &control_ids {
13201                        control_links.push(datasynth_graph::ControlLinkInput {
13202                            standard_id: sox_id.clone(),
13203                            control_id: ctrl_id.to_string(),
13204                            control_name: ctrl_name.to_string(),
13205                        });
13206                    }
13207                }
13208                builder.add_control_links(&control_links);
13209            }
13210
13211            // Cross-domain: filing nodes with company links
13212            if cr_config.graph.include_company_links {
13213                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13214                    .iter()
13215                    .enumerate()
13216                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
13217                        filing_id: format!("F{:04}", i + 1),
13218                        filing_type: f.filing_type.to_string(),
13219                        company_code: f.company_code.clone(),
13220                        jurisdiction: f.jurisdiction.clone(),
13221                        status: format!("{:?}", f.status),
13222                    })
13223                    .collect();
13224                builder.add_filings(&filing_inputs);
13225            }
13226
13227            let graph = builder.build();
13228            info!(
13229                "  Compliance graph: {} nodes, {} edges",
13230                graph.nodes.len(),
13231                graph.edges.len()
13232            );
13233            Some(graph)
13234        } else {
13235            None
13236        };
13237
13238        self.check_resources_with_log("post-compliance-regulations")?;
13239
13240        Ok(ComplianceRegulationsSnapshot {
13241            standard_records,
13242            cross_reference_records,
13243            jurisdiction_records,
13244            audit_procedures,
13245            findings,
13246            filings,
13247            compliance_graph,
13248        })
13249    }
13250
13251    /// Build a lineage graph describing config → phase → output relationships.
13252    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13253        use super::lineage::LineageGraphBuilder;
13254
13255        let mut builder = LineageGraphBuilder::new();
13256
13257        // Config sections
13258        builder.add_config_section("config:global", "Global Config");
13259        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13260        builder.add_config_section("config:transactions", "Transaction Config");
13261
13262        // Generator phases
13263        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13264        builder.add_generator_phase("phase:je", "Journal Entry Generation");
13265
13266        // Config → phase edges
13267        builder.configured_by("phase:coa", "config:chart_of_accounts");
13268        builder.configured_by("phase:je", "config:transactions");
13269
13270        // Output files
13271        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13272        builder.produced_by("output:je", "phase:je");
13273
13274        // Optional phases based on config
13275        if self.phase_config.generate_master_data {
13276            builder.add_config_section("config:master_data", "Master Data Config");
13277            builder.add_generator_phase("phase:master_data", "Master Data Generation");
13278            builder.configured_by("phase:master_data", "config:master_data");
13279            builder.input_to("phase:master_data", "phase:je");
13280        }
13281
13282        if self.phase_config.generate_document_flows {
13283            builder.add_config_section("config:document_flows", "Document Flow Config");
13284            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13285            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13286            builder.configured_by("phase:p2p", "config:document_flows");
13287            builder.configured_by("phase:o2c", "config:document_flows");
13288
13289            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13290            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13291            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13292            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13293            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13294
13295            builder.produced_by("output:po", "phase:p2p");
13296            builder.produced_by("output:gr", "phase:p2p");
13297            builder.produced_by("output:vi", "phase:p2p");
13298            builder.produced_by("output:so", "phase:o2c");
13299            builder.produced_by("output:ci", "phase:o2c");
13300        }
13301
13302        if self.phase_config.inject_anomalies {
13303            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13304            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13305            builder.configured_by("phase:anomaly", "config:fraud");
13306            builder.add_output_file(
13307                "output:labels",
13308                "Anomaly Labels",
13309                "labels/anomaly_labels.csv",
13310            );
13311            builder.produced_by("output:labels", "phase:anomaly");
13312        }
13313
13314        if self.phase_config.generate_audit {
13315            builder.add_config_section("config:audit", "Audit Config");
13316            builder.add_generator_phase("phase:audit", "Audit Data Generation");
13317            builder.configured_by("phase:audit", "config:audit");
13318        }
13319
13320        if self.phase_config.generate_banking {
13321            builder.add_config_section("config:banking", "Banking Config");
13322            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13323            builder.configured_by("phase:banking", "config:banking");
13324        }
13325
13326        if self.config.llm.enabled {
13327            builder.add_config_section("config:llm", "LLM Enrichment Config");
13328            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13329            builder.configured_by("phase:llm_enrichment", "config:llm");
13330        }
13331
13332        if self.config.diffusion.enabled {
13333            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13334            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13335            builder.configured_by("phase:diffusion", "config:diffusion");
13336        }
13337
13338        if self.config.causal.enabled {
13339            builder.add_config_section("config:causal", "Causal Generation Config");
13340            builder.add_generator_phase("phase:causal", "Causal Overlay");
13341            builder.configured_by("phase:causal", "config:causal");
13342        }
13343
13344        builder.build()
13345    }
13346
13347    // -----------------------------------------------------------------------
13348    // Trial-balance helpers used to replace hardcoded proxy values
13349    // -----------------------------------------------------------------------
13350
13351    /// Compute total revenue for a company from its journal entries.
13352    ///
13353    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
13354    /// net credits on all revenue-account lines filtered to `company_code`.
13355    fn compute_company_revenue(
13356        entries: &[JournalEntry],
13357        company_code: &str,
13358    ) -> rust_decimal::Decimal {
13359        use rust_decimal::Decimal;
13360        let mut revenue = Decimal::ZERO;
13361        for je in entries {
13362            if je.header.company_code != company_code {
13363                continue;
13364            }
13365            for line in &je.lines {
13366                if line.gl_account.starts_with('4') {
13367                    // Revenue is credit-normal
13368                    revenue += line.credit_amount - line.debit_amount;
13369                }
13370            }
13371        }
13372        revenue.max(Decimal::ZERO)
13373    }
13374
13375    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
13376    ///
13377    /// Asset accounts start with "1"; liability accounts start with "2".
13378    fn compute_entity_net_assets(
13379        entries: &[JournalEntry],
13380        entity_code: &str,
13381    ) -> rust_decimal::Decimal {
13382        use rust_decimal::Decimal;
13383        let mut asset_net = Decimal::ZERO;
13384        let mut liability_net = Decimal::ZERO;
13385        for je in entries {
13386            if je.header.company_code != entity_code {
13387                continue;
13388            }
13389            for line in &je.lines {
13390                if line.gl_account.starts_with('1') {
13391                    asset_net += line.debit_amount - line.credit_amount;
13392                } else if line.gl_account.starts_with('2') {
13393                    liability_net += line.credit_amount - line.debit_amount;
13394                }
13395            }
13396        }
13397        asset_net - liability_net
13398    }
13399}
13400
13401/// Get the directory name for a graph export format.
13402fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13403    match format {
13404        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13405        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13406        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13407        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13408        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13409    }
13410}
13411
13412/// Aggregate journal entry lines into per-account trial balance rows.
13413///
13414/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
13415/// debit/credit totals and a net balance (debit minus credit).
13416fn compute_trial_balance_entries(
13417    entries: &[JournalEntry],
13418    entity_code: &str,
13419    fiscal_year: i32,
13420    coa: Option<&ChartOfAccounts>,
13421) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13422    use std::collections::BTreeMap;
13423
13424    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13425        BTreeMap::new();
13426
13427    for je in entries {
13428        for line in &je.lines {
13429            let entry = balances.entry(line.account_code.clone()).or_default();
13430            entry.0 += line.debit_amount;
13431            entry.1 += line.credit_amount;
13432        }
13433    }
13434
13435    balances
13436        .into_iter()
13437        .map(
13438            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13439                account_description: coa
13440                    .and_then(|c| c.get_account(&account_code))
13441                    .map(|a| a.description().to_string())
13442                    .unwrap_or_else(|| account_code.clone()),
13443                account_code,
13444                debit_balance: debit,
13445                credit_balance: credit,
13446                net_balance: debit - credit,
13447                entity_code: entity_code.to_string(),
13448                period: format!("FY{}", fiscal_year),
13449            },
13450        )
13451        .collect()
13452}
13453
13454#[cfg(test)]
13455#[allow(clippy::unwrap_used)]
13456mod tests {
13457    use super::*;
13458    use datasynth_config::schema::*;
13459
13460    fn create_test_config() -> GeneratorConfig {
13461        GeneratorConfig {
13462            global: GlobalConfig {
13463                industry: IndustrySector::Manufacturing,
13464                start_date: "2024-01-01".to_string(),
13465                period_months: 1,
13466                seed: Some(42),
13467                parallel: false,
13468                group_currency: "USD".to_string(),
13469                presentation_currency: None,
13470                worker_threads: 0,
13471                memory_limit_mb: 0,
13472                fiscal_year_months: None,
13473            },
13474            companies: vec![CompanyConfig {
13475                code: "1000".to_string(),
13476                name: "Test Company".to_string(),
13477                currency: "USD".to_string(),
13478                functional_currency: None,
13479                country: "US".to_string(),
13480                annual_transaction_volume: TransactionVolume::TenK,
13481                volume_weight: 1.0,
13482                fiscal_year_variant: "K4".to_string(),
13483            }],
13484            chart_of_accounts: ChartOfAccountsConfig {
13485                complexity: CoAComplexity::Small,
13486                industry_specific: true,
13487                custom_accounts: None,
13488                min_hierarchy_depth: 2,
13489                max_hierarchy_depth: 4,
13490            },
13491            transactions: TransactionConfig::default(),
13492            output: OutputConfig::default(),
13493            fraud: FraudConfig::default(),
13494            internal_controls: InternalControlsConfig::default(),
13495            business_processes: BusinessProcessConfig::default(),
13496            user_personas: UserPersonaConfig::default(),
13497            templates: TemplateConfig::default(),
13498            approval: ApprovalConfig::default(),
13499            departments: DepartmentConfig::default(),
13500            master_data: MasterDataConfig::default(),
13501            document_flows: DocumentFlowConfig::default(),
13502            intercompany: IntercompanyConfig::default(),
13503            balance: BalanceConfig::default(),
13504            ocpm: OcpmConfig::default(),
13505            audit: AuditGenerationConfig::default(),
13506            banking: datasynth_banking::BankingConfig::default(),
13507            data_quality: DataQualitySchemaConfig::default(),
13508            scenario: ScenarioConfig::default(),
13509            temporal: TemporalDriftConfig::default(),
13510            graph_export: GraphExportConfig::default(),
13511            streaming: StreamingSchemaConfig::default(),
13512            rate_limit: RateLimitSchemaConfig::default(),
13513            temporal_attributes: TemporalAttributeSchemaConfig::default(),
13514            relationships: RelationshipSchemaConfig::default(),
13515            accounting_standards: AccountingStandardsConfig::default(),
13516            audit_standards: AuditStandardsConfig::default(),
13517            distributions: Default::default(),
13518            temporal_patterns: Default::default(),
13519            vendor_network: VendorNetworkSchemaConfig::default(),
13520            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13521            relationship_strength: RelationshipStrengthSchemaConfig::default(),
13522            cross_process_links: CrossProcessLinksSchemaConfig::default(),
13523            organizational_events: OrganizationalEventsSchemaConfig::default(),
13524            behavioral_drift: BehavioralDriftSchemaConfig::default(),
13525            market_drift: MarketDriftSchemaConfig::default(),
13526            drift_labeling: DriftLabelingSchemaConfig::default(),
13527            anomaly_injection: Default::default(),
13528            industry_specific: Default::default(),
13529            fingerprint_privacy: Default::default(),
13530            quality_gates: Default::default(),
13531            compliance: Default::default(),
13532            webhooks: Default::default(),
13533            llm: Default::default(),
13534            diffusion: Default::default(),
13535            causal: Default::default(),
13536            source_to_pay: Default::default(),
13537            financial_reporting: Default::default(),
13538            hr: Default::default(),
13539            manufacturing: Default::default(),
13540            sales_quotes: Default::default(),
13541            tax: Default::default(),
13542            treasury: Default::default(),
13543            project_accounting: Default::default(),
13544            esg: Default::default(),
13545            country_packs: None,
13546            scenarios: Default::default(),
13547            session: Default::default(),
13548            compliance_regulations: Default::default(),
13549        }
13550    }
13551
13552    #[test]
13553    fn test_enhanced_orchestrator_creation() {
13554        let config = create_test_config();
13555        let orchestrator = EnhancedOrchestrator::with_defaults(config);
13556        assert!(orchestrator.is_ok());
13557    }
13558
13559    #[test]
13560    fn test_minimal_generation() {
13561        let config = create_test_config();
13562        let phase_config = PhaseConfig {
13563            generate_master_data: false,
13564            generate_document_flows: false,
13565            generate_journal_entries: true,
13566            inject_anomalies: false,
13567            show_progress: false,
13568            ..Default::default()
13569        };
13570
13571        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13572        let result = orchestrator.generate();
13573
13574        assert!(result.is_ok());
13575        let result = result.unwrap();
13576        assert!(!result.journal_entries.is_empty());
13577    }
13578
13579    #[test]
13580    fn test_master_data_generation() {
13581        let config = create_test_config();
13582        let phase_config = PhaseConfig {
13583            generate_master_data: true,
13584            generate_document_flows: false,
13585            generate_journal_entries: false,
13586            inject_anomalies: false,
13587            show_progress: false,
13588            vendors_per_company: 5,
13589            customers_per_company: 5,
13590            materials_per_company: 10,
13591            assets_per_company: 5,
13592            employees_per_company: 10,
13593            ..Default::default()
13594        };
13595
13596        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13597        let result = orchestrator.generate().unwrap();
13598
13599        assert!(!result.master_data.vendors.is_empty());
13600        assert!(!result.master_data.customers.is_empty());
13601        assert!(!result.master_data.materials.is_empty());
13602    }
13603
13604    #[test]
13605    fn test_document_flow_generation() {
13606        let config = create_test_config();
13607        let phase_config = PhaseConfig {
13608            generate_master_data: true,
13609            generate_document_flows: true,
13610            generate_journal_entries: false,
13611            inject_anomalies: false,
13612            inject_data_quality: false,
13613            validate_balances: false,
13614            generate_ocpm_events: false,
13615            show_progress: false,
13616            vendors_per_company: 5,
13617            customers_per_company: 5,
13618            materials_per_company: 10,
13619            assets_per_company: 5,
13620            employees_per_company: 10,
13621            p2p_chains: 5,
13622            o2c_chains: 5,
13623            ..Default::default()
13624        };
13625
13626        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13627        let result = orchestrator.generate().unwrap();
13628
13629        // Should have generated P2P and O2C chains
13630        assert!(!result.document_flows.p2p_chains.is_empty());
13631        assert!(!result.document_flows.o2c_chains.is_empty());
13632
13633        // Flattened documents should be populated
13634        assert!(!result.document_flows.purchase_orders.is_empty());
13635        assert!(!result.document_flows.sales_orders.is_empty());
13636    }
13637
13638    #[test]
13639    fn test_anomaly_injection() {
13640        let config = create_test_config();
13641        let phase_config = PhaseConfig {
13642            generate_master_data: false,
13643            generate_document_flows: false,
13644            generate_journal_entries: true,
13645            inject_anomalies: true,
13646            show_progress: false,
13647            ..Default::default()
13648        };
13649
13650        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13651        let result = orchestrator.generate().unwrap();
13652
13653        // Should have journal entries
13654        assert!(!result.journal_entries.is_empty());
13655
13656        // With ~833 entries and 2% rate, expect some anomalies
13657        // Note: This is probabilistic, so we just verify the structure exists
13658        assert!(result.anomaly_labels.summary.is_some());
13659    }
13660
/// End-to-end run with master data, document flows, journal entries and
/// balance validation enabled; checks that each enabled phase left output
/// in the generation result.
#[test]
fn test_full_generation_pipeline() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Phases that are switched on for this run.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: true,
        validate_balances: true,
        // Phases that are switched off for this run.
        inject_anomalies: false,
        inject_data_quality: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Per-company master data volumes and chain counts.
        vendors_per_company: 3,
        customers_per_company: 3,
        materials_per_company: 5,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 3,
        o2c_chains: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    // Master data, document flows, journal entries and the chart of accounts
    // must all be populated.
    let master = &output.master_data;
    assert!(!master.vendors.is_empty());
    assert!(!master.customers.is_empty());
    let flows = &output.document_flows;
    assert!(!flows.p2p_chains.is_empty());
    assert!(!flows.o2c_chains.is_empty());
    assert!(!output.journal_entries.is_empty());
    assert!(output.statistics.accounts_count > 0);

    // Subledger linking must have produced AP and AR invoices.
    let subledger = &output.subledger;
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // Balance validation must have run over at least one entry.
    let validation = &output.balance_validation;
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);
}
13702
/// Runs master data and document flow generation only and verifies that the
/// AP/AR subledger mirrors the generated vendor/customer invoices one-to-one,
/// and that the statistics report the same counts.
#[test]
fn test_subledger_linking() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Only master data and document flows are generated.
        generate_master_data: true,
        generate_document_flows: true,
        generate_journal_entries: false,
        validate_balances: false,
        inject_anomalies: false,
        inject_data_quality: false,
        generate_ocpm_events: false,
        show_progress: false,
        // Per-company master data volumes and chain counts.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 10,
        assets_per_company: 3,
        employees_per_company: 5,
        p2p_chains: 5,
        o2c_chains: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    let flows = &output.document_flows;
    let subledger = &output.subledger;
    let stats = &output.statistics;

    // Document flows must contain invoices on both sides.
    assert!(!flows.vendor_invoices.is_empty());
    assert!(!flows.customer_invoices.is_empty());

    // The subledger is derived from those document flows.
    assert!(!subledger.ap_invoices.is_empty());
    assert!(!subledger.ar_invoices.is_empty());

    // One AP invoice per vendor invoice.
    assert_eq!(subledger.ap_invoices.len(), flows.vendor_invoices.len());

    // One AR invoice per customer invoice.
    assert_eq!(subledger.ar_invoices.len(), flows.customer_invoices.len());

    // Reported statistics must agree with the subledger contents.
    assert_eq!(stats.ap_invoice_count, subledger.ap_invoices.len());
    assert_eq!(stats.ar_invoice_count, subledger.ar_invoices.len());
}
13758
/// Generates journal entries with balance validation enabled and checks that
/// validation ran, found no unbalanced entries, and that total debits equal
/// total credits.
#[test]
fn test_balance_validation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        validate_balances: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();
    let validation = &output.balance_validation;

    // Validation must have executed over at least one entry.
    assert!(validation.validated);
    assert!(validation.entries_processed > 0);

    // No generated journal entry may be unbalanced.
    assert!(!validation.has_unbalanced_entries);

    // Debits and credits must net out across the whole run.
    assert_eq!(validation.total_debits, validation.total_credits);
}
13788
/// Verifies that the vendor, customer, material and journal-entry counters in
/// the statistics equal the sizes of the corresponding generated collections.
#[test]
fn test_statistics_accuracy() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        // Per-company master data volumes.
        vendors_per_company: 10,
        customers_per_company: 20,
        materials_per_company: 15,
        assets_per_company: 5,
        employees_per_company: 8,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    let stats = &output.statistics;
    let master = &output.master_data;

    // Each counter must equal the length of the collection it describes.
    assert_eq!(stats.vendor_count, master.vendors.len());
    assert_eq!(stats.customer_count, master.customers.len());
    assert_eq!(stats.material_count, master.materials.len());
    assert_eq!(stats.total_entries as usize, output.journal_entries.len());
}
13827
/// Pins the values of `PhaseConfig::default()` that the other tests rely on.
#[test]
fn test_phase_config_defaults() {
    let defaults = PhaseConfig::default();

    // Phases that are on by default.
    assert!(defaults.generate_master_data);
    assert!(defaults.generate_document_flows);
    assert!(defaults.generate_journal_entries);

    // Anomaly injection is off by default.
    assert!(!defaults.inject_anomalies);

    assert!(defaults.validate_balances);
    assert!(defaults.show_progress);

    // Default per-company volumes must be non-zero.
    assert!(defaults.vendors_per_company > 0);
    assert!(defaults.customers_per_company > 0);
}
13840
/// A freshly constructed orchestrator has no chart of accounts until
/// `generate()` has been called.
#[test]
fn test_get_coa_before_generation() {
    let base_config = create_test_config();
    let orch = EnhancedOrchestrator::with_defaults(base_config).unwrap();

    // Nothing has been generated yet, so there is no CoA to hand out.
    let coa = orch.get_coa();
    assert!(coa.is_none());
}
13849
/// After a generation run the orchestrator exposes the chart of accounts
/// through `get_coa()`.
#[test]
fn test_get_coa_after_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    // Only the state left on the orchestrator matters here; discard the result.
    drop(orch.generate().unwrap());

    // The CoA must now be available.
    let coa = orch.get_coa();
    assert!(coa.is_some());
}
13868
/// With only master data generation enabled, the returned result carries the
/// generated vendors.
#[test]
fn test_get_master_data() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Per-company master data volumes.
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    // generate() moves the master data into the result, so inspect it there.
    let vendors = &output.master_data.vendors;
    assert!(!vendors.is_empty());
}
13892
/// `with_progress(false)` must clear the `show_progress` flag on the
/// orchestrator's phase configuration.
#[test]
fn test_with_progress_builder() {
    let base_config = create_test_config();
    let with_defaults = EnhancedOrchestrator::with_defaults(base_config).unwrap();
    let orch = with_defaults.with_progress(false);

    // The builder call must be reflected in the stored phase configuration.
    assert!(!orch.phase_config.show_progress);
}
13903
/// Adds a second company to the test configuration and checks that master
/// data is generated for both companies and that the company counter
/// reflects the addition.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let stats = &result.statistics;

    // Should have master data for both companies (5 per company requested).
    assert!(
        stats.vendor_count >= 10,
        "expected at least 10 vendors across two companies, got {}",
        stats.vendor_count
    );
    assert!(
        stats.customer_count >= 10,
        "expected at least 10 customers across two companies, got {}",
        stats.customer_count
    );
    // assert_eq! reports the actual count on failure, unlike assert!(a == b).
    assert_eq!(stats.companies_count, 2);
}
13940
/// Requests document flows while master data generation is disabled and
/// expects no P2P or O2C chains to be produced.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        // Document flows are requested ...
        generate_document_flows: true,
        // ... but the master data they would build on is not generated.
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    // With no master data, both chain collections must stay empty.
    let flows = &output.document_flows;
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
13960
/// The `total_line_items` statistic must equal the sum of `line_count()`
/// over all generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    // Recompute the line item total independently of the statistics.
    let mut summed_lines: u64 = 0;
    for entry in &output.journal_entries {
        summed_lines += entry.line_count() as u64;
    }

    assert_eq!(output.statistics.total_line_items, summed_lines);
}
13984
/// Enables audit data generation for two engagements and checks that the
/// audit collections are populated and that every audit counter in the
/// statistics equals the size of its collection.
#[test]
fn test_audit_generation() {
    let base_config = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        // Audit phase settings.
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    let audit = &output.audit;
    let stats = &output.statistics;

    // Core audit artefacts: exactly the requested number of engagements,
    // and at least one of everything else.
    assert_eq!(audit.engagements.len(), 2);
    assert!(!audit.workpapers.is_empty());
    assert!(!audit.evidence.is_empty());
    assert!(!audit.risk_assessments.is_empty());
    assert!(!audit.findings.is_empty());
    assert!(!audit.judgments.is_empty());

    // ISA-specific entity collections must be populated as well.
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    // Samples are deliberately not asserted: whether any are generated
    // depends on the workpaper sampling methods.
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Every audit counter must agree with the collection it summarises.
    assert_eq!(stats.audit_engagement_count, audit.engagements.len());
    assert_eq!(stats.audit_workpaper_count, audit.workpapers.len());
    assert_eq!(stats.audit_evidence_count, audit.evidence.len());
    assert_eq!(stats.audit_risk_count, audit.risk_assessments.len());
    assert_eq!(stats.audit_finding_count, audit.findings.len());
    assert_eq!(stats.audit_judgment_count, audit.judgments.len());
    assert_eq!(stats.audit_confirmation_count, audit.confirmations.len());
    assert_eq!(
        stats.audit_confirmation_response_count,
        audit.confirmation_responses.len()
    );
    assert_eq!(stats.audit_procedure_step_count, audit.procedure_steps.len());
    assert_eq!(stats.audit_sample_count, audit.samples.len());
    assert_eq!(
        stats.audit_analytical_result_count,
        audit.analytical_results.len()
    );
    assert_eq!(stats.audit_ia_function_count, audit.ia_functions.len());
    assert_eq!(stats.audit_ia_report_count, audit.ia_reports.len());
    assert_eq!(stats.audit_related_party_count, audit.related_parties.len());
    assert_eq!(
        stats.audit_related_party_transaction_count,
        audit.related_party_transactions.len()
    );
}
14104
/// The LLM, diffusion and causal phases are disabled in the test
/// configuration; a run must therefore leave their statistics at zero/`None`
/// and produce no counterfactual pairs.
#[test]
fn test_new_phases_disabled_by_default() {
    let base_config = create_test_config();

    // The optional phases must start out disabled in the configuration.
    assert!(!base_config.llm.enabled);
    assert!(!base_config.diffusion.enabled);
    assert!(!base_config.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    // LLM enrichment left no trace.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);

    // Diffusion enhancement left no trace.
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);

    // Causal overlay left no trace and never validated.
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());

    // No counterfactual pairs were produced.
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
14136
/// With counterfactual generation enabled, one counterfactual pair is
/// expected per journal entry, the statistics must report the same count,
/// and all pair ids must be distinct.
///
/// Period close is disabled so that the journal entry count lines up with
/// the number of counterfactual pairs.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false, // Disable so entry count matches counterfactual pairs
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // Fail loudly instead of passing vacuously: journal entry generation is
    // enabled, so an empty result would mean nothing below was ever checked.
    assert!(
        !result.journal_entries.is_empty(),
        "journal entries should be generated when generate_journal_entries is enabled"
    );

    // One counterfactual pair per journal entry, mirrored in the statistics.
    let expected_pairs = result.journal_entries.len();
    assert_eq!(result.counterfactual_pairs.len(), expected_pairs);
    assert_eq!(result.statistics.counterfactual_pair_count, expected_pairs);

    // Each pair should have a distinct pair_id; borrowing the ids is enough
    // for the uniqueness check.
    let ids: std::collections::HashSet<_> = result
        .counterfactual_pairs
        .iter()
        .map(|p| &p.pair_id)
        .collect();
    assert_eq!(ids.len(), result.counterfactual_pairs.len());
}
14173
/// Enables LLM enrichment with a cap of three vendor enrichments and checks
/// that at least one and at most three vendors were enriched.
#[test]
fn test_llm_enrichment_enabled() {
    let mut base_config = create_test_config();
    base_config.llm.enabled = true;
    base_config.llm.max_vendor_enrichments = 3;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        // Five vendors per company, more than the enrichment cap above.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    // Enrichment must have happened, but never beyond the configured cap.
    let enriched = output.statistics.llm_vendors_enriched;
    assert!(enriched > 0);
    assert!(enriched <= 3);
}
14201
/// Enables the diffusion phase with a sample size of 20 and checks that
/// exactly that many samples are reported.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut base_config = create_test_config();
    base_config.diffusion.enabled = true;
    base_config.diffusion.n_steps = 50;
    base_config.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();

    // The reported sample count must match the configured sample_size.
    let stats = &output.statistics;
    assert_eq!(stats.diffusion_samples_generated, 20);
}
14224
/// Enables the causal overlay with the `fraud_detection` template, 100
/// samples and validation on; expects 100 samples and a validation verdict.
#[test]
fn test_causal_overlay_enabled() {
    let mut base_config = create_test_config();
    base_config.causal.enabled = true;
    base_config.causal.template = "fraud_detection".to_string();
    base_config.causal.sample_size = 100;
    base_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    // The reported sample count must match the configured sample_size.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so a pass/fail verdict must be recorded.
    assert!(stats.causal_validation_passed.is_some());
}
14250
/// Enables the causal overlay with the `revenue_cycle` template, 50 samples
/// and validation off; expects 50 samples and no validation verdict.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut base_config = create_test_config();
    base_config.causal.enabled = true;
    base_config.causal.template = "revenue_cycle".to_string();
    base_config.causal.sample_size = 50;
    base_config.causal.validate = false;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    // The reported sample count must match the configured sample_size.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was switched off, so no verdict may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
14276
/// Turns on LLM enrichment, diffusion enhancement and the causal overlay in
/// the same run and checks that each phase reports its expected output.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut base_config = create_test_config();

    // LLM enrichment.
    base_config.llm.enabled = true;
    base_config.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement.
    base_config.diffusion.enabled = true;
    base_config.diffusion.n_steps = 20;
    base_config.diffusion.sample_size = 10;

    // Causal overlay with validation.
    base_config.causal.enabled = true;
    base_config.causal.sample_size = 50;
    base_config.causal.validate = true;

    let phases = PhaseConfig {
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        // Per-company master data volumes.
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        ..Default::default()
    };

    let mut orch = EnhancedOrchestrator::new(base_config, phases).unwrap();
    let output = orch.generate().unwrap();
    let stats = &output.statistics;

    // Every one of the three phases must have left its mark.
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
14312
/// Round-trips `EnhancedGenerationStatistics` through JSON and checks that
/// every explicitly set field, including the LLM, diffusion and causal
/// fields, survives serialization and deserialization unchanged.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let stats = EnhancedGenerationStatistics {
        total_entries: 100,
        total_line_items: 500,
        llm_enrichment_ms: 42,
        llm_vendors_enriched: 10,
        diffusion_enhancement_ms: 100,
        diffusion_samples_generated: 50,
        causal_generation_ms: 200,
        causal_samples_generated: 100,
        causal_validation_passed: Some(true),
        ..Default::default()
    };

    let json = serde_json::to_string(&stats).unwrap();
    let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();

    // The base counters set above must round-trip too; previously they were
    // populated but never verified.
    assert_eq!(deserialized.total_entries, 100);
    assert_eq!(deserialized.total_line_items, 500);

    // LLM, diffusion and causal fields.
    assert_eq!(deserialized.llm_enrichment_ms, 42);
    assert_eq!(deserialized.llm_vendors_enriched, 10);
    assert_eq!(deserialized.diffusion_enhancement_ms, 100);
    assert_eq!(deserialized.diffusion_samples_generated, 50);
    assert_eq!(deserialized.causal_generation_ms, 200);
    assert_eq!(deserialized.causal_samples_generated, 100);
    assert_eq!(deserialized.causal_validation_passed, Some(true));
}
14339
/// Statistics JSON written without the LLM, diffusion and causal fields must
/// still deserialize, with those fields falling back to zero / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload in the older shape: none of the newer fields are present.
    let legacy_json = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed: EnhancedGenerationStatistics = serde_json::from_str(legacy_json).unwrap();

    // Fields absent from the payload must come back as 0 / None.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
14389}