Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec};
164use datasynth_core::models::documents::PaymentMethod;
165use datasynth_core::models::IndustrySector;
166use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
167use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
168use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
169use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
170use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
171use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
172use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
173use datasynth_generators::audit::sample_generator::SampleGenerator;
174use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
175use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
176use datasynth_generators::coa_generator::CoAFramework;
177use datasynth_generators::llm_enrichment::VendorLlmEnricher;
178use rayon::prelude::*;
179
180// ============================================================================
181// Configuration Conversion Functions
182// ============================================================================
183
184/// Convert P2P flow config from schema to generator config.
185fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
186    let payment_behavior = &schema_config.payment_behavior;
187    let late_dist = &payment_behavior.late_payment_days_distribution;
188
189    P2PGeneratorConfig {
190        three_way_match_rate: schema_config.three_way_match_rate,
191        partial_delivery_rate: schema_config.partial_delivery_rate,
192        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
193        price_variance_rate: schema_config.price_variance_rate,
194        max_price_variance_percent: schema_config.max_price_variance_percent,
195        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
196        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
197        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
198        payment_method_distribution: vec![
199            (PaymentMethod::BankTransfer, 0.60),
200            (PaymentMethod::Check, 0.25),
201            (PaymentMethod::Wire, 0.10),
202            (PaymentMethod::CreditCard, 0.05),
203        ],
204        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
205        payment_behavior: P2PPaymentBehavior {
206            late_payment_rate: payment_behavior.late_payment_rate,
207            late_payment_distribution: LatePaymentDistribution {
208                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
209                late_8_to_14: late_dist.late_8_to_14,
210                very_late_15_to_30: late_dist.very_late_15_to_30,
211                severely_late_31_to_60: late_dist.severely_late_31_to_60,
212                extremely_late_over_60: late_dist.extremely_late_over_60,
213            },
214            partial_payment_rate: payment_behavior.partial_payment_rate,
215            payment_correction_rate: payment_behavior.payment_correction_rate,
216            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
217        },
218    }
219}
220
221/// Convert O2C flow config from schema to generator config.
222fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
223    let payment_behavior = &schema_config.payment_behavior;
224
225    O2CGeneratorConfig {
226        credit_check_failure_rate: schema_config.credit_check_failure_rate,
227        partial_shipment_rate: schema_config.partial_shipment_rate,
228        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
229        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
230        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
231        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
232        bad_debt_rate: schema_config.bad_debt_rate,
233        returns_rate: schema_config.return_rate,
234        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
235        payment_method_distribution: vec![
236            (PaymentMethod::BankTransfer, 0.50),
237            (PaymentMethod::Check, 0.30),
238            (PaymentMethod::Wire, 0.15),
239            (PaymentMethod::CreditCard, 0.05),
240        ],
241        payment_behavior: O2CPaymentBehavior {
242            partial_payment_rate: payment_behavior.partial_payments.rate,
243            short_payment_rate: payment_behavior.short_payments.rate,
244            max_short_percent: payment_behavior.short_payments.max_short_percent,
245            on_account_rate: payment_behavior.on_account_payments.rate,
246            payment_correction_rate: payment_behavior.payment_corrections.rate,
247            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
248        },
249    }
250}
251
/// Switches and volume settings that decide which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields set how
/// many records a phase produces.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Run master data generation (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Run document flow generation (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Run journal entry generation.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Run data quality injection (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation is done.
    pub validate_balances: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors generated for each company.
    pub vendors_per_company: usize,
    /// Customers generated for each company.
    pub customers_per_company: usize,
    /// Materials generated for each company.
    pub materials_per_company: usize,
    /// Assets generated for each company.
    pub assets_per_company: usize,
    /// Employees generated for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Run audit data generation (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers generated for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items generated for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments generated for each engagement.
    pub risks_per_engagement: usize,
    /// Findings generated for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments generated for each engagement.
    pub judgments_per_engagement: usize,
    /// Run banking KYC/AML generation (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Run S2C sourcing generation (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Build bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Build financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Run accounting standards generation (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Run manufacturing generation (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Run ESG generation (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Run compliance regulations generation (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Run HR generation (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Run treasury generation (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Run project accounting generation (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: the core phases are enabled, every optional
    /// feature phase is disabled, and the record counts use fixed defaults.
    fn default() -> Self {
        Self {
            // Enabled out of the box.
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            generate_evolution_events: true,
            generate_period_close: true,
            show_progress: true,

            // Injection phases are opt-in; data quality stays off to preserve
            // clean test data.
            inject_anomalies: false,
            inject_data_quality: false,

            // Optional feature phases, all opt-in.
            generate_ocpm_events: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            generate_counterfactuals: false, // opt-in for ML workloads
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,

            // Master data volumes per company.
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,

            // Document flow volumes.
            p2p_chains: 100,
            o2c_chains: 100,

            // Audit volumes.
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
383
384impl PhaseConfig {
385    /// Derive phase flags from [`GeneratorConfig`].
386    ///
387    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
388    /// CLI flags can override individual fields after calling this method.
389    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
390        Self {
391            // Always-on phases
392            generate_master_data: true,
393            generate_document_flows: true,
394            generate_journal_entries: true,
395            validate_balances: true,
396            generate_period_close: true,
397            generate_evolution_events: true,
398            show_progress: true,
399
400            // Feature-gated phases — derived from config sections
401            generate_audit: cfg.audit.enabled,
402            generate_banking: cfg.banking.enabled,
403            generate_graph_export: cfg.graph_export.enabled,
404            generate_sourcing: cfg.source_to_pay.enabled,
405            generate_intercompany: cfg.intercompany.enabled,
406            generate_financial_statements: cfg.financial_reporting.enabled,
407            generate_bank_reconciliation: cfg.financial_reporting.enabled,
408            generate_accounting_standards: cfg.accounting_standards.enabled,
409            generate_manufacturing: cfg.manufacturing.enabled,
410            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
411            generate_tax: cfg.tax.enabled,
412            generate_esg: cfg.esg.enabled,
413            generate_ocpm_events: cfg.ocpm.enabled,
414            generate_compliance_regulations: cfg.compliance_regulations.enabled,
415            generate_hr: cfg.hr.enabled,
416            generate_treasury: cfg.treasury.enabled,
417            generate_project_accounting: cfg.project_accounting.enabled,
418
419            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
420            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
421
422            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
423            inject_data_quality: cfg.data_quality.enabled,
424
425            // Count defaults (CLI can override after calling this method)
426            vendors_per_company: 50,
427            customers_per_company: 100,
428            materials_per_company: 200,
429            assets_per_company: 50,
430            employees_per_company: 100,
431            p2p_chains: 100,
432            o2c_chains: 100,
433            audit_engagements: 5,
434            workpapers_per_engagement: 20,
435            evidence_per_workpaper: 5,
436            risks_per_engagement: 15,
437            findings_per_engagement: 8,
438            judgments_per_engagement: 10,
439        }
440    }
441}
442
443/// Master data snapshot containing all generated entities.
444#[derive(Debug, Clone, Default)]
445pub struct MasterDataSnapshot {
446    /// Generated vendors.
447    pub vendors: Vec<Vendor>,
448    /// Generated customers.
449    pub customers: Vec<Customer>,
450    /// Generated materials.
451    pub materials: Vec<Material>,
452    /// Generated fixed assets.
453    pub assets: Vec<FixedAsset>,
454    /// Generated employees.
455    pub employees: Vec<Employee>,
456    /// Generated cost center hierarchy (two-level: departments + sub-departments).
457    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
458    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
459    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
460}
461
/// Summary of a finished hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
474
475/// Document flow snapshot containing all generated document chains.
476#[derive(Debug, Clone, Default)]
477pub struct DocumentFlowSnapshot {
478    /// P2P document chains.
479    pub p2p_chains: Vec<P2PDocumentChain>,
480    /// O2C document chains.
481    pub o2c_chains: Vec<O2CDocumentChain>,
482    /// All purchase orders (flattened).
483    pub purchase_orders: Vec<documents::PurchaseOrder>,
484    /// All goods receipts (flattened).
485    pub goods_receipts: Vec<documents::GoodsReceipt>,
486    /// All vendor invoices (flattened).
487    pub vendor_invoices: Vec<documents::VendorInvoice>,
488    /// All sales orders (flattened).
489    pub sales_orders: Vec<documents::SalesOrder>,
490    /// All deliveries (flattened).
491    pub deliveries: Vec<documents::Delivery>,
492    /// All customer invoices (flattened).
493    pub customer_invoices: Vec<documents::CustomerInvoice>,
494    /// All payments (flattened).
495    pub payments: Vec<documents::Payment>,
496    /// Cross-document references collected from all document headers
497    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
498    pub document_references: Vec<documents::DocumentReference>,
499}
500
501/// Subledger snapshot containing generated subledger records.
502#[derive(Debug, Clone, Default)]
503pub struct SubledgerSnapshot {
504    /// AP invoices linked from document flow vendor invoices.
505    pub ap_invoices: Vec<APInvoice>,
506    /// AR invoices linked from document flow customer invoices.
507    pub ar_invoices: Vec<ARInvoice>,
508    /// FA subledger records (asset acquisitions from FA generator).
509    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
510    /// Inventory positions from inventory generator.
511    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
512    /// Inventory movements from inventory generator.
513    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
514    /// AR aging reports, one per company, computed after payment settlement.
515    pub ar_aging_reports: Vec<ARAgingReport>,
516    /// AP aging reports, one per company, computed after payment settlement.
517    pub ap_aging_reports: Vec<APAgingReport>,
518    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
519    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
520    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
521    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
522    /// Dunning runs executed after AR aging (one per company per dunning cycle).
523    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
524    /// Dunning letters generated across all dunning runs.
525    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
526}
527
528/// OCPM snapshot containing generated OCPM event log data.
529#[derive(Debug, Clone, Default)]
530pub struct OcpmSnapshot {
531    /// OCPM event log (if generated)
532    pub event_log: Option<OcpmEventLog>,
533    /// Number of events generated
534    pub event_count: usize,
535    /// Number of objects generated
536    pub object_count: usize,
537    /// Number of cases generated
538    pub case_count: usize,
539}
540
541/// Audit data snapshot containing all generated audit-related entities.
542#[derive(Debug, Clone, Default)]
543pub struct AuditSnapshot {
544    /// Audit engagements per ISA 210/220.
545    pub engagements: Vec<AuditEngagement>,
546    /// Workpapers per ISA 230.
547    pub workpapers: Vec<Workpaper>,
548    /// Audit evidence per ISA 500.
549    pub evidence: Vec<AuditEvidence>,
550    /// Risk assessments per ISA 315/330.
551    pub risk_assessments: Vec<RiskAssessment>,
552    /// Audit findings per ISA 265.
553    pub findings: Vec<AuditFinding>,
554    /// Professional judgments per ISA 200.
555    pub judgments: Vec<ProfessionalJudgment>,
556    /// External confirmations per ISA 505.
557    pub confirmations: Vec<ExternalConfirmation>,
558    /// Confirmation responses per ISA 505.
559    pub confirmation_responses: Vec<ConfirmationResponse>,
560    /// Audit procedure steps per ISA 330/530.
561    pub procedure_steps: Vec<AuditProcedureStep>,
562    /// Audit samples per ISA 530.
563    pub samples: Vec<AuditSample>,
564    /// Analytical procedure results per ISA 520.
565    pub analytical_results: Vec<AnalyticalProcedureResult>,
566    /// Internal audit functions per ISA 610.
567    pub ia_functions: Vec<InternalAuditFunction>,
568    /// Internal audit reports per ISA 610.
569    pub ia_reports: Vec<InternalAuditReport>,
570    /// Related parties per ISA 550.
571    pub related_parties: Vec<RelatedParty>,
572    /// Related party transactions per ISA 550.
573    pub related_party_transactions: Vec<RelatedPartyTransaction>,
574    // ---- ISA 600: Group Audits ----
575    /// Component auditors assigned by jurisdiction (ISA 600).
576    pub component_auditors: Vec<ComponentAuditor>,
577    /// Group audit plan with materiality allocations (ISA 600).
578    pub group_audit_plan: Option<GroupAuditPlan>,
579    /// Component instructions issued to component auditors (ISA 600).
580    pub component_instructions: Vec<ComponentInstruction>,
581    /// Reports received from component auditors (ISA 600).
582    pub component_reports: Vec<ComponentAuditorReport>,
583    // ---- ISA 210: Engagement Letters ----
584    /// Engagement letters per ISA 210.
585    pub engagement_letters: Vec<EngagementLetter>,
586    // ---- ISA 560 / IAS 10: Subsequent Events ----
587    /// Subsequent events per ISA 560 / IAS 10.
588    pub subsequent_events: Vec<SubsequentEvent>,
589    // ---- ISA 402: Service Organization Controls ----
590    /// Service organizations identified per ISA 402.
591    pub service_organizations: Vec<ServiceOrganization>,
592    /// SOC reports obtained per ISA 402.
593    pub soc_reports: Vec<SocReport>,
594    /// User entity controls documented per ISA 402.
595    pub user_entity_controls: Vec<UserEntityControl>,
596    // ---- ISA 570: Going Concern ----
597    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
598    pub going_concern_assessments:
599        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
600    // ---- ISA 540: Accounting Estimates ----
601    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
602    pub accounting_estimates:
603        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
604    // ---- ISA 700/701/705/706: Audit Opinions ----
605    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
606    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
607    /// Key Audit Matters per ISA 701 (flattened across all opinions).
608    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
609    // ---- SOX 302 / 404 ----
610    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
611    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
612    /// SOX Section 404 ICFR assessments (one per entity per year).
613    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
614    // ---- ISA 320: Materiality ----
615    /// Materiality calculations per entity per period (ISA 320).
616    pub materiality_calculations:
617        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
618    // ---- ISA 315: Combined Risk Assessments ----
619    /// Combined Risk Assessments per account area / assertion (ISA 315).
620    pub combined_risk_assessments:
621        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
622    // ---- ISA 530: Sampling Plans ----
623    /// Sampling plans per CRA at Moderate or higher (ISA 530).
624    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
625    /// Individual sampled items (key items + representative items) per ISA 530.
626    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
627    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
628    /// Significant classes of transactions per ISA 315 (one set per entity).
629    pub significant_transaction_classes:
630        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
631    // ---- ISA 520: Unusual Item Markers ----
632    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
633    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
634    // ---- ISA 520: Analytical Relationships ----
635    /// Analytical relationships (ratios, trends, correlations) per entity.
636    pub analytical_relationships:
637        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
638    // ---- PCAOB-ISA Cross-Reference ----
639    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
640    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
641    // ---- ISA Standard Reference ----
642    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
643    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
644    // ---- ISA 220 / ISA 300: Audit Scopes ----
645    /// Audit scope records (one per engagement) describing the audit boundary.
646    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
647    // ---- FSM Event Trail ----
648    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
649    /// Contains the ordered sequence of state-transition and procedure-step events
650    /// generated by the audit FSM engine.
651    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
652}
653
654/// Banking KYC/AML data snapshot: generated banking entities plus their AML labels and narratives.
655#[derive(Debug, Clone, Default)]
656pub struct BankingSnapshot {
657    /// Banking customers (retail, business, trust).
658    pub customers: Vec<BankingCustomer>,
659    /// Bank accounts.
660    pub accounts: Vec<BankAccount>,
661    /// Bank transactions with AML labels.
662    pub transactions: Vec<BankTransaction>,
663    /// Transaction-level AML labels with features.
664    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
665    /// Customer-level AML labels.
666    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
667    /// Account-level AML labels.
668    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
669    /// Relationship-level AML labels.
670    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
671    /// Case narratives for AML scenarios.
672    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
673    /// Number of suspicious transactions. NOTE(review): stored separately from `transactions`; confirm the producer keeps it in sync.
674    pub suspicious_count: usize,
675    /// Number of AML scenarios generated.
676    pub scenario_count: usize,
677}
678
679/// Graph export snapshot: whether export ran, how many graphs were exported, and per-format metadata.
680#[derive(Debug, Clone, Default, Serialize)]
681pub struct GraphExportSnapshot {
682    /// Whether graph export was performed (`false` in the `Default` value).
683    pub exported: bool,
684    /// Number of graphs exported.
685    pub graph_count: usize,
686    /// Exported graph metadata, keyed by format name.
687    pub exports: HashMap<String, GraphExportInfo>,
688}
689
690/// Metadata describing a single exported graph (name, format, output location, size).
691#[derive(Debug, Clone, Serialize)]
692pub struct GraphExportInfo {
693    /// Graph name.
694    pub name: String,
695    /// Export format name, e.g. `pytorch_geometric`, `neo4j`, `dgl`.
696    pub format: String,
697    /// Output directory path.
698    pub output_path: PathBuf,
699    /// Number of nodes in the exported graph.
700    pub node_count: usize,
701    /// Number of edges in the exported graph.
702    pub edge_count: usize,
703}
704
705/// Source-to-Contract (S2C) sourcing data snapshot.
706#[derive(Debug, Clone, Default)]
707pub struct SourcingSnapshot {
708    /// Spend analyses.
709    pub spend_analyses: Vec<SpendAnalysis>,
710    /// Sourcing projects.
711    pub sourcing_projects: Vec<SourcingProject>,
712    /// Supplier qualifications.
713    pub qualifications: Vec<SupplierQualification>,
714    /// RFx events (RFI, RFP, RFQ).
715    pub rfx_events: Vec<RfxEvent>,
716    /// Supplier bids.
717    pub bids: Vec<SupplierBid>,
718    /// Bid evaluations.
719    pub bid_evaluations: Vec<BidEvaluation>,
720    /// Procurement contracts.
721    pub contracts: Vec<ProcurementContract>,
722    /// Catalog items.
723    pub catalog_items: Vec<CatalogItem>,
724    /// Supplier scorecards.
725    pub scorecards: Vec<SupplierScorecard>,
726}
727
728/// A single period's trial balance with its identifying metadata (fiscal year/period and date range).
729#[derive(Debug, Clone, Serialize, Deserialize)]
730pub struct PeriodTrialBalance {
731    /// Fiscal year.
732    pub fiscal_year: u16,
733    /// Fiscal period (1-12).
734    pub fiscal_period: u8,
735    /// Period start date.
736    pub period_start: NaiveDate,
737    /// Period end date.
738    pub period_end: NaiveDate,
739    /// Trial balance entries for this period.
740    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
741}
742
743/// Financial reporting snapshot (standalone and consolidated statements, consolidation schedules, bank reconciliations, trial balances, segment reporting, notes).
744#[derive(Debug, Clone, Default)]
745pub struct FinancialReportingSnapshot {
746    /// Financial statements (balance sheet, income statement, cash flow).
747    /// For multi-entity configs this includes all standalone statements.
748    pub financial_statements: Vec<FinancialStatement>,
749    /// Standalone financial statements keyed by entity code.
750    /// Each entity has its own slice of statements.
751    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
752    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
753    pub consolidated_statements: Vec<FinancialStatement>,
754    /// Consolidation schedules (one per period) showing pre/post elimination detail.
755    pub consolidation_schedules: Vec<ConsolidationSchedule>,
756    /// Bank reconciliations.
757    pub bank_reconciliations: Vec<BankReconciliation>,
758    /// Period-close trial balances (one per period).
759    pub trial_balances: Vec<PeriodTrialBalance>,
760    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
761    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
762    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
763    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
764    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
765    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
766}
767
768/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions, stock-based compensation).
769#[derive(Debug, Clone, Default)]
770pub struct HrSnapshot {
771    /// Payroll runs (actual data).
772    pub payroll_runs: Vec<PayrollRun>,
773    /// Payroll line items (actual data).
774    pub payroll_line_items: Vec<PayrollLineItem>,
775    /// Time entries (actual data).
776    pub time_entries: Vec<TimeEntry>,
777    /// Expense reports (actual data).
778    pub expense_reports: Vec<ExpenseReport>,
779    /// Benefit enrollments (actual data).
780    pub benefit_enrollments: Vec<BenefitEnrollment>,
781    /// Defined benefit pension plans (IAS 19 / ASC 715).
782    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
783    /// Pension obligation (DBO) roll-forwards.
784    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
785    /// Plan asset roll-forwards.
786    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
787    /// Pension disclosures.
788    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
789    /// Journal entries generated from pension expense and OCI remeasurements.
790    pub pension_journal_entries: Vec<JournalEntry>,
791    /// Stock grants (ASC 718 / IFRS 2).
792    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
793    /// Stock-based compensation period expense records.
794    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
795    /// Journal entries generated from stock-based compensation expense.
796    pub stock_comp_journal_entries: Vec<JournalEntry>,
797    /// Payroll run count. NOTE(review): presumably equals `payroll_runs.len()` — confirm where it is populated.
798    pub payroll_run_count: usize,
799    /// Payroll line item count.
800    pub payroll_line_item_count: usize,
801    /// Time entry count.
802    pub time_entry_count: usize,
803    /// Expense report count.
804    pub expense_report_count: usize,
805    /// Benefit enrollment count.
806    pub benefit_enrollment_count: usize,
807    /// Pension plan count.
808    pub pension_plan_count: usize,
809    /// Stock grant count.
810    pub stock_grant_count: usize,
811}
812
813/// Accounting standards data snapshot (revenue recognition, impairment, business combinations, ECL, provisions, currency translation).
814#[derive(Debug, Clone, Default)]
815pub struct AccountingStandardsSnapshot {
816    /// Revenue recognition contracts (actual data).
817    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
818    /// Impairment tests (actual data).
819    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
820    /// Business combinations (IFRS 3 / ASC 805).
821    pub business_combinations:
822        Vec<datasynth_core::models::business_combination::BusinessCombination>,
823    /// Journal entries generated from business combinations (Day 1 + amortization).
824    pub business_combination_journal_entries: Vec<JournalEntry>,
825    /// Expected credit loss (ECL) models (IFRS 9 / ASC 326).
826    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
827    /// ECL provision movements.
828    pub ecl_provision_movements:
829        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
830    /// Journal entries from ECL provision.
831    pub ecl_journal_entries: Vec<JournalEntry>,
832    /// Provisions (IAS 37 / ASC 450).
833    pub provisions: Vec<datasynth_core::models::provision::Provision>,
834    /// Provision movement roll-forwards (IAS 37 / ASC 450).
835    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
836    /// Contingent liabilities (IAS 37 / ASC 450).
837    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
838    /// Journal entries from provisions.
839    pub provision_journal_entries: Vec<JournalEntry>,
840    /// IAS 21 functional currency translation results (one per entity per period).
841    pub currency_translation_results:
842        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
843    /// Revenue recognition contract count. NOTE(review): presumably equals `contracts.len()` — confirm where it is populated.
844    pub revenue_contract_count: usize,
845    /// Impairment test count.
846    pub impairment_test_count: usize,
847    /// Business combination count.
848    pub business_combination_count: usize,
849    /// ECL model count.
850    pub ecl_model_count: usize,
851    /// Provision count.
852    pub provision_count: usize,
853    /// Currency translation result count (IAS 21).
854    pub currency_translation_count: usize,
855}
856
857/// Compliance regulations framework snapshot (standards, cross-references, jurisdictions, procedures, findings, filings, graph).
858#[derive(Debug, Clone, Default)]
859pub struct ComplianceRegulationsSnapshot {
860    /// Flattened standard records for output.
861    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
862    /// Cross-reference records.
863    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
864    /// Jurisdiction profile records.
865    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
866    /// Generated audit procedures.
867    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
868    /// Generated compliance findings.
869    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
870    /// Generated regulatory filings.
871    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
872    /// Compliance graph (if graph integration enabled; `None` in the `Default` value).
873    pub compliance_graph: Option<datasynth_graph::Graph>,
874}
875
876/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
877#[derive(Debug, Clone, Default)]
878pub struct ManufacturingSnapshot {
879    /// Production orders (actual data).
880    pub production_orders: Vec<ProductionOrder>,
881    /// Quality inspections (actual data).
882    pub quality_inspections: Vec<QualityInspection>,
883    /// Cycle counts (actual data).
884    pub cycle_counts: Vec<CycleCount>,
885    /// Bill-of-materials (BOM) components (actual data).
886    pub bom_components: Vec<BomComponent>,
887    /// Inventory movements (actual data).
888    pub inventory_movements: Vec<InventoryMovement>,
889    /// Number of production orders.
890    pub production_order_count: usize,
891    /// Number of quality inspections.
892    pub quality_inspection_count: usize,
893    /// Number of cycle counts.
894    pub cycle_count_count: usize,
895    /// Number of BOM components.
896    pub bom_component_count: usize,
897    /// Number of inventory movements.
898    pub inventory_movement_count: usize,
899}
900
901/// Sales quote, management KPI, and budget data snapshot.
902#[derive(Debug, Clone, Default)]
903pub struct SalesKpiBudgetsSnapshot {
904    /// Sales quotes (actual data).
905    pub sales_quotes: Vec<SalesQuote>,
906    /// Management KPIs (actual data).
907    pub kpis: Vec<ManagementKpi>,
908    /// Budgets (actual data).
909    pub budgets: Vec<Budget>,
910    /// Sales quote count.
911    pub sales_quote_count: usize,
912    /// Management KPI count.
913    pub kpi_count: usize,
914    /// Budget line count. NOTE(review): counts lines, so it may differ from `budgets.len()` — confirm.
915    pub budget_line_count: usize,
916}
917
918/// Anomaly labels generated during injection, with optional summary statistics and per-type counts.
919#[derive(Debug, Clone, Default)]
920pub struct AnomalyLabels {
921    /// All anomaly labels.
922    pub labels: Vec<LabeledAnomaly>,
923    /// Summary statistics (`None` in the `Default` value).
924    pub summary: Option<AnomalySummary>,
925    /// Count by anomaly type, keyed by a string identifying the type.
926    pub by_type: HashMap<String, usize>,
927}
928
929/// Balance validation results produced by the running balance tracker.
930#[derive(Debug, Clone, Default)]
931pub struct BalanceValidationResult {
932    /// Whether validation was performed (`false` in the `Default` value).
933    pub validated: bool,
934    /// Whether the balance sheet equation is satisfied.
935    pub is_balanced: bool,
936    /// Number of entries processed.
937    pub entries_processed: u64,
938    /// Total debits across all entries.
939    pub total_debits: rust_decimal::Decimal,
940    /// Total credits across all entries.
941    pub total_credits: rust_decimal::Decimal,
942    /// Number of accounts tracked.
943    pub accounts_tracked: usize,
944    /// Number of companies tracked.
945    pub companies_tracked: usize,
946    /// Validation errors encountered.
947    pub validation_errors: Vec<ValidationError>,
948    /// Whether any unbalanced entries were found.
949    pub has_unbalanced_entries: bool,
950}
951
952/// Tax data snapshot (jurisdictions, codes, tax lines, returns, provisions, withholding, anomaly labels, deferred tax, posting JEs).
953#[derive(Debug, Clone, Default)]
954pub struct TaxSnapshot {
955    /// Tax jurisdictions.
956    pub jurisdictions: Vec<TaxJurisdiction>,
957    /// Tax codes.
958    pub codes: Vec<TaxCode>,
959    /// Tax lines computed on documents.
960    pub tax_lines: Vec<TaxLine>,
961    /// Tax returns filed per period.
962    pub tax_returns: Vec<TaxReturn>,
963    /// Tax provisions.
964    pub tax_provisions: Vec<TaxProvision>,
965    /// Withholding tax records.
966    pub withholding_records: Vec<WithholdingTaxRecord>,
967    /// Tax anomaly labels.
968    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
969    /// Jurisdiction count. NOTE(review): presumably equals `jurisdictions.len()` — confirm where it is populated.
970    pub jurisdiction_count: usize,
971    /// Tax code count.
972    pub code_count: usize,
973    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
974    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
975    /// Journal entries posting tax payable/receivable from computed tax lines.
976    pub tax_posting_journal_entries: Vec<JournalEntry>,
977}
978
979/// Intercompany (IC) data snapshot (group structure, matched pairs, IC journal entries, eliminations, NCI, document chains).
980#[derive(Debug, Clone, Default, Serialize, Deserialize)]
981pub struct IntercompanySnapshot {
982    /// Group ownership structure (parent/subsidiary/associate relationships).
983    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
984    /// IC matched pairs (transaction pairs between related entities).
985    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
986    /// IC journal entries generated from matched pairs (seller side).
987    pub seller_journal_entries: Vec<JournalEntry>,
988    /// IC journal entries generated from matched pairs (buyer side).
989    pub buyer_journal_entries: Vec<JournalEntry>,
990    /// Elimination entries for consolidation.
991    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
992    /// Non-controlling interest (NCI) measurements derived from group structure ownership percentages.
993    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
994    /// IC source document chains (seller invoices, buyer POs/GRs/VIs). Skipped by serde: never serialized, and `None` after deserialization.
995    #[serde(skip)]
996    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
997    /// IC matched pair count.
998    pub matched_pair_count: usize,
999    /// IC elimination entry count.
1000    pub elimination_entry_count: usize,
1001    /// IC matching rate (0.0 to 1.0).
1002    pub match_rate: f64,
1003}
1004
1005/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures, climate scenarios, anomaly labels).
1006#[derive(Debug, Clone, Default)]
1007pub struct EsgSnapshot {
1008    /// Emission records (scope 1, 2, 3).
1009    pub emissions: Vec<EmissionRecord>,
1010    /// Energy consumption records.
1011    pub energy: Vec<EnergyConsumption>,
1012    /// Water usage records.
1013    pub water: Vec<WaterUsage>,
1014    /// Waste records.
1015    pub waste: Vec<WasteRecord>,
1016    /// Workforce diversity metrics.
1017    pub diversity: Vec<WorkforceDiversityMetric>,
1018    /// Pay equity metrics.
1019    pub pay_equity: Vec<PayEquityMetric>,
1020    /// Safety incidents.
1021    pub safety_incidents: Vec<SafetyIncident>,
1022    /// Safety metrics.
1023    pub safety_metrics: Vec<SafetyMetric>,
1024    /// Governance metrics.
1025    pub governance: Vec<GovernanceMetric>,
1026    /// Supplier ESG assessments.
1027    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1028    /// Materiality assessments.
1029    pub materiality: Vec<MaterialityAssessment>,
1030    /// ESG disclosures.
1031    pub disclosures: Vec<EsgDisclosure>,
1032    /// Climate scenarios.
1033    pub climate_scenarios: Vec<ClimateScenario>,
1034    /// ESG anomaly labels.
1035    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1036    /// Total emission record count. NOTE(review): presumably equals `emissions.len()` — confirm where it is populated.
1037    pub emission_count: usize,
1038    /// Total disclosure count.
1039    pub disclosure_count: usize,
1040}
1041
1042/// Treasury data snapshot (cash management, pooling, hedging, debt, guarantees, netting, anomaly labels, journal entries).
1043#[derive(Debug, Clone, Default)]
1044pub struct TreasurySnapshot {
1045    /// Cash positions (daily balances per account).
1046    pub cash_positions: Vec<CashPosition>,
1047    /// Cash forecasts.
1048    pub cash_forecasts: Vec<CashForecast>,
1049    /// Cash pools.
1050    pub cash_pools: Vec<CashPool>,
1051    /// Cash pool sweep transactions.
1052    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1053    /// Hedging instruments.
1054    pub hedging_instruments: Vec<HedgingInstrument>,
1055    /// Hedge relationships (ASC 815 / IFRS 9 designations).
1056    pub hedge_relationships: Vec<HedgeRelationship>,
1057    /// Debt instruments.
1058    pub debt_instruments: Vec<DebtInstrument>,
1059    /// Bank guarantees and letters of credit.
1060    pub bank_guarantees: Vec<BankGuarantee>,
1061    /// Intercompany netting runs.
1062    pub netting_runs: Vec<NettingRun>,
1063    /// Treasury anomaly labels.
1064    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1065    /// Journal entries generated from treasury instruments (debt interest accruals,
1066    /// hedge mark-to-market (MTM), cash pool sweeps).
1067    pub journal_entries: Vec<JournalEntry>,
1068}
1069
1070/// Project accounting data snapshot (projects, cost lines, revenue, earned value metrics, change orders, milestones).
1071#[derive(Debug, Clone, Default)]
1072pub struct ProjectAccountingSnapshot {
1073    /// Projects with work breakdown structure (WBS) hierarchies.
1074    pub projects: Vec<Project>,
1075    /// Project cost lines (linked from source documents).
1076    pub cost_lines: Vec<ProjectCostLine>,
1077    /// Revenue recognition records.
1078    pub revenue_records: Vec<ProjectRevenue>,
1079    /// Earned value (EVM) metrics.
1080    pub earned_value_metrics: Vec<EarnedValueMetric>,
1081    /// Change orders.
1082    pub change_orders: Vec<ChangeOrder>,
1083    /// Project milestones.
1084    pub milestones: Vec<ProjectMilestone>,
1085}
1086
1087/// Complete result of an enhanced generation run, aggregating every per-domain snapshot and the run statistics.
1088#[derive(Debug, Default)]
1089pub struct EnhancedGenerationResult {
1090    /// Generated chart of accounts.
1091    pub chart_of_accounts: ChartOfAccounts,
1092    /// Master data snapshot.
1093    pub master_data: MasterDataSnapshot,
1094    /// Document flow snapshot.
1095    pub document_flows: DocumentFlowSnapshot,
1096    /// Subledger snapshot (linked from document flows).
1097    pub subledger: SubledgerSnapshot,
1098    /// OCPM event log snapshot (if OCPM generation enabled).
1099    pub ocpm: OcpmSnapshot,
1100    /// Audit data snapshot (if audit generation enabled).
1101    pub audit: AuditSnapshot,
1102    /// Banking KYC/AML data snapshot (if banking generation enabled).
1103    pub banking: BankingSnapshot,
1104    /// Graph export snapshot (if graph export enabled).
1105    pub graph_export: GraphExportSnapshot,
1106    /// Source-to-Contract (S2C) sourcing data snapshot (if sourcing generation enabled).
1107    pub sourcing: SourcingSnapshot,
1108    /// Financial reporting snapshot (financial statements + bank reconciliations).
1109    pub financial_reporting: FinancialReportingSnapshot,
1110    /// HR data snapshot (payroll, time entries, expenses, benefits, pensions, stock compensation).
1111    pub hr: HrSnapshot,
1112    /// Accounting standards snapshot (revenue recognition, impairment, business combinations, ECL, provisions, currency translation).
1113    pub accounting_standards: AccountingStandardsSnapshot,
1114    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1115    pub manufacturing: ManufacturingSnapshot,
1116    /// Sales, KPI, and budget snapshot.
1117    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1118    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1119    pub tax: TaxSnapshot,
1120    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1121    pub esg: EsgSnapshot,
1122    /// Treasury data snapshot (cash management, hedging, debt).
1123    pub treasury: TreasurySnapshot,
1124    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1125    pub project_accounting: ProjectAccountingSnapshot,
1126    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1127    pub process_evolution: Vec<ProcessEvolutionEvent>,
1128    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1129    pub organizational_events: Vec<OrganizationalEvent>,
1130    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1131    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1132    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1133    pub intercompany: IntercompanySnapshot,
1134    /// Generated journal entries.
1135    pub journal_entries: Vec<JournalEntry>,
1136    /// Anomaly labels (if injection enabled).
1137    pub anomaly_labels: AnomalyLabels,
1138    /// Balance validation results (if validation enabled).
1139    pub balance_validation: BalanceValidationResult,
1140    /// Data quality statistics (if injection enabled).
1141    pub data_quality_stats: DataQualityStats,
1142    /// Data quality issue records (if injection enabled).
1143    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1144    /// Generation statistics.
1145    pub statistics: EnhancedGenerationStatistics,
1146    /// Data lineage graph (if tracking enabled).
1147    pub lineage: Option<super::lineage::LineageGraph>,
1148    /// Quality gate evaluation result (`None` in the `Default` value).
1149    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1150    /// Internal controls (if controls generation enabled).
1151    pub internal_controls: Vec<InternalControl>,
1152    /// SoD (Segregation of Duties) violations identified during control application.
1153    ///
1154    /// Each record corresponds to a journal entry where `sod_violation == true`.
1155    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1156    /// Opening balances (if opening balance generation enabled).
1157    pub opening_balances: Vec<GeneratedOpeningBalance>,
1158    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1159    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1160    /// Counterfactual (original, mutated) journal entry pairs for ML training.
1161    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1162    /// Fraud red-flag indicators on P2P/O2C documents.
1163    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1164    /// Collusion rings (coordinated fraud networks).
1165    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1166    /// Bi-temporal version chains for vendor entities.
1167    pub temporal_vendor_chains:
1168        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1169    /// Entity relationship graph (nodes + edges with strength scores).
1170    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1171    /// Cross-process links (P2P ↔ O2C via inventory movements).
1172    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1173    /// Industry-specific GL accounts and metadata.
1174    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1175    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1176    pub compliance_regulations: ComplianceRegulationsSnapshot,
1177}
1178
1179/// Enhanced statistics about a generation run.
1180#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1181pub struct EnhancedGenerationStatistics {
1182    /// Total journal entries generated.
1183    pub total_entries: u64,
1184    /// Total line items generated.
1185    pub total_line_items: u64,
1186    /// Number of accounts in CoA.
1187    pub accounts_count: usize,
1188    /// Number of companies.
1189    pub companies_count: usize,
1190    /// Period in months.
1191    pub period_months: u32,
1192    /// Master data counts.
1193    pub vendor_count: usize,
1194    pub customer_count: usize,
1195    pub material_count: usize,
1196    pub asset_count: usize,
1197    pub employee_count: usize,
1198    /// Document flow counts.
1199    pub p2p_chain_count: usize,
1200    pub o2c_chain_count: usize,
1201    /// Subledger counts.
1202    pub ap_invoice_count: usize,
1203    pub ar_invoice_count: usize,
1204    /// OCPM counts.
1205    pub ocpm_event_count: usize,
1206    pub ocpm_object_count: usize,
1207    pub ocpm_case_count: usize,
1208    /// Audit counts.
1209    pub audit_engagement_count: usize,
1210    pub audit_workpaper_count: usize,
1211    pub audit_evidence_count: usize,
1212    pub audit_risk_count: usize,
1213    pub audit_finding_count: usize,
1214    pub audit_judgment_count: usize,
1215    /// ISA 505 confirmation counts.
1216    #[serde(default)]
1217    pub audit_confirmation_count: usize,
1218    #[serde(default)]
1219    pub audit_confirmation_response_count: usize,
1220    /// ISA 330/530 procedure step and sample counts.
1221    #[serde(default)]
1222    pub audit_procedure_step_count: usize,
1223    #[serde(default)]
1224    pub audit_sample_count: usize,
1225    /// ISA 520 analytical procedure counts.
1226    #[serde(default)]
1227    pub audit_analytical_result_count: usize,
1228    /// ISA 610 internal audit counts.
1229    #[serde(default)]
1230    pub audit_ia_function_count: usize,
1231    #[serde(default)]
1232    pub audit_ia_report_count: usize,
1233    /// ISA 550 related party counts.
1234    #[serde(default)]
1235    pub audit_related_party_count: usize,
1236    #[serde(default)]
1237    pub audit_related_party_transaction_count: usize,
1238    /// Anomaly counts.
1239    pub anomalies_injected: usize,
1240    /// Data quality issue counts.
1241    pub data_quality_issues: usize,
1242    /// Banking counts.
1243    pub banking_customer_count: usize,
1244    pub banking_account_count: usize,
1245    pub banking_transaction_count: usize,
1246    pub banking_suspicious_count: usize,
1247    /// Graph export counts.
1248    pub graph_export_count: usize,
1249    pub graph_node_count: usize,
1250    pub graph_edge_count: usize,
1251    /// LLM enrichment timing (milliseconds).
1252    #[serde(default)]
1253    pub llm_enrichment_ms: u64,
1254    /// Number of vendor names enriched by LLM.
1255    #[serde(default)]
1256    pub llm_vendors_enriched: usize,
1257    /// Diffusion enhancement timing (milliseconds).
1258    #[serde(default)]
1259    pub diffusion_enhancement_ms: u64,
1260    /// Number of diffusion samples generated.
1261    #[serde(default)]
1262    pub diffusion_samples_generated: usize,
1263    /// Causal generation timing (milliseconds).
1264    #[serde(default)]
1265    pub causal_generation_ms: u64,
1266    /// Number of causal samples generated.
1267    #[serde(default)]
1268    pub causal_samples_generated: usize,
1269    /// Whether causal validation passed.
1270    #[serde(default)]
1271    pub causal_validation_passed: Option<bool>,
1272    /// S2C sourcing counts.
1273    #[serde(default)]
1274    pub sourcing_project_count: usize,
1275    #[serde(default)]
1276    pub rfx_event_count: usize,
1277    #[serde(default)]
1278    pub bid_count: usize,
1279    #[serde(default)]
1280    pub contract_count: usize,
1281    #[serde(default)]
1282    pub catalog_item_count: usize,
1283    #[serde(default)]
1284    pub scorecard_count: usize,
1285    /// Financial reporting counts.
1286    #[serde(default)]
1287    pub financial_statement_count: usize,
1288    #[serde(default)]
1289    pub bank_reconciliation_count: usize,
1290    /// HR counts.
1291    #[serde(default)]
1292    pub payroll_run_count: usize,
1293    #[serde(default)]
1294    pub time_entry_count: usize,
1295    #[serde(default)]
1296    pub expense_report_count: usize,
1297    #[serde(default)]
1298    pub benefit_enrollment_count: usize,
1299    #[serde(default)]
1300    pub pension_plan_count: usize,
1301    #[serde(default)]
1302    pub stock_grant_count: usize,
1303    /// Accounting standards counts.
1304    #[serde(default)]
1305    pub revenue_contract_count: usize,
1306    #[serde(default)]
1307    pub impairment_test_count: usize,
1308    #[serde(default)]
1309    pub business_combination_count: usize,
1310    #[serde(default)]
1311    pub ecl_model_count: usize,
1312    #[serde(default)]
1313    pub provision_count: usize,
1314    /// Manufacturing counts.
1315    #[serde(default)]
1316    pub production_order_count: usize,
1317    #[serde(default)]
1318    pub quality_inspection_count: usize,
1319    #[serde(default)]
1320    pub cycle_count_count: usize,
1321    #[serde(default)]
1322    pub bom_component_count: usize,
1323    #[serde(default)]
1324    pub inventory_movement_count: usize,
1325    /// Sales & reporting counts.
1326    #[serde(default)]
1327    pub sales_quote_count: usize,
1328    #[serde(default)]
1329    pub kpi_count: usize,
1330    #[serde(default)]
1331    pub budget_line_count: usize,
1332    /// Tax counts.
1333    #[serde(default)]
1334    pub tax_jurisdiction_count: usize,
1335    #[serde(default)]
1336    pub tax_code_count: usize,
1337    /// ESG counts.
1338    #[serde(default)]
1339    pub esg_emission_count: usize,
1340    #[serde(default)]
1341    pub esg_disclosure_count: usize,
1342    /// Intercompany counts.
1343    #[serde(default)]
1344    pub ic_matched_pair_count: usize,
1345    #[serde(default)]
1346    pub ic_elimination_count: usize,
1347    /// Number of intercompany journal entries (seller + buyer side).
1348    #[serde(default)]
1349    pub ic_transaction_count: usize,
1350    /// Number of fixed asset subledger records.
1351    #[serde(default)]
1352    pub fa_subledger_count: usize,
1353    /// Number of inventory subledger records.
1354    #[serde(default)]
1355    pub inventory_subledger_count: usize,
1356    /// Treasury debt instrument count.
1357    #[serde(default)]
1358    pub treasury_debt_instrument_count: usize,
1359    /// Treasury hedging instrument count.
1360    #[serde(default)]
1361    pub treasury_hedging_instrument_count: usize,
1362    /// Project accounting project count.
1363    #[serde(default)]
1364    pub project_count: usize,
1365    /// Project accounting change order count.
1366    #[serde(default)]
1367    pub project_change_order_count: usize,
1368    /// Tax provision count.
1369    #[serde(default)]
1370    pub tax_provision_count: usize,
1371    /// Opening balance count.
1372    #[serde(default)]
1373    pub opening_balance_count: usize,
1374    /// Subledger reconciliation count.
1375    #[serde(default)]
1376    pub subledger_reconciliation_count: usize,
1377    /// Tax line count.
1378    #[serde(default)]
1379    pub tax_line_count: usize,
1380    /// Project cost line count.
1381    #[serde(default)]
1382    pub project_cost_line_count: usize,
1383    /// Cash position count.
1384    #[serde(default)]
1385    pub cash_position_count: usize,
1386    /// Cash forecast count.
1387    #[serde(default)]
1388    pub cash_forecast_count: usize,
1389    /// Cash pool count.
1390    #[serde(default)]
1391    pub cash_pool_count: usize,
1392    /// Process evolution event count.
1393    #[serde(default)]
1394    pub process_evolution_event_count: usize,
1395    /// Organizational event count.
1396    #[serde(default)]
1397    pub organizational_event_count: usize,
1398    /// Counterfactual pair count.
1399    #[serde(default)]
1400    pub counterfactual_pair_count: usize,
1401    /// Number of fraud red-flag indicators generated.
1402    #[serde(default)]
1403    pub red_flag_count: usize,
1404    /// Number of collusion rings generated.
1405    #[serde(default)]
1406    pub collusion_ring_count: usize,
1407    /// Number of bi-temporal vendor version chains generated.
1408    #[serde(default)]
1409    pub temporal_version_chain_count: usize,
1410    /// Number of nodes in the entity relationship graph.
1411    #[serde(default)]
1412    pub entity_relationship_node_count: usize,
1413    /// Number of edges in the entity relationship graph.
1414    #[serde(default)]
1415    pub entity_relationship_edge_count: usize,
1416    /// Number of cross-process links generated.
1417    #[serde(default)]
1418    pub cross_process_link_count: usize,
1419    /// Number of disruption events generated.
1420    #[serde(default)]
1421    pub disruption_event_count: usize,
1422    /// Number of industry-specific GL accounts generated.
1423    #[serde(default)]
1424    pub industry_gl_account_count: usize,
1425    /// Number of period-close journal entries generated (tax provision + closing entries).
1426    #[serde(default)]
1427    pub period_close_je_count: usize,
1428}
1429
1430/// Enhanced orchestrator with full feature integration.
1431pub struct EnhancedOrchestrator {
1432    config: GeneratorConfig,
1433    phase_config: PhaseConfig,
1434    coa: Option<Arc<ChartOfAccounts>>,
1435    master_data: MasterDataSnapshot,
1436    seed: u64,
1437    multi_progress: Option<MultiProgress>,
1438    /// Resource guard for memory, disk, and CPU monitoring
1439    resource_guard: ResourceGuard,
1440    /// Output path for disk space monitoring
1441    output_path: Option<PathBuf>,
1442    /// Copula generators for preserving correlations (from fingerprint)
1443    copula_generators: Vec<CopulaGeneratorSpec>,
1444    /// Country pack registry for localized data generation
1445    country_pack_registry: datasynth_core::CountryPackRegistry,
1446    /// Optional streaming sink for phase-by-phase output
1447    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1448}
1449
1450impl EnhancedOrchestrator {
1451    /// Create a new enhanced orchestrator.
1452    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1453        datasynth_config::validate_config(&config)?;
1454
1455        let seed = config.global.seed.unwrap_or_else(rand::random);
1456
1457        // Build resource guard from config
1458        let resource_guard = Self::build_resource_guard(&config, None);
1459
1460        // Build country pack registry from config
1461        let country_pack_registry = match &config.country_packs {
1462            Some(cp) => {
1463                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1464                    .map_err(|e| SynthError::config(e.to_string()))?
1465            }
1466            None => datasynth_core::CountryPackRegistry::builtin_only()
1467                .map_err(|e| SynthError::config(e.to_string()))?,
1468        };
1469
1470        Ok(Self {
1471            config,
1472            phase_config,
1473            coa: None,
1474            master_data: MasterDataSnapshot::default(),
1475            seed,
1476            multi_progress: None,
1477            resource_guard,
1478            output_path: None,
1479            copula_generators: Vec::new(),
1480            country_pack_registry,
1481            phase_sink: None,
1482        })
1483    }
1484
1485    /// Create with default phase config.
1486    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1487        Self::new(config, PhaseConfig::default())
1488    }
1489
1490    /// Set a streaming phase sink for real-time output (builder pattern).
1491    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1492        self.phase_sink = Some(sink);
1493        self
1494    }
1495
1496    /// Set a streaming phase sink on an existing orchestrator.
1497    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1498        self.phase_sink = Some(sink);
1499    }
1500
1501    /// Emit a batch of items to the phase sink (if configured).
1502    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1503        if let Some(ref sink) = self.phase_sink {
1504            for item in items {
1505                if let Ok(value) = serde_json::to_value(item) {
1506                    if let Err(e) = sink.emit(phase, type_name, &value) {
1507                        warn!(
1508                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1509                        );
1510                    }
1511                }
1512            }
1513            if let Err(e) = sink.phase_complete(phase) {
1514                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1515            }
1516        }
1517    }
1518
1519    /// Enable/disable progress bars.
1520    pub fn with_progress(mut self, show: bool) -> Self {
1521        self.phase_config.show_progress = show;
1522        if show {
1523            self.multi_progress = Some(MultiProgress::new());
1524        }
1525        self
1526    }
1527
1528    /// Set the output path for disk space monitoring.
1529    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1530        let path = path.into();
1531        self.output_path = Some(path.clone());
1532        // Rebuild resource guard with the output path
1533        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1534        self
1535    }
1536
1537    /// Access the country pack registry.
1538    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1539        &self.country_pack_registry
1540    }
1541
1542    /// Look up a country pack by country code string.
1543    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1544        self.country_pack_registry.get_by_str(country)
1545    }
1546
1547    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1548    /// company, defaulting to `"US"` if no companies are configured.
1549    fn primary_country_code(&self) -> &str {
1550        self.config
1551            .companies
1552            .first()
1553            .map(|c| c.country.as_str())
1554            .unwrap_or("US")
1555    }
1556
1557    /// Resolve the country pack for the primary (first) company.
1558    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1559        self.country_pack_for(self.primary_country_code())
1560    }
1561
1562    /// Resolve the CoA framework from config/country-pack.
1563    fn resolve_coa_framework(&self) -> CoAFramework {
1564        if self.config.accounting_standards.enabled {
1565            match self.config.accounting_standards.framework {
1566                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1567                    return CoAFramework::FrenchPcg;
1568                }
1569                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1570                    return CoAFramework::GermanSkr04;
1571                }
1572                _ => {}
1573            }
1574        }
1575        // Fallback: derive from country pack
1576        let pack = self.primary_pack();
1577        match pack.accounting.framework.as_str() {
1578            "french_gaap" => CoAFramework::FrenchPcg,
1579            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1580            _ => CoAFramework::UsGaap,
1581        }
1582    }
1583
1584    /// Check if copula generators are available.
1585    ///
1586    /// Returns true if the orchestrator has copula generators for preserving
1587    /// correlations (typically from fingerprint-based generation).
1588    pub fn has_copulas(&self) -> bool {
1589        !self.copula_generators.is_empty()
1590    }
1591
1592    /// Get the copula generators.
1593    ///
1594    /// Returns a reference to the copula generators for use during generation.
1595    /// These can be used to generate correlated samples that preserve the
1596    /// statistical relationships from the source data.
1597    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1598        &self.copula_generators
1599    }
1600
1601    /// Get a mutable reference to the copula generators.
1602    ///
1603    /// Allows generators to sample from copulas during data generation.
1604    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1605        &mut self.copula_generators
1606    }
1607
1608    /// Sample correlated values from a named copula.
1609    ///
1610    /// Returns None if the copula doesn't exist.
1611    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1612        self.copula_generators
1613            .iter_mut()
1614            .find(|c| c.name == copula_name)
1615            .map(|c| c.generator.sample())
1616    }
1617
1618    /// Create an orchestrator from a fingerprint file.
1619    ///
1620    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1621    /// and creates an orchestrator configured to generate data matching
1622    /// the statistical properties of the original data.
1623    ///
1624    /// # Arguments
1625    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1626    /// * `phase_config` - Phase configuration for generation
1627    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1628    ///
1629    /// # Example
1630    /// ```no_run
1631    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1632    /// use std::path::Path;
1633    ///
1634    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1635    ///     Path::new("fingerprint.dsf"),
1636    ///     PhaseConfig::default(),
1637    ///     1.0,
1638    /// ).unwrap();
1639    /// ```
1640    pub fn from_fingerprint(
1641        fingerprint_path: &std::path::Path,
1642        phase_config: PhaseConfig,
1643        scale: f64,
1644    ) -> SynthResult<Self> {
1645        info!("Loading fingerprint from: {}", fingerprint_path.display());
1646
1647        // Read the fingerprint
1648        let reader = FingerprintReader::new();
1649        let fingerprint = reader
1650            .read_from_file(fingerprint_path)
1651            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
1652
1653        Self::from_fingerprint_data(fingerprint, phase_config, scale)
1654    }
1655
1656    /// Create an orchestrator from a loaded fingerprint.
1657    ///
1658    /// # Arguments
1659    /// * `fingerprint` - The loaded fingerprint
1660    /// * `phase_config` - Phase configuration for generation
1661    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1662    pub fn from_fingerprint_data(
1663        fingerprint: Fingerprint,
1664        phase_config: PhaseConfig,
1665        scale: f64,
1666    ) -> SynthResult<Self> {
1667        info!(
1668            "Synthesizing config from fingerprint (version: {}, tables: {})",
1669            fingerprint.manifest.version,
1670            fingerprint.schema.tables.len()
1671        );
1672
1673        // Generate a seed for the synthesis
1674        let seed: u64 = rand::random();
1675        info!("Fingerprint synthesis seed: {}", seed);
1676
1677        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
1678        let options = SynthesisOptions {
1679            scale,
1680            seed: Some(seed),
1681            preserve_correlations: true,
1682            inject_anomalies: true,
1683        };
1684        let synthesizer = ConfigSynthesizer::with_options(options);
1685
1686        // Synthesize full result including copula generators
1687        let synthesis_result = synthesizer
1688            .synthesize_full(&fingerprint, seed)
1689            .map_err(|e| {
1690                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
1691            })?;
1692
1693        // Start with a base config from the fingerprint's industry if available
1694        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
1695            Self::base_config_for_industry(industry)
1696        } else {
1697            Self::base_config_for_industry("manufacturing")
1698        };
1699
1700        // Apply the synthesized patches
1701        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
1702
1703        // Log synthesis results
1704        info!(
1705            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
1706            fingerprint.schema.tables.len(),
1707            scale,
1708            synthesis_result.copula_generators.len()
1709        );
1710
1711        if !synthesis_result.copula_generators.is_empty() {
1712            for spec in &synthesis_result.copula_generators {
1713                info!(
1714                    "  Copula '{}' for table '{}': {} columns",
1715                    spec.name,
1716                    spec.table,
1717                    spec.columns.len()
1718                );
1719            }
1720        }
1721
1722        // Create the orchestrator with the synthesized config
1723        let mut orchestrator = Self::new(config, phase_config)?;
1724
1725        // Store copula generators for use during generation
1726        orchestrator.copula_generators = synthesis_result.copula_generators;
1727
1728        Ok(orchestrator)
1729    }
1730
1731    /// Create a base config for a given industry.
1732    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
1733        use datasynth_config::presets::create_preset;
1734        use datasynth_config::TransactionVolume;
1735        use datasynth_core::models::{CoAComplexity, IndustrySector};
1736
1737        let sector = match industry.to_lowercase().as_str() {
1738            "manufacturing" => IndustrySector::Manufacturing,
1739            "retail" => IndustrySector::Retail,
1740            "financial" | "financial_services" => IndustrySector::FinancialServices,
1741            "healthcare" => IndustrySector::Healthcare,
1742            "technology" | "tech" => IndustrySector::Technology,
1743            _ => IndustrySector::Manufacturing,
1744        };
1745
1746        // Create a preset with reasonable defaults
1747        create_preset(
1748            sector,
1749            1,  // company count
1750            12, // period months
1751            CoAComplexity::Medium,
1752            TransactionVolume::TenK,
1753        )
1754    }
1755
1756    /// Apply a config patch to a GeneratorConfig.
1757    fn apply_config_patch(
1758        mut config: GeneratorConfig,
1759        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
1760    ) -> GeneratorConfig {
1761        use datasynth_fingerprint::synthesis::ConfigValue;
1762
1763        for (key, value) in patch.values() {
1764            match (key.as_str(), value) {
1765                // Transaction count is handled via TransactionVolume enum on companies
1766                // Log it but cannot directly set it (would need to modify company volumes)
1767                ("transactions.count", ConfigValue::Integer(n)) => {
1768                    info!(
1769                        "Fingerprint suggests {} transactions (apply via company volumes)",
1770                        n
1771                    );
1772                }
1773                ("global.period_months", ConfigValue::Integer(n)) => {
1774                    config.global.period_months = (*n).clamp(1, 120) as u32;
1775                }
1776                ("global.start_date", ConfigValue::String(s)) => {
1777                    config.global.start_date = s.clone();
1778                }
1779                ("global.seed", ConfigValue::Integer(n)) => {
1780                    config.global.seed = Some(*n as u64);
1781                }
1782                ("fraud.enabled", ConfigValue::Bool(b)) => {
1783                    config.fraud.enabled = *b;
1784                }
1785                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
1786                    config.fraud.fraud_rate = *f;
1787                }
1788                ("data_quality.enabled", ConfigValue::Bool(b)) => {
1789                    config.data_quality.enabled = *b;
1790                }
1791                // Handle anomaly injection paths (mapped to fraud config)
1792                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
1793                    config.fraud.enabled = *b;
1794                }
1795                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
1796                    config.fraud.fraud_rate = *f;
1797                }
1798                _ => {
1799                    debug!("Ignoring unknown config patch key: {}", key);
1800                }
1801            }
1802        }
1803
1804        config
1805    }
1806
1807    /// Build a resource guard from the configuration.
1808    fn build_resource_guard(
1809        config: &GeneratorConfig,
1810        output_path: Option<PathBuf>,
1811    ) -> ResourceGuard {
1812        let mut builder = ResourceGuardBuilder::new();
1813
1814        // Configure memory limit if set
1815        if config.global.memory_limit_mb > 0 {
1816            builder = builder.memory_limit(config.global.memory_limit_mb);
1817        }
1818
1819        // Configure disk monitoring for output path
1820        if let Some(path) = output_path {
1821            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
1822        }
1823
1824        // Use conservative degradation settings for production safety
1825        builder = builder.conservative();
1826
1827        builder.build()
1828    }
1829
1830    /// Check resources (memory, disk, CPU) and return degradation level.
1831    ///
1832    /// Returns an error if hard limits are exceeded.
1833    /// Returns Ok(DegradationLevel) indicating current resource state.
1834    fn check_resources(&self) -> SynthResult<DegradationLevel> {
1835        self.resource_guard.check()
1836    }
1837
1838    /// Check resources with logging.
1839    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
1840        let level = self.resource_guard.check()?;
1841
1842        if level != DegradationLevel::Normal {
1843            warn!(
1844                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
1845                phase,
1846                level,
1847                self.resource_guard.current_memory_mb(),
1848                self.resource_guard.available_disk_mb()
1849            );
1850        }
1851
1852        Ok(level)
1853    }
1854
1855    /// Get current degradation actions based on resource state.
1856    fn get_degradation_actions(&self) -> DegradationActions {
1857        self.resource_guard.get_actions()
1858    }
1859
1860    /// Legacy method for backwards compatibility - now uses ResourceGuard.
1861    fn check_memory_limit(&self) -> SynthResult<()> {
1862        self.check_resources()?;
1863        Ok(())
1864    }
1865
1866    /// Run the complete generation workflow.
1867    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
1868        info!("Starting enhanced generation workflow");
1869        info!(
1870            "Config: industry={:?}, period_months={}, companies={}",
1871            self.config.global.industry,
1872            self.config.global.period_months,
1873            self.config.companies.len()
1874        );
1875
1876        // Set decimal serialization mode (thread-local, affects JSON output).
1877        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
1878        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
1879        datasynth_core::serde_decimal::set_numeric_native(is_native);
1880        struct NumericModeGuard;
1881        impl Drop for NumericModeGuard {
1882            fn drop(&mut self) {
1883                datasynth_core::serde_decimal::set_numeric_native(false);
1884            }
1885        }
1886        let _numeric_guard = if is_native {
1887            Some(NumericModeGuard)
1888        } else {
1889            None
1890        };
1891
1892        // Initial resource check before starting
1893        let initial_level = self.check_resources_with_log("initial")?;
1894        if initial_level == DegradationLevel::Emergency {
1895            return Err(SynthError::resource(
1896                "Insufficient resources to start generation",
1897            ));
1898        }
1899
1900        let mut stats = EnhancedGenerationStatistics {
1901            companies_count: self.config.companies.len(),
1902            period_months: self.config.global.period_months,
1903            ..Default::default()
1904        };
1905
1906        // Phase 1: Chart of Accounts
1907        let coa = self.phase_chart_of_accounts(&mut stats)?;
1908
1909        // Phase 2: Master Data
1910        self.phase_master_data(&mut stats)?;
1911
1912        // Emit master data to stream sink
1913        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
1914        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
1915        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
1916
1917        // Phase 3: Document Flows + Subledger Linking
1918        let (mut document_flows, mut subledger, fa_journal_entries) =
1919            self.phase_document_flows(&mut stats)?;
1920
1921        // Emit document flows to stream sink
1922        self.emit_phase_items(
1923            "document_flows",
1924            "PurchaseOrder",
1925            &document_flows.purchase_orders,
1926        );
1927        self.emit_phase_items(
1928            "document_flows",
1929            "GoodsReceipt",
1930            &document_flows.goods_receipts,
1931        );
1932        self.emit_phase_items(
1933            "document_flows",
1934            "VendorInvoice",
1935            &document_flows.vendor_invoices,
1936        );
1937        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
1938        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
1939
1940        // Phase 3b: Opening Balances (before JE generation)
1941        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
1942
1943        // Phase 3c: Convert opening balances to journal entries and prepend them.
1944        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
1945        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
1946        // balance map type.
1947        let opening_balance_jes: Vec<JournalEntry> = opening_balances
1948            .iter()
1949            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
1950            .collect();
1951        if !opening_balance_jes.is_empty() {
1952            debug!(
1953                "Prepending {} opening balance JEs to entries",
1954                opening_balance_jes.len()
1955            );
1956        }
1957
1958        // Phase 4: Journal Entries
1959        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
1960
1961        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
1962        // starts from the correct initial state.
1963        if !opening_balance_jes.is_empty() {
1964            let mut combined = opening_balance_jes;
1965            combined.extend(entries);
1966            entries = combined;
1967        }
1968
1969        // Phase 4c: Append FA acquisition journal entries to main entries
1970        if !fa_journal_entries.is_empty() {
1971            debug!(
1972                "Appending {} FA acquisition JEs to main entries",
1973                fa_journal_entries.len()
1974            );
1975            entries.extend(fa_journal_entries);
1976        }
1977
1978        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
1979        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
1980
1981        // Get current degradation actions for optional phases
1982        let actions = self.get_degradation_actions();
1983
1984        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
1985        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
1986
1987        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
1988        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
1989        if !sourcing.contracts.is_empty() {
1990            let mut linked_count = 0usize;
1991            // Collect (vendor_id, po_id) pairs from P2P chains
1992            let po_vendor_pairs: Vec<(String, String)> = document_flows
1993                .p2p_chains
1994                .iter()
1995                .map(|chain| {
1996                    (
1997                        chain.purchase_order.vendor_id.clone(),
1998                        chain.purchase_order.header.document_id.clone(),
1999                    )
2000                })
2001                .collect();
2002
2003            for chain in &mut document_flows.p2p_chains {
2004                if chain.purchase_order.contract_id.is_none() {
2005                    if let Some(contract) = sourcing
2006                        .contracts
2007                        .iter()
2008                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2009                    {
2010                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2011                        linked_count += 1;
2012                    }
2013                }
2014            }
2015
2016            // Populate reverse FK: purchase_order_ids on each contract
2017            for contract in &mut sourcing.contracts {
2018                let po_ids: Vec<String> = po_vendor_pairs
2019                    .iter()
2020                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2021                    .map(|(_, po_id)| po_id.clone())
2022                    .collect();
2023                if !po_ids.is_empty() {
2024                    contract.purchase_order_ids = po_ids;
2025                }
2026            }
2027
2028            if linked_count > 0 {
2029                debug!(
2030                    "Linked {} purchase orders to S2C contracts by vendor match",
2031                    linked_count
2032                );
2033            }
2034        }
2035
2036        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2037        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2038
2039        // Phase 5c: Append IC journal entries to main entries
2040        if !intercompany.seller_journal_entries.is_empty()
2041            || !intercompany.buyer_journal_entries.is_empty()
2042        {
2043            let ic_je_count = intercompany.seller_journal_entries.len()
2044                + intercompany.buyer_journal_entries.len();
2045            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2046            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2047            debug!(
2048                "Appended {} IC journal entries to main entries",
2049                ic_je_count
2050            );
2051        }
2052
2053        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2054        if !intercompany.elimination_entries.is_empty() {
2055            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2056                &intercompany.elimination_entries,
2057            );
2058            if !elim_jes.is_empty() {
2059                debug!(
2060                    "Appended {} elimination journal entries to main entries",
2061                    elim_jes.len()
2062                );
2063                // IC elimination net-zero validation
2064                let elim_debit: rust_decimal::Decimal =
2065                    elim_jes.iter().map(|je| je.total_debit()).sum();
2066                let elim_credit: rust_decimal::Decimal =
2067                    elim_jes.iter().map(|je| je.total_credit()).sum();
2068                if elim_debit != elim_credit {
2069                    warn!(
2070                        "IC elimination entries not balanced: debits={}, credits={}, diff={}",
2071                        elim_debit,
2072                        elim_credit,
2073                        elim_debit - elim_credit
2074                    );
2075                }
2076                entries.extend(elim_jes);
2077            }
2078        }
2079
2080        // Phase 5e: Wire IC source documents into document flow snapshot
2081        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2082            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2083                document_flows
2084                    .customer_invoices
2085                    .extend(ic_docs.seller_invoices.iter().cloned());
2086                document_flows
2087                    .purchase_orders
2088                    .extend(ic_docs.buyer_orders.iter().cloned());
2089                document_flows
2090                    .goods_receipts
2091                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2092                document_flows
2093                    .vendor_invoices
2094                    .extend(ic_docs.buyer_invoices.iter().cloned());
2095                debug!(
2096                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2097                    ic_docs.seller_invoices.len(),
2098                    ic_docs.buyer_orders.len(),
2099                    ic_docs.buyer_goods_receipts.len(),
2100                    ic_docs.buyer_invoices.len(),
2101                );
2102            }
2103        }
2104
2105        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2106        let hr = self.phase_hr_data(&mut stats)?;
2107
2108        // Phase 6b: Generate JEs from payroll runs
2109        if !hr.payroll_runs.is_empty() {
2110            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2111            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2112            entries.extend(payroll_jes);
2113        }
2114
2115        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2116        if !hr.pension_journal_entries.is_empty() {
2117            debug!(
2118                "Generated {} JEs from pension plans",
2119                hr.pension_journal_entries.len()
2120            );
2121            entries.extend(hr.pension_journal_entries.iter().cloned());
2122        }
2123
2124        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2125        if !hr.stock_comp_journal_entries.is_empty() {
2126            debug!(
2127                "Generated {} JEs from stock-based compensation",
2128                hr.stock_comp_journal_entries.len()
2129            );
2130            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2131        }
2132
2133        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2134        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2135
2136        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2137        if !manufacturing_snap.production_orders.is_empty() {
2138            let currency = self
2139                .config
2140                .companies
2141                .first()
2142                .map(|c| c.currency.as_str())
2143                .unwrap_or("USD");
2144            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2145                &manufacturing_snap.production_orders,
2146                &manufacturing_snap.quality_inspections,
2147                currency,
2148            );
2149            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2150            entries.extend(mfg_jes);
2151        }
2152
2153        // Phase 7a-warranty: Generate warranty provisions per company
2154        if !manufacturing_snap.quality_inspections.is_empty() {
2155            let framework = match self.config.accounting_standards.framework {
2156                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2157                _ => "US_GAAP",
2158            };
2159            for company in &self.config.companies {
2160                let company_orders: Vec<_> = manufacturing_snap
2161                    .production_orders
2162                    .iter()
2163                    .filter(|o| o.company_code == company.code)
2164                    .cloned()
2165                    .collect();
2166                let company_inspections: Vec<_> = manufacturing_snap
2167                    .quality_inspections
2168                    .iter()
2169                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2170                    .cloned()
2171                    .collect();
2172                if company_inspections.is_empty() {
2173                    continue;
2174                }
2175                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2176                let warranty_result = warranty_gen.generate(
2177                    &company.code,
2178                    &company_orders,
2179                    &company_inspections,
2180                    &company.currency,
2181                    framework,
2182                );
2183                if !warranty_result.journal_entries.is_empty() {
2184                    debug!(
2185                        "Generated {} warranty provision JEs for {}",
2186                        warranty_result.journal_entries.len(),
2187                        company.code
2188                    );
2189                    entries.extend(warranty_result.journal_entries);
2190                }
2191            }
2192        }
2193
2194        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2195        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2196        {
2197            let cogs_currency = self
2198                .config
2199                .companies
2200                .first()
2201                .map(|c| c.currency.as_str())
2202                .unwrap_or("USD");
2203            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2204                &document_flows.deliveries,
2205                &manufacturing_snap.production_orders,
2206                cogs_currency,
2207            );
2208            if !cogs_jes.is_empty() {
2209                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2210                entries.extend(cogs_jes);
2211            }
2212        }
2213
2214        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2215        //
2216        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2217        // subledger inventory positions.  Here we reconcile them so that position balances
2218        // reflect the actual stock movements within the generation period.
2219        if !manufacturing_snap.inventory_movements.is_empty()
2220            && !subledger.inventory_positions.is_empty()
2221        {
2222            use datasynth_core::models::MovementType as MfgMovementType;
2223            let mut receipt_count = 0usize;
2224            let mut issue_count = 0usize;
2225            for movement in &manufacturing_snap.inventory_movements {
2226                // Find a matching position by material code and company
2227                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2228                    p.material_id == movement.material_code
2229                        && p.company_code == movement.entity_code
2230                }) {
2231                    match movement.movement_type {
2232                        MfgMovementType::GoodsReceipt => {
2233                            // Increase stock and update weighted-average cost
2234                            pos.add_quantity(
2235                                movement.quantity,
2236                                movement.value,
2237                                movement.movement_date,
2238                            );
2239                            receipt_count += 1;
2240                        }
2241                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2242                            // Decrease stock (best-effort; silently skip if insufficient)
2243                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2244                            issue_count += 1;
2245                        }
2246                        _ => {}
2247                    }
2248                }
2249            }
2250            debug!(
2251                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2252                manufacturing_snap.inventory_movements.len(),
2253                receipt_count,
2254                issue_count,
2255            );
2256        }
2257
2258        // Update final entry/line-item stats after all JE-generating phases
2259        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2260        if !entries.is_empty() {
2261            stats.total_entries = entries.len() as u64;
2262            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2263            debug!(
2264                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2265                stats.total_entries, stats.total_line_items
2266            );
2267        }
2268
2269        // Phase 7b: Apply internal controls to journal entries
2270        if self.config.internal_controls.enabled && !entries.is_empty() {
2271            info!("Phase 7b: Applying internal controls to journal entries");
2272            let control_config = ControlGeneratorConfig {
2273                exception_rate: self.config.internal_controls.exception_rate,
2274                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2275                enable_sox_marking: true,
2276                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2277                    self.config.internal_controls.sox_materiality_threshold,
2278                )
2279                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2280                ..Default::default()
2281            };
2282            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2283            for entry in &mut entries {
2284                control_gen.apply_controls(entry, &coa);
2285            }
2286            let with_controls = entries
2287                .iter()
2288                .filter(|e| !e.header.control_ids.is_empty())
2289                .count();
2290            info!(
2291                "Applied controls to {} entries ({} with control IDs assigned)",
2292                entries.len(),
2293                with_controls
2294            );
2295        }
2296
2297        // Phase 7c: Extract SoD violations from annotated journal entries.
2298        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2299        // Here we materialise those flags into standalone SodViolation records.
2300        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2301            .iter()
2302            .filter(|e| e.header.sod_violation)
2303            .filter_map(|e| {
2304                e.header.sod_conflict_type.map(|ct| {
2305                    use datasynth_core::models::{RiskLevel, SodViolation};
2306                    let severity = match ct {
2307                        datasynth_core::models::SodConflictType::PaymentReleaser
2308                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2309                            RiskLevel::Critical
2310                        }
2311                        datasynth_core::models::SodConflictType::PreparerApprover
2312                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2313                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2314                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2315                            RiskLevel::High
2316                        }
2317                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2318                            RiskLevel::Medium
2319                        }
2320                    };
2321                    let action = format!(
2322                        "SoD conflict {:?} on entry {} ({})",
2323                        ct, e.header.document_id, e.header.company_code
2324                    );
2325                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2326                })
2327            })
2328            .collect();
2329        if !sod_violations.is_empty() {
2330            info!(
2331                "Phase 7c: Extracted {} SoD violations from {} entries",
2332                sod_violations.len(),
2333                entries.len()
2334            );
2335        }
2336
2337        // Emit journal entries to stream sink (after all JE-generating phases)
2338        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2339
2340        // Phase 8: Anomaly Injection (after all JE-generating phases)
2341        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2342
2343        // Emit anomaly labels to stream sink
2344        self.emit_phase_items(
2345            "anomaly_injection",
2346            "LabeledAnomaly",
2347            &anomaly_labels.labels,
2348        );
2349
2350        // Propagate fraud labels from journal entries to source documents.
2351        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2352        // instead of tracing through document_references.json.
2353        {
2354            use std::collections::HashMap;
2355            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2356            //
2357            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2358            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2359            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2360            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2361            // we register BOTH the prefixed form (raw reference) AND the bare form
2362            // (post-colon portion) in the map. Also register the JE's document_id
2363            // UUID so documents that set `journal_entry_id` match via that path.
2364            //
2365            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2366            // looked up "foo", silently producing 0 propagations.
2367            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2368            for je in &entries {
2369                if je.header.is_fraud {
2370                    if let Some(ref fraud_type) = je.header.fraud_type {
2371                        if let Some(ref reference) = je.header.reference {
2372                            // Register the full reference ("GR:PO-2024-000001")
2373                            fraud_map.insert(reference.clone(), *fraud_type);
2374                            // Also register the bare document ID ("PO-2024-000001")
2375                            // by stripping the "PREFIX:" if present.
2376                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2377                                if !bare.is_empty() {
2378                                    fraud_map.insert(bare.to_string(), *fraud_type);
2379                                }
2380                            }
2381                        }
2382                        // Also tag via journal_entry_id on document headers
2383                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2384                    }
2385                }
2386            }
2387            if !fraud_map.is_empty() {
2388                let mut propagated = 0usize;
2389                // Use DocumentHeader::propagate_fraud method for each doc type
2390                macro_rules! propagate_to {
2391                    ($collection:expr) => {
2392                        for doc in &mut $collection {
2393                            if doc.header.propagate_fraud(&fraud_map) {
2394                                propagated += 1;
2395                            }
2396                        }
2397                    };
2398                }
2399                propagate_to!(document_flows.purchase_orders);
2400                propagate_to!(document_flows.goods_receipts);
2401                propagate_to!(document_flows.vendor_invoices);
2402                propagate_to!(document_flows.payments);
2403                propagate_to!(document_flows.sales_orders);
2404                propagate_to!(document_flows.deliveries);
2405                propagate_to!(document_flows.customer_invoices);
2406                if propagated > 0 {
2407                    info!(
2408                        "Propagated fraud labels to {} document flow records",
2409                        propagated
2410                    );
2411                }
2412            }
2413        }
2414
2415        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2416        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2417
2418        // Emit red flags to stream sink
2419        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2420
2421        // Phase 26b: Collusion Ring Generation (after red flags)
2422        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2423
2424        // Emit collusion rings to stream sink
2425        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2426
2427        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2428        let balance_validation = self.phase_balance_validation(&entries)?;
2429
2430        // Phase 9b: GL-to-Subledger Reconciliation
2431        let subledger_reconciliation =
2432            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2433
2434        // Phase 10: Data Quality Injection
2435        let (data_quality_stats, quality_issues) =
2436            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2437
2438        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2439        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2440
2441        // Phase 11: Audit Data
2442        let audit = self.phase_audit_data(&entries, &mut stats)?;
2443
2444        // Phase 12: Banking KYC/AML Data
2445        let mut banking = self.phase_banking_data(&mut stats)?;
2446
2447        // Phase 12.5: Bridge document-flow Payments → BankTransactions
2448        // Creates coherence between the accounting layer (payments, JEs) and the
2449        // banking layer (bank transactions). A vendor invoice payment now appears
2450        // on both sides with cross-references and fraud labels propagated.
2451        if self.phase_config.generate_banking
2452            && !document_flows.payments.is_empty()
2453            && !banking.accounts.is_empty()
2454        {
2455            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
2456            if bridge_rate > 0.0 {
2457                let mut bridge =
2458                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
2459                        self.seed,
2460                    );
2461                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
2462                    &document_flows.payments,
2463                    &banking.customers,
2464                    &banking.accounts,
2465                    bridge_rate,
2466                );
2467                info!(
2468                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
2469                    bridge_stats.bridged_count,
2470                    bridge_stats.transactions_emitted,
2471                    bridge_stats.fraud_propagated,
2472                );
2473                let bridged_count = bridged_txns.len();
2474                banking.transactions.extend(bridged_txns);
2475
2476                // Re-run velocity computation so bridged txns also get features
2477                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
2478                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
2479                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
2480                        &mut banking.transactions,
2481                    );
2482                }
2483
2484                // Recompute suspicious count after bridging
2485                banking.suspicious_count = banking
2486                    .transactions
2487                    .iter()
2488                    .filter(|t| t.is_suspicious)
2489                    .count();
2490                stats.banking_transaction_count = banking.transactions.len();
2491                stats.banking_suspicious_count = banking.suspicious_count;
2492            }
2493        }
2494
2495        // Phase 13: Graph Export
2496        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
2497
2498        // Phase 14: LLM Enrichment
2499        self.phase_llm_enrichment(&mut stats);
2500
2501        // Phase 15: Diffusion Enhancement
2502        self.phase_diffusion_enhancement(&mut stats);
2503
2504        // Phase 16: Causal Overlay
2505        self.phase_causal_overlay(&mut stats);
2506
2507        // Phase 17: Bank Reconciliation + Financial Statements
2508        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
2509        // provision data (from accounting_standards / tax snapshots) can be wired in.
2510        let mut financial_reporting = self.phase_financial_reporting(
2511            &document_flows,
2512            &entries,
2513            &coa,
2514            &hr,
2515            &audit,
2516            &mut stats,
2517        )?;
2518
2519        // BS coherence check: assets = liabilities + equity
2520        {
2521            use datasynth_core::models::StatementType;
2522            for stmt in &financial_reporting.consolidated_statements {
2523                if stmt.statement_type == StatementType::BalanceSheet {
2524                    let total_assets: rust_decimal::Decimal = stmt
2525                        .line_items
2526                        .iter()
2527                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
2528                        .map(|li| li.amount)
2529                        .sum();
2530                    let total_le: rust_decimal::Decimal = stmt
2531                        .line_items
2532                        .iter()
2533                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
2534                        .map(|li| li.amount)
2535                        .sum();
2536                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
2537                        warn!(
2538                            "BS equation imbalance: assets={}, L+E={}",
2539                            total_assets, total_le
2540                        );
2541                    }
2542                }
2543            }
2544        }
2545
2546        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
2547        let accounting_standards =
2548            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
2549
2550        // Phase 18a: Merge ECL journal entries into main GL
2551        if !accounting_standards.ecl_journal_entries.is_empty() {
2552            debug!(
2553                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
2554                accounting_standards.ecl_journal_entries.len()
2555            );
2556            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
2557        }
2558
2559        // Phase 18a: Merge provision journal entries into main GL
2560        if !accounting_standards.provision_journal_entries.is_empty() {
2561            debug!(
2562                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
2563                accounting_standards.provision_journal_entries.len()
2564            );
2565            entries.extend(
2566                accounting_standards
2567                    .provision_journal_entries
2568                    .iter()
2569                    .cloned(),
2570            );
2571        }
2572
2573        // Phase 18b: OCPM Events (after all process data is available)
2574        let ocpm = self.phase_ocpm_events(
2575            &document_flows,
2576            &sourcing,
2577            &hr,
2578            &manufacturing_snap,
2579            &banking,
2580            &audit,
2581            &financial_reporting,
2582            &mut stats,
2583        )?;
2584
2585        // Emit OCPM events to stream sink
2586        if let Some(ref event_log) = ocpm.event_log {
2587            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
2588        }
2589
2590        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
2591        if let Some(ref event_log) = ocpm.event_log {
2592            // Build reverse index: document_ref → indices (into event_log.events) of the events that cite it
2593            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
2594                std::collections::HashMap::new();
2595            for (idx, event) in event_log.events.iter().enumerate() {
2596                if let Some(ref doc_ref) = event.document_ref {
2597                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
2598                }
2599            }
2600
2601            if !doc_index.is_empty() {
2602                let mut annotated = 0usize;
2603                for entry in &mut entries {
2604                    let doc_id_str = entry.header.document_id.to_string();
2605                    // Collect matching event indices from document_id and reference
2606                    let mut matched_indices: Vec<usize> = Vec::new();
2607                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
2608                        matched_indices.extend(indices);
2609                    }
2610                    if let Some(ref reference) = entry.header.reference {
2611                        let bare_ref = reference
2612                            .find(':')
2613                            .map(|i| &reference[i + 1..])
2614                            .unwrap_or(reference.as_str());
2615                        if let Some(indices) = doc_index.get(bare_ref) {
2616                            for &idx in indices {
2617                                if !matched_indices.contains(&idx) {
2618                                    matched_indices.push(idx);
2619                                }
2620                            }
2621                        }
2622                    }
2623                    // Apply matches to JE header
2624                    if !matched_indices.is_empty() {
2625                        for &idx in &matched_indices {
2626                            let event = &event_log.events[idx];
2627                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
2628                                entry.header.ocpm_event_ids.push(event.event_id);
2629                            }
2630                            for obj_ref in &event.object_refs {
2631                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
2632                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
2633                                }
2634                            }
2635                            if entry.header.ocpm_case_id.is_none() {
2636                                entry.header.ocpm_case_id = event.case_id;
2637                            }
2638                        }
2639                        annotated += 1;
2640                    }
2641                }
2642                debug!(
2643                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
2644                    annotated
2645                );
2646            }
2647        }
2648
2649        // Phase 19: Sales Quotes, Management KPIs, Budgets
2650        let sales_kpi_budgets =
2651            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
2652
2653        // Phase 22: Treasury Data Generation
2654        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
2655        // are included in the pre-tax income used by phase_tax_generation.
2656        let treasury =
2657            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
2658
2659        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
2660        if !treasury.journal_entries.is_empty() {
2661            debug!(
2662                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
2663                treasury.journal_entries.len()
2664            );
2665            entries.extend(treasury.journal_entries.iter().cloned());
2666        }
2667
2668        // Phase 20: Tax Generation
2669        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
2670
2671        // Phase 20 JEs: Merge tax posting journal entries into main GL
2672        if !tax.tax_posting_journal_entries.is_empty() {
2673            debug!(
2674                "Merging {} tax posting JEs into GL",
2675                tax.tax_posting_journal_entries.len()
2676            );
2677            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
2678        }
2679
2680        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
2681        // Build supplementary cash flow items from upstream JE data (depreciation,
2682        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
2683        {
2684            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
2685
2686            let framework_str = {
2687                use datasynth_config::schema::AccountingFrameworkConfig;
2688                match self
2689                    .config
2690                    .accounting_standards
2691                    .framework
2692                    .unwrap_or_default()
2693                {
2694                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
2695                        "IFRS"
2696                    }
2697                    _ => "US_GAAP",
2698                }
2699            };
2700
2701            // Sum depreciation debits (account 6000) from close JEs
2702            let depreciation_total: rust_decimal::Decimal = entries
2703                .iter()
2704                .filter(|je| je.header.document_type == "CL")
2705                .flat_map(|je| je.lines.iter())
2706                .filter(|l| l.gl_account.starts_with("6000"))
2707                .map(|l| l.debit_amount)
2708                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2709
2710            // Sum interest expense debits (account 7100)
2711            let interest_paid: rust_decimal::Decimal = entries
2712                .iter()
2713                .flat_map(|je| je.lines.iter())
2714                .filter(|l| l.gl_account.starts_with("7100"))
2715                .map(|l| l.debit_amount)
2716                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2717
2718            // Sum tax expense debits (account 8000)
2719            let tax_paid: rust_decimal::Decimal = entries
2720                .iter()
2721                .flat_map(|je| je.lines.iter())
2722                .filter(|l| l.gl_account.starts_with("8000"))
2723                .map(|l| l.debit_amount)
2724                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2725
2726            // Sum capex debits on fixed assets (account 1500)
2727            let capex: rust_decimal::Decimal = entries
2728                .iter()
2729                .flat_map(|je| je.lines.iter())
2730                .filter(|l| l.gl_account.starts_with("1500"))
2731                .map(|l| l.debit_amount)
2732                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2733
2734            // Dividends paid: sum debits on dividends payable (account 2170) across all JEs (no document-type filter)
2735            let dividends_paid: rust_decimal::Decimal = entries
2736                .iter()
2737                .flat_map(|je| je.lines.iter())
2738                .filter(|l| l.gl_account == "2170")
2739                .map(|l| l.debit_amount)
2740                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
2741
2742            let cf_data = CashFlowSourceData {
2743                depreciation_total,
2744                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
2745                delta_ar: rust_decimal::Decimal::ZERO,
2746                delta_ap: rust_decimal::Decimal::ZERO,
2747                delta_inventory: rust_decimal::Decimal::ZERO,
2748                capex,
2749                debt_issuance: rust_decimal::Decimal::ZERO,
2750                debt_repayment: rust_decimal::Decimal::ZERO,
2751                interest_paid,
2752                tax_paid,
2753                dividends_paid,
2754                framework: framework_str.to_string(),
2755            };
2756
2757            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
2758            if !enhanced_cf_items.is_empty() {
2759                // Merge into ALL cash flow statements (standalone + consolidated)
2760                use datasynth_core::models::StatementType;
2761                let merge_count = enhanced_cf_items.len();
2762                for stmt in financial_reporting
2763                    .financial_statements
2764                    .iter_mut()
2765                    .chain(financial_reporting.consolidated_statements.iter_mut())
2766                    .chain(
2767                        financial_reporting
2768                            .standalone_statements
2769                            .values_mut()
2770                            .flat_map(|v| v.iter_mut()),
2771                    )
2772                {
2773                    if stmt.statement_type == StatementType::CashFlowStatement {
2774                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
2775                    }
2776                }
2777                info!(
2778                    "Enhanced cash flow: {} supplementary items merged into CF statements",
2779                    merge_count
2780                );
2781            }
2782        }
2783
2784        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
2785        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
2786        self.generate_notes_to_financial_statements(
2787            &mut financial_reporting,
2788            &accounting_standards,
2789            &tax,
2790            &hr,
2791            &audit,
2792            &treasury,
2793        );
2794
2795        // Phase 20b: Supplement segment reports from real JEs (v2.4)
2796        // When we have 2+ companies, derive segment data from actual journal entries
2797        // to complement or replace the FS-generator-based segments.
2798        if self.config.companies.len() >= 2 && !entries.is_empty() {
2799            let companies: Vec<(String, String)> = self
2800                .config
2801                .companies
2802                .iter()
2803                .map(|c| (c.code.clone(), c.name.clone()))
2804                .collect();
2805            let ic_elim: rust_decimal::Decimal =
2806                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
2807            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
2808                .unwrap_or(NaiveDate::MIN);
2809            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
2810            let period_label = format!(
2811                "{}-{:02}",
2812                end_date.year(),
2813                (end_date - chrono::Days::new(1)).month()
2814            );
2815
2816            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
2817            let (je_segments, je_recon) =
2818                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
2819            if !je_segments.is_empty() {
2820                info!(
2821                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
2822                    je_segments.len(),
2823                    ic_elim,
2824                );
2825                // Replace if existing segment_reports were empty; otherwise supplement
2826                if financial_reporting.segment_reports.is_empty() {
2827                    financial_reporting.segment_reports = je_segments;
2828                    financial_reporting.segment_reconciliations = vec![je_recon];
2829                } else {
2830                    financial_reporting.segment_reports.extend(je_segments);
2831                    financial_reporting.segment_reconciliations.push(je_recon);
2832                }
2833            }
2834        }
2835
2836        // Phase 21: ESG Data Generation
2837        let esg_snap =
2838            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
2839
2840        // Phase 23: Project Accounting Data Generation
2841        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
2842
2843        // Phase 24: Process Evolution + Organizational Events
2844        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
2845
2846        // Phase 24b: Disruption Events
2847        let disruption_events = self.phase_disruption_events(&mut stats)?;
2848
2849        // Phase 27: Bi-Temporal Vendor Version Chains
2850        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
2851
2852        // Phase 28: Entity Relationship Graph + Cross-Process Links
2853        let (entity_relationship_graph, cross_process_links) =
2854            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
2855
2856        // Phase 29: Industry-specific GL accounts
2857        let industry_output = self.phase_industry_data(&mut stats);
2858
2859        // Phase: Compliance regulations (must run before hypergraph so it can be included)
2860        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
2861
2862        // Phase 19b: Hypergraph Export (after all data is available)
2863        self.phase_hypergraph_export(
2864            &coa,
2865            &entries,
2866            &document_flows,
2867            &sourcing,
2868            &hr,
2869            &manufacturing_snap,
2870            &banking,
2871            &audit,
2872            &financial_reporting,
2873            &ocpm,
2874            &compliance_regulations,
2875            &mut stats,
2876        )?;
2877
2878        // Phase 10c: Additional graph builders (approval, entity, banking)
2879        // These run after all data is available since they need banking/IC data.
2880        if self.phase_config.generate_graph_export {
2881            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
2882        }
2883
2884        // Log informational messages for config sections not yet fully wired
2885        if self.config.streaming.enabled {
2886            info!("Note: streaming config is enabled but batch mode does not use it");
2887        }
2888        if self.config.vendor_network.enabled {
2889            debug!("Vendor network config available; relationship graph generation is partial");
2890        }
2891        if self.config.customer_segmentation.enabled {
2892            debug!("Customer segmentation config available; segment-aware generation is partial");
2893        }
2894
2895        // Log final resource statistics
2896        let resource_stats = self.resource_guard.stats();
2897        info!(
2898            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
2899            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
2900            resource_stats.disk.estimated_bytes_written,
2901            resource_stats.degradation_level
2902        );
2903
2904        // Flush any remaining stream sink data
2905        if let Some(ref sink) = self.phase_sink {
2906            if let Err(e) = sink.flush() {
2907                warn!("Stream sink flush failed: {e}");
2908            }
2909        }
2910
2911        // Build data lineage graph
2912        let lineage = self.build_lineage_graph();
2913
2914        // Evaluate quality gates if enabled in config
2915        let gate_result = if self.config.quality_gates.enabled {
2916            let profile_name = &self.config.quality_gates.profile;
2917            match datasynth_eval::gates::get_profile(profile_name) {
2918                Some(profile) => {
2919                    // Build an evaluation populated with actual generation metrics.
2920                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
2921
2922                    // Populate balance sheet evaluation from balance validation results
2923                    if balance_validation.validated {
2924                        eval.coherence.balance =
2925                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
2926                                equation_balanced: balance_validation.is_balanced,
2927                                max_imbalance: (balance_validation.total_debits
2928                                    - balance_validation.total_credits)
2929                                    .abs(),
2930                                periods_evaluated: 1,
2931                                periods_imbalanced: if balance_validation.is_balanced {
2932                                    0
2933                                } else {
2934                                    1
2935                                },
2936                                period_results: Vec::new(),
2937                                companies_evaluated: self.config.companies.len(),
2938                            });
2939                    }
2940
2941                    // Set coherence passes based on balance validation
2942                    eval.coherence.passes = balance_validation.is_balanced;
2943                    if !balance_validation.is_balanced {
2944                        eval.coherence
2945                            .failures
2946                            .push("Balance sheet equation not satisfied".to_string());
2947                    }
2948
2949                    // Set statistical score based on entry count (basic sanity)
2950                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
2951                    eval.statistical.passes = !entries.is_empty();
2952
2953                    // Set quality score from data quality stats
2954                    eval.quality.overall_score = 0.9; // Default high for generated data
2955                    eval.quality.passes = true;
2956
2957                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
2958                    info!(
2959                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
2960                        profile_name, result.gates_passed, result.gates_total, result.summary
2961                    );
2962                    Some(result)
2963                }
2964                None => {
2965                    warn!(
2966                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
2967                        profile_name
2968                    );
2969                    None
2970                }
2971            }
2972        } else {
2973            None
2974        };
2975
2976        // Generate internal controls if enabled
2977        let internal_controls = if self.config.internal_controls.enabled {
2978            InternalControl::standard_controls()
2979        } else {
2980            Vec::new()
2981        };
2982
2983        Ok(EnhancedGenerationResult {
2984            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
2985            master_data: std::mem::take(&mut self.master_data),
2986            document_flows,
2987            subledger,
2988            ocpm,
2989            audit,
2990            banking,
2991            graph_export,
2992            sourcing,
2993            financial_reporting,
2994            hr,
2995            accounting_standards,
2996            manufacturing: manufacturing_snap,
2997            sales_kpi_budgets,
2998            tax,
2999            esg: esg_snap,
3000            treasury,
3001            project_accounting,
3002            process_evolution,
3003            organizational_events,
3004            disruption_events,
3005            intercompany,
3006            journal_entries: entries,
3007            anomaly_labels,
3008            balance_validation,
3009            data_quality_stats,
3010            quality_issues,
3011            statistics: stats,
3012            lineage: Some(lineage),
3013            gate_result,
3014            internal_controls,
3015            sod_violations,
3016            opening_balances,
3017            subledger_reconciliation,
3018            counterfactual_pairs,
3019            red_flags,
3020            collusion_rings,
3021            temporal_vendor_chains,
3022            entity_relationship_graph,
3023            cross_process_links,
3024            industry_output,
3025            compliance_regulations,
3026        })
3027    }
3028
3029    // ========================================================================
3030    // Generation Phase Methods
3031    // ========================================================================
3032
3033    /// Phase 1: Generate Chart of Accounts and update statistics.
3034    fn phase_chart_of_accounts(
3035        &mut self,
3036        stats: &mut EnhancedGenerationStatistics,
3037    ) -> SynthResult<Arc<ChartOfAccounts>> {
3038        info!("Phase 1: Generating Chart of Accounts");
3039        let coa = self.generate_coa()?;
3040        stats.accounts_count = coa.account_count();
3041        info!(
3042            "Chart of Accounts generated: {} accounts",
3043            stats.accounts_count
3044        );
3045        self.check_resources_with_log("post-coa")?;
3046        Ok(coa)
3047    }
3048
3049    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3050    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3051        if self.phase_config.generate_master_data {
3052            info!("Phase 2: Generating Master Data");
3053            self.generate_master_data()?;
3054            stats.vendor_count = self.master_data.vendors.len();
3055            stats.customer_count = self.master_data.customers.len();
3056            stats.material_count = self.master_data.materials.len();
3057            stats.asset_count = self.master_data.assets.len();
3058            stats.employee_count = self.master_data.employees.len();
3059            info!(
3060                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3061                stats.vendor_count, stats.customer_count, stats.material_count,
3062                stats.asset_count, stats.employee_count
3063            );
3064            self.check_resources_with_log("post-master-data")?;
3065        } else {
3066            debug!("Phase 2: Skipped (master data generation disabled)");
3067        }
3068        Ok(())
3069    }
3070
3071    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3072    fn phase_document_flows(
3073        &mut self,
3074        stats: &mut EnhancedGenerationStatistics,
3075    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3076        let mut document_flows = DocumentFlowSnapshot::default();
3077        let mut subledger = SubledgerSnapshot::default();
3078        // Dunning JEs (interest + charges) accumulated here and merged into the
3079        // main FA-JE list below so they appear in the GL.
3080        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3081
3082        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3083            info!("Phase 3: Generating Document Flows");
3084            self.generate_document_flows(&mut document_flows)?;
3085            stats.p2p_chain_count = document_flows.p2p_chains.len();
3086            stats.o2c_chain_count = document_flows.o2c_chains.len();
3087            info!(
3088                "Document flows generated: {} P2P chains, {} O2C chains",
3089                stats.p2p_chain_count, stats.o2c_chain_count
3090            );
3091
3092            // Phase 3b: Link document flows to subledgers (for data coherence)
3093            debug!("Phase 3b: Linking document flows to subledgers");
3094            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3095            stats.ap_invoice_count = subledger.ap_invoices.len();
3096            stats.ar_invoice_count = subledger.ar_invoices.len();
3097            debug!(
3098                "Subledgers linked: {} AP invoices, {} AR invoices",
3099                stats.ap_invoice_count, stats.ar_invoice_count
3100            );
3101
3102            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3103            // Without this step the subledger is systematically overstated because
3104            // amount_remaining is set at invoice creation and never reduced by
3105            // the payments that were generated in the document-flow phase.
3106            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3107            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3108            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3109            debug!("Payment settlements applied to AP and AR subledgers");
3110
3111            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3112            // The as-of date is the last day of the configured period.
3113            if let Ok(start_date) =
3114                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3115            {
3116                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3117                    - chrono::Days::new(1);
3118                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
3119                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
3120                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
3121                // derived from JE-level aggregation and will typically differ. This is a known
3122                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
3123                // generated independently. A future reconciliation phase should align them by
3124                // using subledger totals as the authoritative source for BS Receivables.
3125                for company in &self.config.companies {
3126                    let ar_report = ARAgingReport::from_invoices(
3127                        company.code.clone(),
3128                        &subledger.ar_invoices,
3129                        as_of_date,
3130                    );
3131                    subledger.ar_aging_reports.push(ar_report);
3132
3133                    let ap_report = APAgingReport::from_invoices(
3134                        company.code.clone(),
3135                        &subledger.ap_invoices,
3136                        as_of_date,
3137                    );
3138                    subledger.ap_aging_reports.push(ap_report);
3139                }
3140                debug!(
3141                    "AR/AP aging reports built: {} AR, {} AP",
3142                    subledger.ar_aging_reports.len(),
3143                    subledger.ap_aging_reports.len()
3144                );
3145
3146                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
3147                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
3148                {
3149                    use datasynth_generators::DunningGenerator;
3150                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
3151                    for company in &self.config.companies {
3152                        let currency = company.currency.as_str();
3153                        // Collect mutable references to AR invoices for this company
3154                        // (dunning generator updates dunning_info on invoices in-place).
3155                        let mut company_invoices: Vec<
3156                            datasynth_core::models::subledger::ar::ARInvoice,
3157                        > = subledger
3158                            .ar_invoices
3159                            .iter()
3160                            .filter(|inv| inv.company_code == company.code)
3161                            .cloned()
3162                            .collect();
3163
3164                        if company_invoices.is_empty() {
3165                            continue;
3166                        }
3167
3168                        let result = dunning_gen.execute_dunning_run(
3169                            &company.code,
3170                            as_of_date,
3171                            &mut company_invoices,
3172                            currency,
3173                        );
3174
3175                        // Write back updated dunning info to the main AR invoice list
3176                        for updated in &company_invoices {
3177                            if let Some(orig) = subledger
3178                                .ar_invoices
3179                                .iter_mut()
3180                                .find(|i| i.invoice_number == updated.invoice_number)
3181                            {
3182                                orig.dunning_info = updated.dunning_info.clone();
3183                            }
3184                        }
3185
3186                        subledger.dunning_runs.push(result.dunning_run);
3187                        subledger.dunning_letters.extend(result.letters);
3188                        // Dunning JEs (interest + charges) collected into local buffer.
3189                        dunning_journal_entries.extend(result.journal_entries);
3190                    }
3191                    debug!(
3192                        "Dunning runs complete: {} runs, {} letters",
3193                        subledger.dunning_runs.len(),
3194                        subledger.dunning_letters.len()
3195                    );
3196                }
3197            }
3198
3199            self.check_resources_with_log("post-document-flows")?;
3200        } else {
3201            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
3202        }
3203
3204        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
3205        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
3206        if !self.master_data.assets.is_empty() {
3207            debug!("Generating FA subledger records");
3208            let company_code = self
3209                .config
3210                .companies
3211                .first()
3212                .map(|c| c.code.as_str())
3213                .unwrap_or("1000");
3214            let currency = self
3215                .config
3216                .companies
3217                .first()
3218                .map(|c| c.currency.as_str())
3219                .unwrap_or("USD");
3220
3221            let mut fa_gen = datasynth_generators::FAGenerator::new(
3222                datasynth_generators::FAGeneratorConfig::default(),
3223                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
3224            );
3225
3226            for asset in &self.master_data.assets {
3227                let (record, je) = fa_gen.generate_asset_acquisition(
3228                    company_code,
3229                    &format!("{:?}", asset.asset_class),
3230                    &asset.description,
3231                    asset.acquisition_date,
3232                    currency,
3233                    asset.cost_center.as_deref(),
3234                );
3235                subledger.fa_records.push(record);
3236                fa_journal_entries.push(je);
3237            }
3238
3239            stats.fa_subledger_count = subledger.fa_records.len();
3240            debug!(
3241                "FA subledger records generated: {} (with {} acquisition JEs)",
3242                stats.fa_subledger_count,
3243                fa_journal_entries.len()
3244            );
3245        }
3246
3247        // Generate Inventory subledger records from master data materials
3248        if !self.master_data.materials.is_empty() {
3249            debug!("Generating Inventory subledger records");
3250            let first_company = self.config.companies.first();
3251            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
3252            let inv_currency = first_company
3253                .map(|c| c.currency.clone())
3254                .unwrap_or_else(|| "USD".to_string());
3255
3256            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
3257                datasynth_generators::InventoryGeneratorConfig::default(),
3258                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
3259                inv_currency.clone(),
3260            );
3261
3262            for (i, material) in self.master_data.materials.iter().enumerate() {
3263                let plant = format!("PLANT{:02}", (i % 3) + 1);
3264                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
3265                let initial_qty = rust_decimal::Decimal::from(
3266                    material
3267                        .safety_stock
3268                        .to_string()
3269                        .parse::<i64>()
3270                        .unwrap_or(100),
3271                );
3272
3273                let position = inv_gen.generate_position(
3274                    company_code,
3275                    &plant,
3276                    &storage_loc,
3277                    &material.material_id,
3278                    &material.description,
3279                    initial_qty,
3280                    Some(material.standard_cost),
3281                    &inv_currency,
3282                );
3283                subledger.inventory_positions.push(position);
3284            }
3285
3286            stats.inventory_subledger_count = subledger.inventory_positions.len();
3287            debug!(
3288                "Inventory subledger records generated: {}",
3289                stats.inventory_subledger_count
3290            );
3291        }
3292
3293        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
3294        if !subledger.fa_records.is_empty() {
3295            if let Ok(start_date) =
3296                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3297            {
3298                let company_code = self
3299                    .config
3300                    .companies
3301                    .first()
3302                    .map(|c| c.code.as_str())
3303                    .unwrap_or("1000");
3304                let fiscal_year = start_date.year();
3305                let start_period = start_date.month();
3306                let end_period =
3307                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
3308
3309                let depr_cfg = FaDepreciationScheduleConfig {
3310                    fiscal_year,
3311                    start_period,
3312                    end_period,
3313                    seed_offset: 800,
3314                };
3315                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
3316                let runs = depr_gen.generate(company_code, &subledger.fa_records);
3317                let run_count = runs.len();
3318                subledger.depreciation_runs = runs;
3319                debug!(
3320                    "Depreciation runs generated: {} runs for {} periods",
3321                    run_count, self.config.global.period_months
3322                );
3323            }
3324        }
3325
3326        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
3327        if !subledger.inventory_positions.is_empty() {
3328            if let Ok(start_date) =
3329                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3330            {
3331                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3332                    - chrono::Days::new(1);
3333
3334                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
3335                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
3336
3337                for company in &self.config.companies {
3338                    let result = inv_val_gen.generate(
3339                        &company.code,
3340                        &subledger.inventory_positions,
3341                        as_of_date,
3342                    );
3343                    subledger.inventory_valuations.push(result);
3344                }
3345                debug!(
3346                    "Inventory valuations generated: {} company reports",
3347                    subledger.inventory_valuations.len()
3348                );
3349            }
3350        }
3351
3352        Ok((document_flows, subledger, fa_journal_entries))
3353    }
3354
3355    /// Phase 3c: Generate OCPM events from document flows.
3356    #[allow(clippy::too_many_arguments)]
3357    fn phase_ocpm_events(
3358        &mut self,
3359        document_flows: &DocumentFlowSnapshot,
3360        sourcing: &SourcingSnapshot,
3361        hr: &HrSnapshot,
3362        manufacturing: &ManufacturingSnapshot,
3363        banking: &BankingSnapshot,
3364        audit: &AuditSnapshot,
3365        financial_reporting: &FinancialReportingSnapshot,
3366        stats: &mut EnhancedGenerationStatistics,
3367    ) -> SynthResult<OcpmSnapshot> {
3368        let degradation = self.check_resources()?;
3369        if degradation >= DegradationLevel::Reduced {
3370            debug!(
3371                "Phase skipped due to resource pressure (degradation: {:?})",
3372                degradation
3373            );
3374            return Ok(OcpmSnapshot::default());
3375        }
3376        if self.phase_config.generate_ocpm_events {
3377            info!("Phase 3c: Generating OCPM Events");
3378            let ocpm_snapshot = self.generate_ocpm_events(
3379                document_flows,
3380                sourcing,
3381                hr,
3382                manufacturing,
3383                banking,
3384                audit,
3385                financial_reporting,
3386            )?;
3387            stats.ocpm_event_count = ocpm_snapshot.event_count;
3388            stats.ocpm_object_count = ocpm_snapshot.object_count;
3389            stats.ocpm_case_count = ocpm_snapshot.case_count;
3390            info!(
3391                "OCPM events generated: {} events, {} objects, {} cases",
3392                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
3393            );
3394            self.check_resources_with_log("post-ocpm")?;
3395            Ok(ocpm_snapshot)
3396        } else {
3397            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
3398            Ok(OcpmSnapshot::default())
3399        }
3400    }
3401
3402    /// Phase 4: Generate journal entries from document flows and standalone generation.
3403    fn phase_journal_entries(
3404        &mut self,
3405        coa: &Arc<ChartOfAccounts>,
3406        document_flows: &DocumentFlowSnapshot,
3407        _stats: &mut EnhancedGenerationStatistics,
3408    ) -> SynthResult<Vec<JournalEntry>> {
3409        let mut entries = Vec::new();
3410
3411        // Phase 4a: Generate JEs from document flows (for data coherence)
3412        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
3413            debug!("Phase 4a: Generating JEs from document flows");
3414            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
3415            debug!("Generated {} JEs from document flows", flow_entries.len());
3416            entries.extend(flow_entries);
3417        }
3418
3419        // Phase 4b: Generate standalone journal entries
3420        if self.phase_config.generate_journal_entries {
3421            info!("Phase 4: Generating Journal Entries");
3422            let je_entries = self.generate_journal_entries(coa)?;
3423            info!("Generated {} standalone journal entries", je_entries.len());
3424            entries.extend(je_entries);
3425        } else {
3426            debug!("Phase 4: Skipped (journal entry generation disabled)");
3427        }
3428
3429        if !entries.is_empty() {
3430            // Note: stats.total_entries/total_line_items are set in generate()
3431            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
3432            self.check_resources_with_log("post-journal-entries")?;
3433        }
3434
3435        Ok(entries)
3436    }
3437
3438    /// Phase 5: Inject anomalies into journal entries.
3439    fn phase_anomaly_injection(
3440        &mut self,
3441        entries: &mut [JournalEntry],
3442        actions: &DegradationActions,
3443        stats: &mut EnhancedGenerationStatistics,
3444    ) -> SynthResult<AnomalyLabels> {
3445        if self.phase_config.inject_anomalies
3446            && !entries.is_empty()
3447            && !actions.skip_anomaly_injection
3448        {
3449            info!("Phase 5: Injecting Anomalies");
3450            let result = self.inject_anomalies(entries)?;
3451            stats.anomalies_injected = result.labels.len();
3452            info!("Injected {} anomalies", stats.anomalies_injected);
3453            self.check_resources_with_log("post-anomaly-injection")?;
3454            Ok(result)
3455        } else if actions.skip_anomaly_injection {
3456            warn!("Phase 5: Skipped due to resource degradation");
3457            Ok(AnomalyLabels::default())
3458        } else {
3459            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
3460            Ok(AnomalyLabels::default())
3461        }
3462    }
3463
3464    /// Phase 6: Validate balance sheet equation on journal entries.
3465    fn phase_balance_validation(
3466        &mut self,
3467        entries: &[JournalEntry],
3468    ) -> SynthResult<BalanceValidationResult> {
3469        if self.phase_config.validate_balances && !entries.is_empty() {
3470            debug!("Phase 6: Validating Balances");
3471            let balance_validation = self.validate_journal_entries(entries)?;
3472            if balance_validation.is_balanced {
3473                debug!("Balance validation passed");
3474            } else {
3475                warn!(
3476                    "Balance validation found {} errors",
3477                    balance_validation.validation_errors.len()
3478                );
3479            }
3480            Ok(balance_validation)
3481        } else {
3482            Ok(BalanceValidationResult::default())
3483        }
3484    }
3485
3486    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
3487    fn phase_data_quality_injection(
3488        &mut self,
3489        entries: &mut [JournalEntry],
3490        actions: &DegradationActions,
3491        stats: &mut EnhancedGenerationStatistics,
3492    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
3493        if self.phase_config.inject_data_quality
3494            && !entries.is_empty()
3495            && !actions.skip_data_quality
3496        {
3497            info!("Phase 7: Injecting Data Quality Variations");
3498            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
3499            stats.data_quality_issues = dq_stats.records_with_issues;
3500            info!("Injected {} data quality issues", stats.data_quality_issues);
3501            self.check_resources_with_log("post-data-quality")?;
3502            Ok((dq_stats, quality_issues))
3503        } else if actions.skip_data_quality {
3504            warn!("Phase 7: Skipped due to resource degradation");
3505            Ok((DataQualityStats::default(), Vec::new()))
3506        } else {
3507            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
3508            Ok((DataQualityStats::default(), Vec::new()))
3509        }
3510    }
3511
3512    /// Phase 10b: Generate period-close journal entries.
3513    ///
3514    /// Generates:
3515    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
3516    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
3517    ///    for the configured period.
3518    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
3519    /// 3. Income statement closing JE per company: transfer net income after tax to retained
3520    ///    earnings via the Income Summary (3600) clearing account.
3521    fn phase_period_close(
3522        &mut self,
3523        entries: &mut Vec<JournalEntry>,
3524        subledger: &SubledgerSnapshot,
3525        stats: &mut EnhancedGenerationStatistics,
3526    ) -> SynthResult<()> {
3527        if !self.phase_config.generate_period_close || entries.is_empty() {
3528            debug!("Phase 10b: Skipped (period close disabled or no entries)");
3529            return Ok(());
3530        }
3531
3532        info!("Phase 10b: Generating period-close journal entries");
3533
3534        use datasynth_core::accounts::{
3535            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
3536        };
3537        use rust_decimal::Decimal;
3538
3539        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3540            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
3541        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3542        // Posting date for close entries is the last day of the period
3543        let close_date = end_date - chrono::Days::new(1);
3544
3545        // Statutory tax rate (21% — configurable rates come in later tiers)
3546        let tax_rate = Decimal::new(21, 2); // 0.21
3547
3548        // Collect company codes from config
3549        let company_codes: Vec<String> = self
3550            .config
3551            .companies
3552            .iter()
3553            .map(|c| c.code.clone())
3554            .collect();
3555
3556        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
3557        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
3558        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
3559
3560        // --- Depreciation JEs (per asset) ---
3561        // Compute period depreciation for each active fixed asset using straight-line method.
3562        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
3563        let period_months = self.config.global.period_months;
3564        for asset in &subledger.fa_records {
3565            // Skip assets that are inactive / fully depreciated / non-depreciable
3566            use datasynth_core::models::subledger::fa::AssetStatus;
3567            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
3568                continue;
3569            }
3570            let useful_life_months = asset.useful_life_months();
3571            if useful_life_months == 0 {
3572                // Land or CIP — not depreciated
3573                continue;
3574            }
3575            let salvage_value = asset.salvage_value();
3576            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
3577            if depreciable_base == Decimal::ZERO {
3578                continue;
3579            }
3580            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
3581                * Decimal::from(period_months))
3582            .round_dp(2);
3583            if period_depr <= Decimal::ZERO {
3584                continue;
3585            }
3586
3587            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
3588            depr_header.document_type = "CL".to_string();
3589            depr_header.header_text = Some(format!(
3590                "Depreciation - {} {}",
3591                asset.asset_number, asset.description
3592            ));
3593            depr_header.created_by = "CLOSE_ENGINE".to_string();
3594            depr_header.source = TransactionSource::Automated;
3595            depr_header.business_process = Some(BusinessProcess::R2R);
3596
3597            let doc_id = depr_header.document_id;
3598            let mut depr_je = JournalEntry::new(depr_header);
3599
3600            // DR Depreciation Expense (6000)
3601            depr_je.add_line(JournalEntryLine::debit(
3602                doc_id,
3603                1,
3604                expense_accounts::DEPRECIATION.to_string(),
3605                period_depr,
3606            ));
3607            // CR Accumulated Depreciation (1510)
3608            depr_je.add_line(JournalEntryLine::credit(
3609                doc_id,
3610                2,
3611                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
3612                period_depr,
3613            ));
3614
3615            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
3616            close_jes.push(depr_je);
3617        }
3618
3619        if !subledger.fa_records.is_empty() {
3620            debug!(
3621                "Generated {} depreciation JEs from {} FA records",
3622                close_jes.len(),
3623                subledger.fa_records.len()
3624            );
3625        }
3626
3627        // --- Accrual entries (standard period-end accruals per company) ---
3628        // Generate standard accrued expense entries (utilities, rent, interest) using
3629        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
3630        {
3631            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
3632            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
3633
3634            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
3635            let accrual_items: &[(&str, &str, &str)] = &[
3636                ("Accrued Utilities", "6200", "2100"),
3637                ("Accrued Rent", "6300", "2100"),
3638                ("Accrued Interest", "6100", "2150"),
3639            ];
3640
3641            for company_code in &company_codes {
3642                // Estimate company revenue from existing JEs
3643                let company_revenue: Decimal = entries
3644                    .iter()
3645                    .filter(|e| e.header.company_code == *company_code)
3646                    .flat_map(|e| e.lines.iter())
3647                    .filter(|l| l.gl_account.starts_with('4'))
3648                    .map(|l| l.credit_amount - l.debit_amount)
3649                    .fold(Decimal::ZERO, |acc, v| acc + v);
3650
3651                if company_revenue <= Decimal::ZERO {
3652                    continue;
3653                }
3654
3655                // Use 0.5% of period revenue per accrual item as a proxy
3656                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
3657                if accrual_base <= Decimal::ZERO {
3658                    continue;
3659                }
3660
3661                for (description, expense_acct, liability_acct) in accrual_items {
3662                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
3663                        company_code,
3664                        description,
3665                        accrual_base,
3666                        expense_acct,
3667                        liability_acct,
3668                        close_date,
3669                        None,
3670                    );
3671                    close_jes.push(accrual_je);
3672                    if let Some(rev_je) = reversal_je {
3673                        close_jes.push(rev_je);
3674                    }
3675                }
3676            }
3677
3678            debug!(
3679                "Generated accrual entries for {} companies",
3680                company_codes.len()
3681            );
3682        }
3683
3684        for company_code in &company_codes {
3685            // Calculate net income for this company from existing JEs:
3686            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
3687            // Revenue (4xxx): credit-normal, so net = credits - debits
3688            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
3689            let mut total_revenue = Decimal::ZERO;
3690            let mut total_expenses = Decimal::ZERO;
3691
3692            for entry in entries.iter() {
3693                if entry.header.company_code != *company_code {
3694                    continue;
3695                }
3696                for line in &entry.lines {
3697                    let category = AccountCategory::from_account(&line.gl_account);
3698                    match category {
3699                        AccountCategory::Revenue => {
3700                            // Revenue is credit-normal: net revenue = credits - debits
3701                            total_revenue += line.credit_amount - line.debit_amount;
3702                        }
3703                        AccountCategory::Cogs
3704                        | AccountCategory::OperatingExpense
3705                        | AccountCategory::OtherIncomeExpense
3706                        | AccountCategory::Tax => {
3707                            // Expenses are debit-normal: net expense = debits - credits
3708                            total_expenses += line.debit_amount - line.credit_amount;
3709                        }
3710                        _ => {}
3711                    }
3712                }
3713            }
3714
3715            let pre_tax_income = total_revenue - total_expenses;
3716
3717            // Skip if no income statement activity
3718            if pre_tax_income == Decimal::ZERO {
3719                debug!(
3720                    "Company {}: no pre-tax income, skipping period close",
3721                    company_code
3722                );
3723                continue;
3724            }
3725
3726            // --- Tax provision / DTA JE ---
3727            if pre_tax_income > Decimal::ZERO {
3728                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
3729                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
3730
3731                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
3732                tax_header.document_type = "CL".to_string();
3733                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
3734                tax_header.created_by = "CLOSE_ENGINE".to_string();
3735                tax_header.source = TransactionSource::Automated;
3736                tax_header.business_process = Some(BusinessProcess::R2R);
3737
3738                let doc_id = tax_header.document_id;
3739                let mut tax_je = JournalEntry::new(tax_header);
3740
3741                // DR Tax Expense (8000)
3742                tax_je.add_line(JournalEntryLine::debit(
3743                    doc_id,
3744                    1,
3745                    tax_accounts::TAX_EXPENSE.to_string(),
3746                    tax_amount,
3747                ));
3748                // CR Income Tax Payable (2130)
3749                tax_je.add_line(JournalEntryLine::credit(
3750                    doc_id,
3751                    2,
3752                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
3753                    tax_amount,
3754                ));
3755
3756                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
3757                close_jes.push(tax_je);
3758            } else {
3759                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
3760                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
3761                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
3762                if dta_amount > Decimal::ZERO {
3763                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
3764                    dta_header.document_type = "CL".to_string();
3765                    dta_header.header_text =
3766                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
3767                    dta_header.created_by = "CLOSE_ENGINE".to_string();
3768                    dta_header.source = TransactionSource::Automated;
3769                    dta_header.business_process = Some(BusinessProcess::R2R);
3770
3771                    let doc_id = dta_header.document_id;
3772                    let mut dta_je = JournalEntry::new(dta_header);
3773
3774                    // DR Deferred Tax Asset (1600)
3775                    dta_je.add_line(JournalEntryLine::debit(
3776                        doc_id,
3777                        1,
3778                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
3779                        dta_amount,
3780                    ));
3781                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
3782                    // reflecting the benefit of the future deductible temporary difference.
3783                    dta_je.add_line(JournalEntryLine::credit(
3784                        doc_id,
3785                        2,
3786                        tax_accounts::TAX_EXPENSE.to_string(),
3787                        dta_amount,
3788                    ));
3789
3790                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
3791                    close_jes.push(dta_je);
3792                    debug!(
3793                        "Company {}: loss year — recognised DTA of {}",
3794                        company_code, dta_amount
3795                    );
3796                }
3797            }
3798
3799            // --- Dividend JEs (v2.4) ---
3800            // If the entity is profitable after tax, declare a 10% dividend payout.
3801            // This runs AFTER tax provision so the dividend is based on post-tax income
3802            // but BEFORE the retained earnings close so the RE transfer reflects the
3803            // reduced balance.
3804            let tax_provision = if pre_tax_income > Decimal::ZERO {
3805                (pre_tax_income * tax_rate).round_dp(2)
3806            } else {
3807                Decimal::ZERO
3808            };
3809            let net_income = pre_tax_income - tax_provision;
3810
3811            if net_income > Decimal::ZERO {
3812                use datasynth_generators::DividendGenerator;
3813                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
3814                let mut div_gen = DividendGenerator::new(self.seed + 460);
3815                let currency_str = self
3816                    .config
3817                    .companies
3818                    .iter()
3819                    .find(|c| c.code == *company_code)
3820                    .map(|c| c.currency.as_str())
3821                    .unwrap_or("USD");
3822                let div_result = div_gen.generate(
3823                    company_code,
3824                    close_date,
3825                    Decimal::new(1, 0), // $1 per share placeholder
3826                    dividend_amount,
3827                    currency_str,
3828                );
3829                let div_je_count = div_result.journal_entries.len();
3830                close_jes.extend(div_result.journal_entries);
3831                debug!(
3832                    "Company {}: declared dividend of {} ({} JEs)",
3833                    company_code, dividend_amount, div_je_count
3834                );
3835            }
3836
3837            // --- Income statement closing JE ---
3838            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
3839            // For a loss year the DTA JE above already recognises the deferred benefit; here we
3840            // close the pre-tax loss into Retained Earnings as-is.
3841            if net_income != Decimal::ZERO {
3842                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
3843                close_header.document_type = "CL".to_string();
3844                close_header.header_text =
3845                    Some(format!("Income statement close - {}", company_code));
3846                close_header.created_by = "CLOSE_ENGINE".to_string();
3847                close_header.source = TransactionSource::Automated;
3848                close_header.business_process = Some(BusinessProcess::R2R);
3849
3850                let doc_id = close_header.document_id;
3851                let mut close_je = JournalEntry::new(close_header);
3852
3853                let abs_net_income = net_income.abs();
3854
3855                if net_income > Decimal::ZERO {
3856                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
3857                    close_je.add_line(JournalEntryLine::debit(
3858                        doc_id,
3859                        1,
3860                        equity_accounts::INCOME_SUMMARY.to_string(),
3861                        abs_net_income,
3862                    ));
3863                    close_je.add_line(JournalEntryLine::credit(
3864                        doc_id,
3865                        2,
3866                        equity_accounts::RETAINED_EARNINGS.to_string(),
3867                        abs_net_income,
3868                    ));
3869                } else {
3870                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
3871                    close_je.add_line(JournalEntryLine::debit(
3872                        doc_id,
3873                        1,
3874                        equity_accounts::RETAINED_EARNINGS.to_string(),
3875                        abs_net_income,
3876                    ));
3877                    close_je.add_line(JournalEntryLine::credit(
3878                        doc_id,
3879                        2,
3880                        equity_accounts::INCOME_SUMMARY.to_string(),
3881                        abs_net_income,
3882                    ));
3883                }
3884
3885                debug_assert!(
3886                    close_je.is_balanced(),
3887                    "Income statement closing JE must be balanced"
3888                );
3889                close_jes.push(close_je);
3890            }
3891        }
3892
3893        let close_count = close_jes.len();
3894        if close_count > 0 {
3895            info!("Generated {} period-close journal entries", close_count);
3896            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
3897            entries.extend(close_jes);
3898            stats.period_close_je_count = close_count;
3899
3900            // Update total entry/line-item stats
3901            stats.total_entries = entries.len() as u64;
3902            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
3903        } else {
3904            debug!("No period-close entries generated (no income statement activity)");
3905        }
3906
3907        Ok(())
3908    }
3909
3910    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
3911    fn phase_audit_data(
3912        &mut self,
3913        entries: &[JournalEntry],
3914        stats: &mut EnhancedGenerationStatistics,
3915    ) -> SynthResult<AuditSnapshot> {
3916        if self.phase_config.generate_audit {
3917            info!("Phase 8: Generating Audit Data");
3918            let audit_snapshot = self.generate_audit_data(entries)?;
3919            stats.audit_engagement_count = audit_snapshot.engagements.len();
3920            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
3921            stats.audit_evidence_count = audit_snapshot.evidence.len();
3922            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
3923            stats.audit_finding_count = audit_snapshot.findings.len();
3924            stats.audit_judgment_count = audit_snapshot.judgments.len();
3925            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
3926            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
3927            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
3928            stats.audit_sample_count = audit_snapshot.samples.len();
3929            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
3930            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
3931            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
3932            stats.audit_related_party_count = audit_snapshot.related_parties.len();
3933            stats.audit_related_party_transaction_count =
3934                audit_snapshot.related_party_transactions.len();
3935            info!(
3936                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
3937                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
3938                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
3939                 {} RP transactions",
3940                stats.audit_engagement_count,
3941                stats.audit_workpaper_count,
3942                stats.audit_evidence_count,
3943                stats.audit_risk_count,
3944                stats.audit_finding_count,
3945                stats.audit_judgment_count,
3946                stats.audit_confirmation_count,
3947                stats.audit_procedure_step_count,
3948                stats.audit_sample_count,
3949                stats.audit_analytical_result_count,
3950                stats.audit_ia_function_count,
3951                stats.audit_ia_report_count,
3952                stats.audit_related_party_count,
3953                stats.audit_related_party_transaction_count,
3954            );
3955            self.check_resources_with_log("post-audit")?;
3956            Ok(audit_snapshot)
3957        } else {
3958            debug!("Phase 8: Skipped (audit generation disabled)");
3959            Ok(AuditSnapshot::default())
3960        }
3961    }
3962
3963    /// Phase 9: Generate banking KYC/AML data.
3964    fn phase_banking_data(
3965        &mut self,
3966        stats: &mut EnhancedGenerationStatistics,
3967    ) -> SynthResult<BankingSnapshot> {
3968        if self.phase_config.generate_banking {
3969            info!("Phase 9: Generating Banking KYC/AML Data");
3970            let banking_snapshot = self.generate_banking_data()?;
3971            stats.banking_customer_count = banking_snapshot.customers.len();
3972            stats.banking_account_count = banking_snapshot.accounts.len();
3973            stats.banking_transaction_count = banking_snapshot.transactions.len();
3974            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
3975            info!(
3976                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
3977                stats.banking_customer_count, stats.banking_account_count,
3978                stats.banking_transaction_count, stats.banking_suspicious_count
3979            );
3980            self.check_resources_with_log("post-banking")?;
3981            Ok(banking_snapshot)
3982        } else {
3983            debug!("Phase 9: Skipped (banking generation disabled)");
3984            Ok(BankingSnapshot::default())
3985        }
3986    }
3987
3988    /// Phase 10: Export accounting network graphs for ML training.
3989    fn phase_graph_export(
3990        &mut self,
3991        entries: &[JournalEntry],
3992        coa: &Arc<ChartOfAccounts>,
3993        stats: &mut EnhancedGenerationStatistics,
3994    ) -> SynthResult<GraphExportSnapshot> {
3995        if self.phase_config.generate_graph_export && !entries.is_empty() {
3996            info!("Phase 10: Exporting Accounting Network Graphs");
3997            match self.export_graphs(entries, coa, stats) {
3998                Ok(snapshot) => {
3999                    info!(
4000                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4001                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4002                    );
4003                    Ok(snapshot)
4004                }
4005                Err(e) => {
4006                    warn!("Phase 10: Graph export failed: {}", e);
4007                    Ok(GraphExportSnapshot::default())
4008                }
4009            }
4010        } else {
4011            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4012            Ok(GraphExportSnapshot::default())
4013        }
4014    }
4015
4016    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4017    #[allow(clippy::too_many_arguments)]
4018    fn phase_hypergraph_export(
4019        &self,
4020        coa: &Arc<ChartOfAccounts>,
4021        entries: &[JournalEntry],
4022        document_flows: &DocumentFlowSnapshot,
4023        sourcing: &SourcingSnapshot,
4024        hr: &HrSnapshot,
4025        manufacturing: &ManufacturingSnapshot,
4026        banking: &BankingSnapshot,
4027        audit: &AuditSnapshot,
4028        financial_reporting: &FinancialReportingSnapshot,
4029        ocpm: &OcpmSnapshot,
4030        compliance: &ComplianceRegulationsSnapshot,
4031        stats: &mut EnhancedGenerationStatistics,
4032    ) -> SynthResult<()> {
4033        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4034            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4035            match self.export_hypergraph(
4036                coa,
4037                entries,
4038                document_flows,
4039                sourcing,
4040                hr,
4041                manufacturing,
4042                banking,
4043                audit,
4044                financial_reporting,
4045                ocpm,
4046                compliance,
4047                stats,
4048            ) {
4049                Ok(info) => {
4050                    info!(
4051                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4052                        info.node_count, info.edge_count, info.hyperedge_count
4053                    );
4054                }
4055                Err(e) => {
4056                    warn!("Phase 10b: Hypergraph export failed: {}", e);
4057                }
4058            }
4059        } else {
4060            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
4061        }
4062        Ok(())
4063    }
4064
4065    /// Phase 11: LLM Enrichment.
4066    ///
4067    /// Uses an LLM provider (mock by default) to enrich vendor names with
4068    /// realistic, context-aware names. This phase is non-blocking: failures
4069    /// log a warning but do not stop the generation pipeline.
4070    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
4071        if !self.config.llm.enabled {
4072            debug!("Phase 11: Skipped (LLM enrichment disabled)");
4073            return;
4074        }
4075
4076        info!("Phase 11: Starting LLM Enrichment");
4077        let start = std::time::Instant::now();
4078
4079        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4080            // Select provider: use HttpLlmProvider when a non-mock provider is configured
4081            // and the corresponding API key environment variable is present.
4082            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
4083                let schema_provider = &self.config.llm.provider;
4084                let api_key_env = match schema_provider.as_str() {
4085                    "openai" => Some("OPENAI_API_KEY"),
4086                    "anthropic" => Some("ANTHROPIC_API_KEY"),
4087                    "custom" => Some("LLM_API_KEY"),
4088                    _ => None,
4089                };
4090                if let Some(key_env) = api_key_env {
4091                    if std::env::var(key_env).is_ok() {
4092                        let llm_config = datasynth_core::llm::LlmConfig {
4093                            model: self.config.llm.model.clone(),
4094                            api_key_env: key_env.to_string(),
4095                            ..datasynth_core::llm::LlmConfig::default()
4096                        };
4097                        match HttpLlmProvider::new(llm_config) {
4098                            Ok(p) => Arc::new(p),
4099                            Err(e) => {
4100                                warn!(
4101                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
4102                                    e
4103                                );
4104                                Arc::new(MockLlmProvider::new(self.seed))
4105                            }
4106                        }
4107                    } else {
4108                        Arc::new(MockLlmProvider::new(self.seed))
4109                    }
4110                } else {
4111                    Arc::new(MockLlmProvider::new(self.seed))
4112                }
4113            };
4114            let enricher = VendorLlmEnricher::new(provider);
4115
4116            let industry = format!("{:?}", self.config.global.industry);
4117            let max_enrichments = self
4118                .config
4119                .llm
4120                .max_vendor_enrichments
4121                .min(self.master_data.vendors.len());
4122
4123            let mut enriched_count = 0usize;
4124            for vendor in self.master_data.vendors.iter_mut().take(max_enrichments) {
4125                match enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
4126                    Ok(name) => {
4127                        vendor.name = name;
4128                        enriched_count += 1;
4129                    }
4130                    Err(e) => {
4131                        warn!(
4132                            "LLM vendor enrichment failed for {}: {}",
4133                            vendor.vendor_id, e
4134                        );
4135                    }
4136                }
4137            }
4138
4139            enriched_count
4140        }));
4141
4142        match result {
4143            Ok(enriched_count) => {
4144                stats.llm_vendors_enriched = enriched_count;
4145                let elapsed = start.elapsed();
4146                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4147                info!(
4148                    "Phase 11 complete: {} vendors enriched in {}ms",
4149                    enriched_count, stats.llm_enrichment_ms
4150                );
4151            }
4152            Err(_) => {
4153                let elapsed = start.elapsed();
4154                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
4155                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
4156            }
4157        }
4158    }
4159
4160    /// Phase 12: Diffusion Enhancement.
4161    ///
4162    /// Generates a sample set using the statistical diffusion backend to
4163    /// demonstrate distribution-matching data generation. This phase is
4164    /// non-blocking: failures log a warning but do not stop the pipeline.
4165    fn phase_diffusion_enhancement(&self, stats: &mut EnhancedGenerationStatistics) {
4166        if !self.config.diffusion.enabled {
4167            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
4168            return;
4169        }
4170
4171        info!("Phase 12: Starting Diffusion Enhancement");
4172        let start = std::time::Instant::now();
4173
4174        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4175            // Target distribution: transaction amounts (log-normal-like)
4176            let means = vec![5000.0, 3.0, 2.0]; // amount, line_items, approval_level
4177            let stds = vec![2000.0, 1.5, 1.0];
4178
4179            let diffusion_config = DiffusionConfig {
4180                n_steps: self.config.diffusion.n_steps,
4181                seed: self.seed,
4182                ..Default::default()
4183            };
4184
4185            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
4186
4187            let n_samples = self.config.diffusion.sample_size;
4188            let n_features = 3; // amount, line_items, approval_level
4189            let samples = backend.generate(n_samples, n_features, self.seed);
4190
4191            samples.len()
4192        }));
4193
4194        match result {
4195            Ok(sample_count) => {
4196                stats.diffusion_samples_generated = sample_count;
4197                let elapsed = start.elapsed();
4198                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4199                info!(
4200                    "Phase 12 complete: {} diffusion samples generated in {}ms",
4201                    sample_count, stats.diffusion_enhancement_ms
4202                );
4203            }
4204            Err(_) => {
4205                let elapsed = start.elapsed();
4206                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
4207                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
4208            }
4209        }
4210    }
4211
4212    /// Phase 13: Causal Overlay.
4213    ///
4214    /// Builds a structural causal model from a built-in template (e.g.,
4215    /// fraud_detection) and generates causal samples. Optionally validates
4216    /// that the output respects the causal structure. This phase is
4217    /// non-blocking: failures log a warning but do not stop the pipeline.
4218    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
4219        if !self.config.causal.enabled {
4220            debug!("Phase 13: Skipped (causal generation disabled)");
4221            return;
4222        }
4223
4224        info!("Phase 13: Starting Causal Overlay");
4225        let start = std::time::Instant::now();
4226
4227        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4228            // Select template based on config
4229            let graph = match self.config.causal.template.as_str() {
4230                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
4231                _ => CausalGraph::fraud_detection_template(),
4232            };
4233
4234            let scm = StructuralCausalModel::new(graph.clone())
4235                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
4236
4237            let n_samples = self.config.causal.sample_size;
4238            let samples = scm
4239                .generate(n_samples, self.seed)
4240                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
4241
4242            // Optionally validate causal structure
4243            let validation_passed = if self.config.causal.validate {
4244                let report = CausalValidator::validate_causal_structure(&samples, &graph);
4245                if report.valid {
4246                    info!(
4247                        "Causal validation passed: all {} checks OK",
4248                        report.checks.len()
4249                    );
4250                } else {
4251                    warn!(
4252                        "Causal validation: {} violations detected: {:?}",
4253                        report.violations.len(),
4254                        report.violations
4255                    );
4256                }
4257                Some(report.valid)
4258            } else {
4259                None
4260            };
4261
4262            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
4263        }));
4264
4265        match result {
4266            Ok(Ok((sample_count, validation_passed))) => {
4267                stats.causal_samples_generated = sample_count;
4268                stats.causal_validation_passed = validation_passed;
4269                let elapsed = start.elapsed();
4270                stats.causal_generation_ms = elapsed.as_millis() as u64;
4271                info!(
4272                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
4273                    sample_count, stats.causal_generation_ms, validation_passed,
4274                );
4275            }
4276            Ok(Err(e)) => {
4277                let elapsed = start.elapsed();
4278                stats.causal_generation_ms = elapsed.as_millis() as u64;
4279                warn!("Phase 13: Causal generation failed: {}", e);
4280            }
4281            Err(_) => {
4282                let elapsed = start.elapsed();
4283                stats.causal_generation_ms = elapsed.as_millis() as u64;
4284                warn!("Phase 13: Causal generation failed (panic caught), continuing");
4285            }
4286        }
4287    }
4288
4289    /// Phase 14: Generate S2C sourcing data.
4290    fn phase_sourcing_data(
4291        &mut self,
4292        stats: &mut EnhancedGenerationStatistics,
4293    ) -> SynthResult<SourcingSnapshot> {
4294        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
4295            debug!("Phase 14: Skipped (sourcing generation disabled)");
4296            return Ok(SourcingSnapshot::default());
4297        }
4298        let degradation = self.check_resources()?;
4299        if degradation >= DegradationLevel::Reduced {
4300            debug!(
4301                "Phase skipped due to resource pressure (degradation: {:?})",
4302                degradation
4303            );
4304            return Ok(SourcingSnapshot::default());
4305        }
4306
4307        info!("Phase 14: Generating S2C Sourcing Data");
4308        let seed = self.seed;
4309
4310        // Gather vendor data from master data
4311        let vendor_ids: Vec<String> = self
4312            .master_data
4313            .vendors
4314            .iter()
4315            .map(|v| v.vendor_id.clone())
4316            .collect();
4317        if vendor_ids.is_empty() {
4318            debug!("Phase 14: Skipped (no vendors available)");
4319            return Ok(SourcingSnapshot::default());
4320        }
4321
4322        let categories: Vec<(String, String)> = vec![
4323            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
4324            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
4325            ("CAT-IT".to_string(), "IT Equipment".to_string()),
4326            ("CAT-SVC".to_string(), "Professional Services".to_string()),
4327            ("CAT-LOG".to_string(), "Logistics".to_string()),
4328        ];
4329        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
4330            .iter()
4331            .map(|(id, name)| {
4332                (
4333                    id.clone(),
4334                    name.clone(),
4335                    rust_decimal::Decimal::from(100_000),
4336                )
4337            })
4338            .collect();
4339
4340        let company_code = self
4341            .config
4342            .companies
4343            .first()
4344            .map(|c| c.code.as_str())
4345            .unwrap_or("1000");
4346        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4347            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4348        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4349        let fiscal_year = start_date.year() as u16;
4350        let owner_ids: Vec<String> = self
4351            .master_data
4352            .employees
4353            .iter()
4354            .take(5)
4355            .map(|e| e.employee_id.clone())
4356            .collect();
4357        let owner_id = owner_ids
4358            .first()
4359            .map(std::string::String::as_str)
4360            .unwrap_or("BUYER-001");
4361
4362        // Step 1: Spend Analysis
4363        let mut spend_gen = SpendAnalysisGenerator::new(seed);
4364        let spend_analyses =
4365            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
4366
4367        // Step 2: Sourcing Projects
4368        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
4369        let sourcing_projects = if owner_ids.is_empty() {
4370            Vec::new()
4371        } else {
4372            project_gen.generate(
4373                company_code,
4374                &categories_with_spend,
4375                &owner_ids,
4376                start_date,
4377                self.config.global.period_months,
4378            )
4379        };
4380        stats.sourcing_project_count = sourcing_projects.len();
4381
4382        // Step 3: Qualifications
4383        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
4384        let mut qual_gen = QualificationGenerator::new(seed + 2);
4385        let qualifications = qual_gen.generate(
4386            company_code,
4387            &qual_vendor_ids,
4388            sourcing_projects.first().map(|p| p.project_id.as_str()),
4389            owner_id,
4390            start_date,
4391        );
4392
4393        // Step 4: RFx Events
4394        let mut rfx_gen = RfxGenerator::new(seed + 3);
4395        let rfx_events: Vec<RfxEvent> = sourcing_projects
4396            .iter()
4397            .map(|proj| {
4398                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
4399                rfx_gen.generate(
4400                    company_code,
4401                    &proj.project_id,
4402                    &proj.category_id,
4403                    &qualified_vids,
4404                    owner_id,
4405                    start_date,
4406                    50000.0,
4407                )
4408            })
4409            .collect();
4410        stats.rfx_event_count = rfx_events.len();
4411
4412        // Step 5: Bids
4413        let mut bid_gen = BidGenerator::new(seed + 4);
4414        let mut all_bids = Vec::new();
4415        for rfx in &rfx_events {
4416            let bidder_count = vendor_ids.len().clamp(2, 5);
4417            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
4418            let bids = bid_gen.generate(rfx, &responding, start_date);
4419            all_bids.extend(bids);
4420        }
4421        stats.bid_count = all_bids.len();
4422
4423        // Step 6: Bid Evaluations
4424        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
4425        let bid_evaluations: Vec<BidEvaluation> = rfx_events
4426            .iter()
4427            .map(|rfx| {
4428                let rfx_bids: Vec<SupplierBid> = all_bids
4429                    .iter()
4430                    .filter(|b| b.rfx_id == rfx.rfx_id)
4431                    .cloned()
4432                    .collect();
4433                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
4434            })
4435            .collect();
4436
4437        // Step 7: Contracts from winning bids
4438        let mut contract_gen = ContractGenerator::new(seed + 6);
4439        let contracts: Vec<ProcurementContract> = bid_evaluations
4440            .iter()
4441            .zip(rfx_events.iter())
4442            .filter_map(|(eval, rfx)| {
4443                eval.ranked_bids.first().and_then(|winner| {
4444                    all_bids
4445                        .iter()
4446                        .find(|b| b.bid_id == winner.bid_id)
4447                        .map(|winning_bid| {
4448                            contract_gen.generate_from_bid(
4449                                winning_bid,
4450                                Some(&rfx.sourcing_project_id),
4451                                &rfx.category_id,
4452                                owner_id,
4453                                start_date,
4454                            )
4455                        })
4456                })
4457            })
4458            .collect();
4459        stats.contract_count = contracts.len();
4460
4461        // Step 8: Catalog Items
4462        let mut catalog_gen = CatalogGenerator::new(seed + 7);
4463        let catalog_items = catalog_gen.generate(&contracts);
4464        stats.catalog_item_count = catalog_items.len();
4465
4466        // Step 9: Scorecards
4467        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
4468        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
4469            .iter()
4470            .fold(
4471                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
4472                |mut acc, c| {
4473                    acc.entry(c.vendor_id.clone()).or_default().push(c);
4474                    acc
4475                },
4476            )
4477            .into_iter()
4478            .collect();
4479        let scorecards = scorecard_gen.generate(
4480            company_code,
4481            &vendor_contracts,
4482            start_date,
4483            end_date,
4484            owner_id,
4485        );
4486        stats.scorecard_count = scorecards.len();
4487
4488        // Back-populate cross-references on sourcing projects (Task 35)
4489        // Link each project to its RFx events, contracts, and spend analyses
4490        let mut sourcing_projects = sourcing_projects;
4491        for project in &mut sourcing_projects {
4492            // Link RFx events generated for this project
4493            project.rfx_ids = rfx_events
4494                .iter()
4495                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
4496                .map(|rfx| rfx.rfx_id.clone())
4497                .collect();
4498
4499            // Link contract awarded from this project's RFx
4500            project.contract_id = contracts
4501                .iter()
4502                .find(|c| {
4503                    c.sourcing_project_id
4504                        .as_deref()
4505                        .is_some_and(|sp| sp == project.project_id)
4506                })
4507                .map(|c| c.contract_id.clone());
4508
4509            // Link spend analysis for matching category (use category_id as the reference)
4510            project.spend_analysis_id = spend_analyses
4511                .iter()
4512                .find(|sa| sa.category_id == project.category_id)
4513                .map(|sa| sa.category_id.clone());
4514        }
4515
4516        info!(
4517            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
4518            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
4519            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
4520        );
4521        self.check_resources_with_log("post-sourcing")?;
4522
4523        Ok(SourcingSnapshot {
4524            spend_analyses,
4525            sourcing_projects,
4526            qualifications,
4527            rfx_events,
4528            bids: all_bids,
4529            bid_evaluations,
4530            contracts,
4531            catalog_items,
4532            scorecards,
4533        })
4534    }
4535
4536    /// Build a [`GroupStructure`] from the current company configuration.
4537    ///
4538    /// The first company in the configuration is treated as the ultimate parent.
4539    /// All remaining companies become wholly-owned (100 %) subsidiaries with
4540    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
4541    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
4542        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
4543
4544        let parent_code = self
4545            .config
4546            .companies
4547            .first()
4548            .map(|c| c.code.clone())
4549            .unwrap_or_else(|| "PARENT".to_string());
4550
4551        let mut group = GroupStructure::new(parent_code);
4552
4553        for company in self.config.companies.iter().skip(1) {
4554            let sub =
4555                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
4556            group.add_subsidiary(sub);
4557        }
4558
4559        group
4560    }
4561
4562    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
4563    fn phase_intercompany(
4564        &mut self,
4565        journal_entries: &[JournalEntry],
4566        stats: &mut EnhancedGenerationStatistics,
4567    ) -> SynthResult<IntercompanySnapshot> {
4568        // Skip if intercompany is disabled in config
4569        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
4570            debug!("Phase 14b: Skipped (intercompany generation disabled)");
4571            return Ok(IntercompanySnapshot::default());
4572        }
4573
4574        // Intercompany requires at least 2 companies
4575        if self.config.companies.len() < 2 {
4576            debug!(
4577                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
4578                self.config.companies.len()
4579            );
4580            return Ok(IntercompanySnapshot::default());
4581        }
4582
4583        info!("Phase 14b: Generating Intercompany Transactions");
4584
4585        // Build the group structure early — used by ISA 600 component auditor scope
4586        // and consolidated financial statement generators downstream.
4587        let group_structure = self.build_group_structure();
4588        debug!(
4589            "Group structure built: parent={}, subsidiaries={}",
4590            group_structure.parent_entity,
4591            group_structure.subsidiaries.len()
4592        );
4593
4594        let seed = self.seed;
4595        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4596            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4597        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4598
4599        // Build ownership structure from company configs
4600        // First company is treated as the parent, remaining are subsidiaries
4601        let parent_code = self.config.companies[0].code.clone();
4602        let mut ownership_structure =
4603            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
4604
4605        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
4606            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
4607                format!("REL{:03}", i + 1),
4608                parent_code.clone(),
4609                company.code.clone(),
4610                rust_decimal::Decimal::from(100), // Default 100% ownership
4611                start_date,
4612            );
4613            ownership_structure.add_relationship(relationship);
4614        }
4615
4616        // Convert config transfer pricing method to core model enum
4617        let tp_method = match self.config.intercompany.transfer_pricing_method {
4618            datasynth_config::schema::TransferPricingMethod::CostPlus => {
4619                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
4620            }
4621            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
4622                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
4623            }
4624            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
4625                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
4626            }
4627            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
4628                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
4629            }
4630            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
4631                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
4632            }
4633        };
4634
4635        // Build IC generator config from schema config
4636        let ic_currency = self
4637            .config
4638            .companies
4639            .first()
4640            .map(|c| c.currency.clone())
4641            .unwrap_or_else(|| "USD".to_string());
4642        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
4643            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
4644            transfer_pricing_method: tp_method,
4645            markup_percent: rust_decimal::Decimal::from_f64_retain(
4646                self.config.intercompany.markup_percent,
4647            )
4648            .unwrap_or(rust_decimal::Decimal::from(5)),
4649            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
4650            default_currency: ic_currency,
4651            ..Default::default()
4652        };
4653
4654        // Create IC generator
4655        let mut ic_generator = datasynth_generators::ICGenerator::new(
4656            ic_gen_config,
4657            ownership_structure.clone(),
4658            seed + 50,
4659        );
4660
4661        // Generate IC transactions for the period
4662        // Use ~3 transactions per day as a reasonable default
4663        let transactions_per_day = 3;
4664        let matched_pairs = ic_generator.generate_transactions_for_period(
4665            start_date,
4666            end_date,
4667            transactions_per_day,
4668        );
4669
4670        // Generate IC source P2P/O2C documents
4671        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
4672        debug!(
4673            "Generated {} IC seller invoices, {} IC buyer POs",
4674            ic_doc_chains.seller_invoices.len(),
4675            ic_doc_chains.buyer_orders.len()
4676        );
4677
4678        // Generate journal entries from matched pairs
4679        let mut seller_entries = Vec::new();
4680        let mut buyer_entries = Vec::new();
4681        let fiscal_year = start_date.year();
4682
4683        for pair in &matched_pairs {
4684            let fiscal_period = pair.posting_date.month();
4685            let (seller_je, buyer_je) =
4686                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
4687            seller_entries.push(seller_je);
4688            buyer_entries.push(buyer_je);
4689        }
4690
4691        // Run matching engine
4692        let matching_config = datasynth_generators::ICMatchingConfig {
4693            base_currency: self
4694                .config
4695                .companies
4696                .first()
4697                .map(|c| c.currency.clone())
4698                .unwrap_or_else(|| "USD".to_string()),
4699            ..Default::default()
4700        };
4701        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
4702        matching_engine.load_matched_pairs(&matched_pairs);
4703        let matching_result = matching_engine.run_matching(end_date);
4704
4705        // Generate elimination entries if configured
4706        let mut elimination_entries = Vec::new();
4707        if self.config.intercompany.generate_eliminations {
4708            let elim_config = datasynth_generators::EliminationConfig {
4709                consolidation_entity: "GROUP".to_string(),
4710                base_currency: self
4711                    .config
4712                    .companies
4713                    .first()
4714                    .map(|c| c.currency.clone())
4715                    .unwrap_or_else(|| "USD".to_string()),
4716                ..Default::default()
4717            };
4718
4719            let mut elim_generator =
4720                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
4721
4722            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
4723            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
4724                matching_result
4725                    .matched_balances
4726                    .iter()
4727                    .chain(matching_result.unmatched_balances.iter())
4728                    .cloned()
4729                    .collect();
4730
4731            // Build investment and equity maps from the group structure so that the
4732            // elimination generator can produce equity-investment elimination entries
4733            // (parent's investment in subsidiary vs. subsidiary's equity capital).
4734            //
4735            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
4736            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
4737            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
4738            //
4739            // Net assets are derived from the journal entries using account-range heuristics:
4740            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
4741            // no JE data is available (IC phase runs early in the generation pipeline).
4742            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
4743                std::collections::HashMap::new();
4744            let mut equity_amounts: std::collections::HashMap<
4745                String,
4746                std::collections::HashMap<String, rust_decimal::Decimal>,
4747            > = std::collections::HashMap::new();
4748            {
4749                use rust_decimal::Decimal;
4750                let hundred = Decimal::from(100u32);
4751                let ten_pct = Decimal::new(10, 2); // 0.10
4752                let thirty_pct = Decimal::new(30, 2); // 0.30
4753                let sixty_pct = Decimal::new(60, 2); // 0.60
4754                let parent_code = &group_structure.parent_entity;
4755                for sub in &group_structure.subsidiaries {
4756                    let net_assets = {
4757                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4758                        if na > Decimal::ZERO {
4759                            na
4760                        } else {
4761                            Decimal::from(1_000_000u64)
4762                        }
4763                    };
4764                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
4765                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
4766                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
4767
4768                    // Split subsidiary equity into conventional components:
4769                    // 10 % share capital / 30 % APIC / 60 % retained earnings
4770                    let mut eq_map = std::collections::HashMap::new();
4771                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
4772                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
4773                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
4774                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
4775                }
4776            }
4777
4778            let journal = elim_generator.generate_eliminations(
4779                &fiscal_period,
4780                end_date,
4781                &all_balances,
4782                &matched_pairs,
4783                &investment_amounts,
4784                &equity_amounts,
4785            );
4786
4787            elimination_entries = journal.entries.clone();
4788        }
4789
4790        let matched_pair_count = matched_pairs.len();
4791        let elimination_entry_count = elimination_entries.len();
4792        let match_rate = matching_result.match_rate;
4793
4794        stats.ic_matched_pair_count = matched_pair_count;
4795        stats.ic_elimination_count = elimination_entry_count;
4796        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
4797
4798        info!(
4799            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
4800            matched_pair_count,
4801            stats.ic_transaction_count,
4802            seller_entries.len(),
4803            buyer_entries.len(),
4804            elimination_entry_count,
4805            match_rate * 100.0
4806        );
4807        self.check_resources_with_log("post-intercompany")?;
4808
4809        // ----------------------------------------------------------------
4810        // NCI measurements: derive from group structure ownership percentages
4811        // ----------------------------------------------------------------
4812        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
4813            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
4814            use rust_decimal::Decimal;
4815
4816            let eight_pct = Decimal::new(8, 2); // 0.08
4817
4818            group_structure
4819                .subsidiaries
4820                .iter()
4821                .filter(|sub| {
4822                    sub.nci_percentage > Decimal::ZERO
4823                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
4824                })
4825                .map(|sub| {
4826                    // Compute net assets from actual journal entries for this subsidiary.
4827                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
4828                    // IC phase runs before the main JE batch has been populated).
4829                    let net_assets_from_jes =
4830                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
4831
4832                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
4833                        net_assets_from_jes.round_dp(2)
4834                    } else {
4835                        // Fallback: use a plausible base amount
4836                        Decimal::from(1_000_000u64)
4837                    };
4838
4839                    // Net income approximated as 8% of net assets
4840                    let net_income = (net_assets * eight_pct).round_dp(2);
4841
4842                    NciMeasurement::compute(
4843                        sub.entity_code.clone(),
4844                        sub.nci_percentage,
4845                        net_assets,
4846                        net_income,
4847                    )
4848                })
4849                .collect()
4850        };
4851
4852        if !nci_measurements.is_empty() {
4853            info!(
4854                "NCI measurements: {} subsidiaries with non-controlling interests",
4855                nci_measurements.len()
4856            );
4857        }
4858
4859        Ok(IntercompanySnapshot {
4860            group_structure: Some(group_structure),
4861            matched_pairs,
4862            seller_journal_entries: seller_entries,
4863            buyer_journal_entries: buyer_entries,
4864            elimination_entries,
4865            nci_measurements,
4866            ic_document_chains: Some(ic_doc_chains),
4867            matched_pair_count,
4868            elimination_entry_count,
4869            match_rate,
4870        })
4871    }
4872
4873    /// Phase 15: Generate bank reconciliations and financial statements.
4874    fn phase_financial_reporting(
4875        &mut self,
4876        document_flows: &DocumentFlowSnapshot,
4877        journal_entries: &[JournalEntry],
4878        coa: &Arc<ChartOfAccounts>,
4879        _hr: &HrSnapshot,
4880        _audit: &AuditSnapshot,
4881        stats: &mut EnhancedGenerationStatistics,
4882    ) -> SynthResult<FinancialReportingSnapshot> {
4883        let fs_enabled = self.phase_config.generate_financial_statements
4884            || self.config.financial_reporting.enabled;
4885        let br_enabled = self.phase_config.generate_bank_reconciliation;
4886
4887        if !fs_enabled && !br_enabled {
4888            debug!("Phase 15: Skipped (financial reporting disabled)");
4889            return Ok(FinancialReportingSnapshot::default());
4890        }
4891
4892        info!("Phase 15: Generating Financial Reporting Data");
4893
4894        let seed = self.seed;
4895        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4896            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4897
4898        let mut financial_statements = Vec::new();
4899        let mut bank_reconciliations = Vec::new();
4900        let mut trial_balances = Vec::new();
4901        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
4902        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
4903            Vec::new();
4904        // Standalone statements keyed by entity code
4905        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
4906            std::collections::HashMap::new();
4907        // Consolidated statements (one per period)
4908        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
4909        // Consolidation schedules (one per period)
4910        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
4911
4912        // Generate financial statements from JE-derived trial balances.
4913        //
4914        // When journal entries are available, we use cumulative trial balances for
4915        // balance sheet accounts and current-period trial balances for income
4916        // statement accounts. We also track prior-period trial balances so the
4917        // generator can produce comparative amounts, and we build a proper
4918        // cash flow statement from working capital changes rather than random data.
4919        if fs_enabled {
4920            let has_journal_entries = !journal_entries.is_empty();
4921
4922            // Use FinancialStatementGenerator for balance sheet and income statement,
4923            // but build cash flow ourselves from TB data when JEs are available.
4924            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
4925            // Separate generator for consolidated statements (different seed offset)
4926            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
4927
4928            // Collect elimination JEs once (reused across periods)
4929            let elimination_entries: Vec<&JournalEntry> = journal_entries
4930                .iter()
4931                .filter(|je| je.header.is_elimination)
4932                .collect();
4933
4934            // Generate one set of statements per period, per entity
4935            for period in 0..self.config.global.period_months {
4936                let period_start = start_date + chrono::Months::new(period);
4937                let period_end =
4938                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
4939                let fiscal_year = period_end.year() as u16;
4940                let fiscal_period = period_end.month() as u8;
4941                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
4942
4943                // Build per-entity trial balances for this period (non-elimination JEs)
4944                // We accumulate them for the consolidation step.
4945                let mut entity_tb_map: std::collections::HashMap<
4946                    String,
4947                    std::collections::HashMap<String, rust_decimal::Decimal>,
4948                > = std::collections::HashMap::new();
4949
4950                // --- Standalone: one set of statements per company ---
4951                for (company_idx, company) in self.config.companies.iter().enumerate() {
4952                    let company_code = company.code.as_str();
4953                    let currency = company.currency.as_str();
4954                    // Use a unique seed offset per company to keep statements deterministic
4955                    // and distinct across companies
4956                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
4957                    let mut company_fs_gen =
4958                        FinancialStatementGenerator::new(seed + company_seed_offset);
4959
4960                    if has_journal_entries {
4961                        let tb_entries = Self::build_cumulative_trial_balance(
4962                            journal_entries,
4963                            coa,
4964                            company_code,
4965                            start_date,
4966                            period_end,
4967                            fiscal_year,
4968                            fiscal_period,
4969                        );
4970
4971                        // Accumulate per-entity category balances for consolidation
4972                        let entity_cat_map =
4973                            entity_tb_map.entry(company_code.to_string()).or_default();
4974                        for tb_entry in &tb_entries {
4975                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
4976                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
4977                        }
4978
4979                        let stmts = company_fs_gen.generate(
4980                            company_code,
4981                            currency,
4982                            &tb_entries,
4983                            period_start,
4984                            period_end,
4985                            fiscal_year,
4986                            fiscal_period,
4987                            None,
4988                            "SYS-AUTOCLOSE",
4989                        );
4990
4991                        let mut entity_stmts = Vec::new();
4992                        for stmt in stmts {
4993                            if stmt.statement_type == StatementType::CashFlowStatement {
4994                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
4995                                let cf_items = Self::build_cash_flow_from_trial_balances(
4996                                    &tb_entries,
4997                                    None,
4998                                    net_income,
4999                                );
5000                                entity_stmts.push(FinancialStatement {
5001                                    cash_flow_items: cf_items,
5002                                    ..stmt
5003                                });
5004                            } else {
5005                                entity_stmts.push(stmt);
5006                            }
5007                        }
5008
5009                        // Add to the flat financial_statements list (used by KPI/budget)
5010                        financial_statements.extend(entity_stmts.clone());
5011
5012                        // Store standalone per-entity
5013                        standalone_statements
5014                            .entry(company_code.to_string())
5015                            .or_default()
5016                            .extend(entity_stmts);
5017
5018                        // Only store trial balance for the first company in the period
5019                        // to avoid duplicates in the trial_balances list
5020                        if company_idx == 0 {
5021                            trial_balances.push(PeriodTrialBalance {
5022                                fiscal_year,
5023                                fiscal_period,
5024                                period_start,
5025                                period_end,
5026                                entries: tb_entries,
5027                            });
5028                        }
5029                    } else {
5030                        // Fallback: no JEs available
5031                        let tb_entries = Self::build_trial_balance_from_entries(
5032                            journal_entries,
5033                            coa,
5034                            company_code,
5035                            fiscal_year,
5036                            fiscal_period,
5037                        );
5038
5039                        let stmts = company_fs_gen.generate(
5040                            company_code,
5041                            currency,
5042                            &tb_entries,
5043                            period_start,
5044                            period_end,
5045                            fiscal_year,
5046                            fiscal_period,
5047                            None,
5048                            "SYS-AUTOCLOSE",
5049                        );
5050                        financial_statements.extend(stmts.clone());
5051                        standalone_statements
5052                            .entry(company_code.to_string())
5053                            .or_default()
5054                            .extend(stmts);
5055
5056                        if company_idx == 0 && !tb_entries.is_empty() {
5057                            trial_balances.push(PeriodTrialBalance {
5058                                fiscal_year,
5059                                fiscal_period,
5060                                period_start,
5061                                period_end,
5062                                entries: tb_entries,
5063                            });
5064                        }
5065                    }
5066                }
5067
5068                // --- Consolidated: aggregate all entities + apply eliminations ---
5069                // Use the primary (first) company's currency for the consolidated statement
5070                let group_currency = self
5071                    .config
5072                    .companies
5073                    .first()
5074                    .map(|c| c.currency.as_str())
5075                    .unwrap_or("USD");
5076
5077                // Build owned elimination entries for this period
5078                let period_eliminations: Vec<JournalEntry> = elimination_entries
5079                    .iter()
5080                    .filter(|je| {
5081                        je.header.fiscal_year == fiscal_year
5082                            && je.header.fiscal_period == fiscal_period
5083                    })
5084                    .map(|je| (*je).clone())
5085                    .collect();
5086
5087                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
5088                    &entity_tb_map,
5089                    &period_eliminations,
5090                    &period_label,
5091                );
5092
5093                // Build a pseudo trial balance from consolidated line items for the
5094                // FinancialStatementGenerator to use (only for cash flow direction).
5095                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
5096                    .line_items
5097                    .iter()
5098                    .map(|li| {
5099                        let net = li.post_elimination_total;
5100                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
5101                            (net, rust_decimal::Decimal::ZERO)
5102                        } else {
5103                            (rust_decimal::Decimal::ZERO, -net)
5104                        };
5105                        datasynth_generators::TrialBalanceEntry {
5106                            account_code: li.account_category.clone(),
5107                            account_name: li.account_category.clone(),
5108                            category: li.account_category.clone(),
5109                            debit_balance: debit,
5110                            credit_balance: credit,
5111                        }
5112                    })
5113                    .collect();
5114
5115                let mut cons_stmts = cons_gen.generate(
5116                    "GROUP",
5117                    group_currency,
5118                    &cons_tb,
5119                    period_start,
5120                    period_end,
5121                    fiscal_year,
5122                    fiscal_period,
5123                    None,
5124                    "SYS-AUTOCLOSE",
5125                );
5126
5127                // Split consolidated line items by statement type.
5128                // The consolidation generator returns BS items first, then IS items,
5129                // identified by their CONS- prefix and category.
5130                let bs_categories: &[&str] = &[
5131                    "CASH",
5132                    "RECEIVABLES",
5133                    "INVENTORY",
5134                    "FIXEDASSETS",
5135                    "PAYABLES",
5136                    "ACCRUEDLIABILITIES",
5137                    "LONGTERMDEBT",
5138                    "EQUITY",
5139                ];
5140                let (bs_items, is_items): (Vec<_>, Vec<_>) =
5141                    cons_line_items.into_iter().partition(|li| {
5142                        let upper = li.label.to_uppercase();
5143                        bs_categories.iter().any(|c| upper == *c)
5144                    });
5145
5146                for stmt in &mut cons_stmts {
5147                    stmt.is_consolidated = true;
5148                    match stmt.statement_type {
5149                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
5150                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
5151                        _ => {} // CF and equity change statements keep generator output
5152                    }
5153                }
5154
5155                consolidated_statements.extend(cons_stmts);
5156                consolidation_schedules.push(schedule);
5157            }
5158
5159            // Backward compat: if only 1 company, use existing code path logic
5160            // (prior_cumulative_tb for comparative amounts). Already handled above;
5161            // the prior_ref is omitted to keep this change minimal.
5162            let _ = &mut fs_gen; // suppress unused warning
5163
5164            stats.financial_statement_count = financial_statements.len();
5165            info!(
5166                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
5167                stats.financial_statement_count,
5168                consolidated_statements.len(),
5169                has_journal_entries
5170            );
5171
5172            // ----------------------------------------------------------------
5173            // IFRS 8 / ASC 280: Operating Segment Reporting
5174            // ----------------------------------------------------------------
5175            // Build entity seeds from the company configuration.
5176            let entity_seeds: Vec<SegmentSeed> = self
5177                .config
5178                .companies
5179                .iter()
5180                .map(|c| SegmentSeed {
5181                    code: c.code.clone(),
5182                    name: c.name.clone(),
5183                    currency: c.currency.clone(),
5184                })
5185                .collect();
5186
5187            let mut seg_gen = SegmentGenerator::new(seed + 30);
5188
5189            // Generate one set of segment reports per period.
5190            // We extract consolidated revenue / profit / assets from the consolidated
5191            // financial statements produced above, falling back to simple sums when
5192            // no consolidated statements were generated (single-entity path).
5193            for period in 0..self.config.global.period_months {
5194                let period_end =
5195                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5196                let fiscal_year = period_end.year() as u16;
5197                let fiscal_period = period_end.month() as u8;
5198                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
5199
5200                use datasynth_core::models::StatementType;
5201
5202                // Try to find consolidated income statement for this period
5203                let cons_is = consolidated_statements.iter().find(|s| {
5204                    s.fiscal_year == fiscal_year
5205                        && s.fiscal_period == fiscal_period
5206                        && s.statement_type == StatementType::IncomeStatement
5207                });
5208                let cons_bs = consolidated_statements.iter().find(|s| {
5209                    s.fiscal_year == fiscal_year
5210                        && s.fiscal_period == fiscal_period
5211                        && s.statement_type == StatementType::BalanceSheet
5212                });
5213
5214                // If consolidated statements not available fall back to the flat list
5215                let is_stmt = cons_is.or_else(|| {
5216                    financial_statements.iter().find(|s| {
5217                        s.fiscal_year == fiscal_year
5218                            && s.fiscal_period == fiscal_period
5219                            && s.statement_type == StatementType::IncomeStatement
5220                    })
5221                });
5222                let bs_stmt = cons_bs.or_else(|| {
5223                    financial_statements.iter().find(|s| {
5224                        s.fiscal_year == fiscal_year
5225                            && s.fiscal_period == fiscal_period
5226                            && s.statement_type == StatementType::BalanceSheet
5227                    })
5228                });
5229
5230                let consolidated_revenue = is_stmt
5231                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5232                    .map(|li| -li.amount) // revenue is stored as negative in IS
5233                    .unwrap_or(rust_decimal::Decimal::ZERO);
5234
5235                let consolidated_profit = is_stmt
5236                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
5237                    .map(|li| li.amount)
5238                    .unwrap_or(rust_decimal::Decimal::ZERO);
5239
5240                let consolidated_assets = bs_stmt
5241                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
5242                    .map(|li| li.amount)
5243                    .unwrap_or(rust_decimal::Decimal::ZERO);
5244
5245                // Skip periods where we have no financial data
5246                if consolidated_revenue == rust_decimal::Decimal::ZERO
5247                    && consolidated_assets == rust_decimal::Decimal::ZERO
5248                {
5249                    continue;
5250                }
5251
5252                let group_code = self
5253                    .config
5254                    .companies
5255                    .first()
5256                    .map(|c| c.code.as_str())
5257                    .unwrap_or("GROUP");
5258
5259                // Compute period depreciation from JEs with document type "CL" hitting account
5260                // 6000 (depreciation expense).  These are generated by phase_period_close.
5261                let total_depr: rust_decimal::Decimal = journal_entries
5262                    .iter()
5263                    .filter(|je| je.header.document_type == "CL")
5264                    .flat_map(|je| je.lines.iter())
5265                    .filter(|l| l.gl_account.starts_with("6000"))
5266                    .map(|l| l.debit_amount)
5267                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
5268                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
5269                    Some(total_depr)
5270                } else {
5271                    None
5272                };
5273
5274                let (segs, recon) = seg_gen.generate(
5275                    group_code,
5276                    &period_label,
5277                    consolidated_revenue,
5278                    consolidated_profit,
5279                    consolidated_assets,
5280                    &entity_seeds,
5281                    depr_param,
5282                );
5283                segment_reports.extend(segs);
5284                segment_reconciliations.push(recon);
5285            }
5286
5287            info!(
5288                "Segment reports generated: {} segments, {} reconciliations",
5289                segment_reports.len(),
5290                segment_reconciliations.len()
5291            );
5292        }
5293
5294        // Generate bank reconciliations from payment data
5295        if br_enabled && !document_flows.payments.is_empty() {
5296            let employee_ids: Vec<String> = self
5297                .master_data
5298                .employees
5299                .iter()
5300                .map(|e| e.employee_id.clone())
5301                .collect();
5302            let mut br_gen =
5303                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
5304
5305            // Group payments by company code and period
5306            for company in &self.config.companies {
5307                let company_payments: Vec<PaymentReference> = document_flows
5308                    .payments
5309                    .iter()
5310                    .filter(|p| p.header.company_code == company.code)
5311                    .map(|p| PaymentReference {
5312                        id: p.header.document_id.clone(),
5313                        amount: if p.is_vendor { p.amount } else { -p.amount },
5314                        date: p.header.document_date,
5315                        reference: p
5316                            .check_number
5317                            .clone()
5318                            .or_else(|| p.wire_reference.clone())
5319                            .unwrap_or_else(|| p.header.document_id.clone()),
5320                    })
5321                    .collect();
5322
5323                if company_payments.is_empty() {
5324                    continue;
5325                }
5326
5327                let bank_account_id = format!("{}-MAIN", company.code);
5328
5329                // Generate one reconciliation per period
5330                for period in 0..self.config.global.period_months {
5331                    let period_start = start_date + chrono::Months::new(period);
5332                    let period_end =
5333                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
5334
5335                    let period_payments: Vec<PaymentReference> = company_payments
5336                        .iter()
5337                        .filter(|p| p.date >= period_start && p.date <= period_end)
5338                        .cloned()
5339                        .collect();
5340
5341                    let recon = br_gen.generate(
5342                        &company.code,
5343                        &bank_account_id,
5344                        period_start,
5345                        period_end,
5346                        &company.currency,
5347                        &period_payments,
5348                    );
5349                    bank_reconciliations.push(recon);
5350                }
5351            }
5352            info!(
5353                "Bank reconciliations generated: {} reconciliations",
5354                bank_reconciliations.len()
5355            );
5356        }
5357
5358        stats.bank_reconciliation_count = bank_reconciliations.len();
5359        self.check_resources_with_log("post-financial-reporting")?;
5360
5361        if !trial_balances.is_empty() {
5362            info!(
5363                "Period-close trial balances captured: {} periods",
5364                trial_balances.len()
5365            );
5366        }
5367
5368        // Notes to financial statements are generated in a separate post-processing step
5369        // (generate_notes_to_financial_statements) called after accounting_standards and tax
5370        // phases have completed, so that deferred tax and provision data can be wired in.
5371        let notes_to_financial_statements = Vec::new();
5372
5373        Ok(FinancialReportingSnapshot {
5374            financial_statements,
5375            standalone_statements,
5376            consolidated_statements,
5377            consolidation_schedules,
5378            bank_reconciliations,
5379            trial_balances,
5380            segment_reports,
5381            segment_reconciliations,
5382            notes_to_financial_statements,
5383        })
5384    }
5385
5386    /// Populate notes to financial statements using fully-resolved snapshots.
5387    ///
5388    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
5389    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
5390    /// can be wired into the notes context.  The method mutates
5391    /// `financial_reporting.notes_to_financial_statements` in-place.
5392    fn generate_notes_to_financial_statements(
5393        &self,
5394        financial_reporting: &mut FinancialReportingSnapshot,
5395        accounting_standards: &AccountingStandardsSnapshot,
5396        tax: &TaxSnapshot,
5397        hr: &HrSnapshot,
5398        audit: &AuditSnapshot,
5399        treasury: &TreasurySnapshot,
5400    ) {
5401        use datasynth_config::schema::AccountingFrameworkConfig;
5402        use datasynth_core::models::StatementType;
5403        use datasynth_generators::period_close::notes_generator::{
5404            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
5405        };
5406
5407        let seed = self.seed;
5408        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5409        {
5410            Ok(d) => d,
5411            Err(_) => return,
5412        };
5413
5414        let mut notes_gen = NotesGenerator::new(seed + 4235);
5415
5416        for company in &self.config.companies {
5417            let last_period_end = start_date
5418                + chrono::Months::new(self.config.global.period_months)
5419                - chrono::Days::new(1);
5420            let fiscal_year = last_period_end.year() as u16;
5421
5422            // Extract relevant amounts from the already-generated financial statements
5423            let entity_is = financial_reporting
5424                .standalone_statements
5425                .get(&company.code)
5426                .and_then(|stmts| {
5427                    stmts.iter().find(|s| {
5428                        s.fiscal_year == fiscal_year
5429                            && s.statement_type == StatementType::IncomeStatement
5430                    })
5431                });
5432            let entity_bs = financial_reporting
5433                .standalone_statements
5434                .get(&company.code)
5435                .and_then(|stmts| {
5436                    stmts.iter().find(|s| {
5437                        s.fiscal_year == fiscal_year
5438                            && s.statement_type == StatementType::BalanceSheet
5439                    })
5440                });
5441
5442            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
5443            let revenue_amount = entity_is
5444                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
5445                .map(|li| li.amount);
5446            let ppe_gross = entity_bs
5447                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
5448                .map(|li| li.amount);
5449
5450            let framework = match self
5451                .config
5452                .accounting_standards
5453                .framework
5454                .unwrap_or_default()
5455            {
5456                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
5457                    "IFRS".to_string()
5458                }
5459                _ => "US GAAP".to_string(),
5460            };
5461
5462            // ---- Deferred tax (IAS 12 / ASC 740) ----
5463            // Sum closing DTA and DTL from rollforward entries for this entity.
5464            let (entity_dta, entity_dtl) = {
5465                let mut dta = rust_decimal::Decimal::ZERO;
5466                let mut dtl = rust_decimal::Decimal::ZERO;
5467                for rf in &tax.deferred_tax.rollforwards {
5468                    if rf.entity_code == company.code {
5469                        dta += rf.closing_dta;
5470                        dtl += rf.closing_dtl;
5471                    }
5472                }
5473                (
5474                    if dta > rust_decimal::Decimal::ZERO {
5475                        Some(dta)
5476                    } else {
5477                        None
5478                    },
5479                    if dtl > rust_decimal::Decimal::ZERO {
5480                        Some(dtl)
5481                    } else {
5482                        None
5483                    },
5484                )
5485            };
5486
5487            // ---- Provisions (IAS 37 / ASC 450) ----
5488            // Filter provisions to this entity; sum best_estimate amounts.
5489            let entity_provisions: Vec<_> = accounting_standards
5490                .provisions
5491                .iter()
5492                .filter(|p| p.entity_code == company.code)
5493                .collect();
5494            let provision_count = entity_provisions.len();
5495            let total_provisions = if provision_count > 0 {
5496                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
5497            } else {
5498                None
5499            };
5500
5501            // ---- Pension data from HR snapshot ----
5502            let entity_pension_plan_count = hr
5503                .pension_plans
5504                .iter()
5505                .filter(|p| p.entity_code == company.code)
5506                .count();
5507            let entity_total_dbo: Option<rust_decimal::Decimal> = {
5508                let sum: rust_decimal::Decimal = hr
5509                    .pension_disclosures
5510                    .iter()
5511                    .filter(|d| {
5512                        hr.pension_plans
5513                            .iter()
5514                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
5515                    })
5516                    .map(|d| d.net_pension_liability)
5517                    .sum();
5518                let plan_assets_sum: rust_decimal::Decimal = hr
5519                    .pension_plan_assets
5520                    .iter()
5521                    .filter(|a| {
5522                        hr.pension_plans
5523                            .iter()
5524                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5525                    })
5526                    .map(|a| a.fair_value_closing)
5527                    .sum();
5528                if entity_pension_plan_count > 0 {
5529                    Some(sum + plan_assets_sum)
5530                } else {
5531                    None
5532                }
5533            };
5534            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
5535                let sum: rust_decimal::Decimal = hr
5536                    .pension_plan_assets
5537                    .iter()
5538                    .filter(|a| {
5539                        hr.pension_plans
5540                            .iter()
5541                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
5542                    })
5543                    .map(|a| a.fair_value_closing)
5544                    .sum();
5545                if entity_pension_plan_count > 0 {
5546                    Some(sum)
5547                } else {
5548                    None
5549                }
5550            };
5551
5552            // ---- Audit data: related parties + subsequent events ----
5553            // Audit snapshot covers all entities; use total counts (common case = single entity).
5554            let rp_count = audit.related_party_transactions.len();
5555            let se_count = audit.subsequent_events.len();
5556            let adjusting_count = audit
5557                .subsequent_events
5558                .iter()
5559                .filter(|e| {
5560                    matches!(
5561                        e.classification,
5562                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
5563                    )
5564                })
5565                .count();
5566
5567            let ctx = NotesGeneratorContext {
5568                entity_code: company.code.clone(),
5569                framework,
5570                period: format!("FY{}", fiscal_year),
5571                period_end: last_period_end,
5572                currency: company.currency.clone(),
5573                revenue_amount,
5574                total_ppe_gross: ppe_gross,
5575                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
5576                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
5577                deferred_tax_asset: entity_dta,
5578                deferred_tax_liability: entity_dtl,
5579                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
5580                provision_count,
5581                total_provisions,
5582                // Pension data from HR snapshot
5583                pension_plan_count: entity_pension_plan_count,
5584                total_dbo: entity_total_dbo,
5585                total_plan_assets: entity_total_plan_assets,
5586                // Audit data
5587                related_party_transaction_count: rp_count,
5588                subsequent_event_count: se_count,
5589                adjusting_event_count: adjusting_count,
5590                ..NotesGeneratorContext::default()
5591            };
5592
5593            let entity_notes = notes_gen.generate(&ctx);
5594            let standard_note_count = entity_notes.len() as u32;
5595            info!(
5596                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
5597                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
5598            );
5599            financial_reporting
5600                .notes_to_financial_statements
5601                .extend(entity_notes);
5602
5603            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
5604            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
5605                .debt_instruments
5606                .iter()
5607                .filter(|d| d.entity_id == company.code)
5608                .map(|d| {
5609                    (
5610                        format!("{:?}", d.instrument_type),
5611                        d.principal,
5612                        d.maturity_date.to_string(),
5613                    )
5614                })
5615                .collect();
5616
5617            let hedge_count = treasury.hedge_relationships.len();
5618            let effective_hedges = treasury
5619                .hedge_relationships
5620                .iter()
5621                .filter(|h| h.is_effective)
5622                .count();
5623            let total_notional: rust_decimal::Decimal = treasury
5624                .hedging_instruments
5625                .iter()
5626                .map(|h| h.notional_amount)
5627                .sum();
5628            let total_fair_value: rust_decimal::Decimal = treasury
5629                .hedging_instruments
5630                .iter()
5631                .map(|h| h.fair_value)
5632                .sum();
5633
5634            // Join provision_movements with provisions to get entity/type info
5635            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
5636                .provisions
5637                .iter()
5638                .filter(|p| p.entity_code == company.code)
5639                .map(|p| p.id.as_str())
5640                .collect();
5641            let provision_movements: Vec<(
5642                String,
5643                rust_decimal::Decimal,
5644                rust_decimal::Decimal,
5645                rust_decimal::Decimal,
5646            )> = accounting_standards
5647                .provision_movements
5648                .iter()
5649                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
5650                .map(|m| {
5651                    let prov_type = accounting_standards
5652                        .provisions
5653                        .iter()
5654                        .find(|p| p.id == m.provision_id)
5655                        .map(|p| format!("{:?}", p.provision_type))
5656                        .unwrap_or_else(|| "Unknown".to_string());
5657                    (prov_type, m.opening, m.additions, m.closing)
5658                })
5659                .collect();
5660
5661            let enhanced_ctx = EnhancedNotesContext {
5662                entity_code: company.code.clone(),
5663                period: format!("FY{}", fiscal_year),
5664                currency: company.currency.clone(),
5665                // Inventory breakdown: best-effort using zero (would need balance tracker)
5666                finished_goods_value: rust_decimal::Decimal::ZERO,
5667                wip_value: rust_decimal::Decimal::ZERO,
5668                raw_materials_value: rust_decimal::Decimal::ZERO,
5669                debt_instruments,
5670                hedge_count,
5671                effective_hedges,
5672                total_notional,
5673                total_fair_value,
5674                provision_movements,
5675            };
5676
5677            let enhanced_notes =
5678                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
5679            if !enhanced_notes.is_empty() {
5680                info!(
5681                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
5682                    company.code,
5683                    enhanced_notes.len(),
5684                    enhanced_ctx.debt_instruments.len(),
5685                    hedge_count,
5686                    enhanced_ctx.provision_movements.len(),
5687                );
5688                financial_reporting
5689                    .notes_to_financial_statements
5690                    .extend(enhanced_notes);
5691            }
5692        }
5693    }
5694
5695    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
5696    ///
5697    /// This ensures the trial balance is coherent with the JEs: every debit and credit
5698    /// posted in the journal entries flows through to the trial balance, using the real
5699    /// GL account numbers from the CoA.
5700    fn build_trial_balance_from_entries(
5701        journal_entries: &[JournalEntry],
5702        coa: &ChartOfAccounts,
5703        company_code: &str,
5704        fiscal_year: u16,
5705        fiscal_period: u8,
5706    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5707        use rust_decimal::Decimal;
5708
5709        // Accumulate total debits and credits per GL account
5710        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
5711        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
5712
5713        for je in journal_entries {
5714            // Filter to matching company, fiscal year, and period
5715            if je.header.company_code != company_code
5716                || je.header.fiscal_year != fiscal_year
5717                || je.header.fiscal_period != fiscal_period
5718            {
5719                continue;
5720            }
5721
5722            for line in &je.lines {
5723                let acct = &line.gl_account;
5724                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
5725                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
5726            }
5727        }
5728
5729        // Build a TrialBalanceEntry for each account that had activity
5730        let mut all_accounts: Vec<&String> = account_debits
5731            .keys()
5732            .chain(account_credits.keys())
5733            .collect::<std::collections::HashSet<_>>()
5734            .into_iter()
5735            .collect();
5736        all_accounts.sort();
5737
5738        let mut entries = Vec::new();
5739
5740        for acct_number in all_accounts {
5741            let debit = account_debits
5742                .get(acct_number)
5743                .copied()
5744                .unwrap_or(Decimal::ZERO);
5745            let credit = account_credits
5746                .get(acct_number)
5747                .copied()
5748                .unwrap_or(Decimal::ZERO);
5749
5750            if debit.is_zero() && credit.is_zero() {
5751                continue;
5752            }
5753
5754            // Look up account name from CoA, fall back to "Account {code}"
5755            let account_name = coa
5756                .get_account(acct_number)
5757                .map(|gl| gl.short_description.clone())
5758                .unwrap_or_else(|| format!("Account {acct_number}"));
5759
5760            // Map account code prefix to the category strings expected by
5761            // FinancialStatementGenerator (Cash, Receivables, Inventory,
5762            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
5763            // OperatingExpenses).
5764            let category = Self::category_from_account_code(acct_number);
5765
5766            entries.push(datasynth_generators::TrialBalanceEntry {
5767                account_code: acct_number.clone(),
5768                account_name,
5769                category,
5770                debit_balance: debit,
5771                credit_balance: credit,
5772            });
5773        }
5774
5775        entries
5776    }
5777
5778    /// Build a cumulative trial balance by aggregating all JEs from the start up to
5779    /// (and including) the given period end date.
5780    ///
5781    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
5782    /// while income statement accounts (revenue, expenses) show only the current period.
5783    /// The two are merged into a single Vec for the FinancialStatementGenerator.
5784    fn build_cumulative_trial_balance(
5785        journal_entries: &[JournalEntry],
5786        coa: &ChartOfAccounts,
5787        company_code: &str,
5788        start_date: NaiveDate,
5789        period_end: NaiveDate,
5790        fiscal_year: u16,
5791        fiscal_period: u8,
5792    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
5793        use rust_decimal::Decimal;
5794
5795        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
5796        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
5797        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
5798
5799        // Accumulate debits/credits for income statement accounts (current period only)
5800        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
5801        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
5802
5803        for je in journal_entries {
5804            if je.header.company_code != company_code {
5805                continue;
5806            }
5807
5808            for line in &je.lines {
5809                let acct = &line.gl_account;
5810                let category = Self::category_from_account_code(acct);
5811                let is_bs_account = matches!(
5812                    category.as_str(),
5813                    "Cash"
5814                        | "Receivables"
5815                        | "Inventory"
5816                        | "FixedAssets"
5817                        | "Payables"
5818                        | "AccruedLiabilities"
5819                        | "LongTermDebt"
5820                        | "Equity"
5821                );
5822
5823                if is_bs_account {
5824                    // Balance sheet: accumulate from start through period_end
5825                    if je.header.document_date <= period_end
5826                        && je.header.document_date >= start_date
5827                    {
5828                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5829                            line.debit_amount;
5830                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5831                            line.credit_amount;
5832                    }
5833                } else {
5834                    // Income statement: current period only
5835                    if je.header.fiscal_year == fiscal_year
5836                        && je.header.fiscal_period == fiscal_period
5837                    {
5838                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5839                            line.debit_amount;
5840                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
5841                            line.credit_amount;
5842                    }
5843                }
5844            }
5845        }
5846
5847        // Merge all accounts
5848        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
5849        all_accounts.extend(bs_debits.keys().cloned());
5850        all_accounts.extend(bs_credits.keys().cloned());
5851        all_accounts.extend(is_debits.keys().cloned());
5852        all_accounts.extend(is_credits.keys().cloned());
5853
5854        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
5855        sorted_accounts.sort();
5856
5857        let mut entries = Vec::new();
5858
5859        for acct_number in &sorted_accounts {
5860            let category = Self::category_from_account_code(acct_number);
5861            let is_bs_account = matches!(
5862                category.as_str(),
5863                "Cash"
5864                    | "Receivables"
5865                    | "Inventory"
5866                    | "FixedAssets"
5867                    | "Payables"
5868                    | "AccruedLiabilities"
5869                    | "LongTermDebt"
5870                    | "Equity"
5871            );
5872
5873            let (debit, credit) = if is_bs_account {
5874                (
5875                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5876                    bs_credits
5877                        .get(acct_number)
5878                        .copied()
5879                        .unwrap_or(Decimal::ZERO),
5880                )
5881            } else {
5882                (
5883                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
5884                    is_credits
5885                        .get(acct_number)
5886                        .copied()
5887                        .unwrap_or(Decimal::ZERO),
5888                )
5889            };
5890
5891            if debit.is_zero() && credit.is_zero() {
5892                continue;
5893            }
5894
5895            let account_name = coa
5896                .get_account(acct_number)
5897                .map(|gl| gl.short_description.clone())
5898                .unwrap_or_else(|| format!("Account {acct_number}"));
5899
5900            entries.push(datasynth_generators::TrialBalanceEntry {
5901                account_code: acct_number.clone(),
5902                account_name,
5903                category,
5904                debit_balance: debit,
5905                credit_balance: credit,
5906            });
5907        }
5908
5909        entries
5910    }
5911
5912    /// Build a JE-derived cash flow statement using the indirect method.
5913    ///
5914    /// Compares current and prior cumulative trial balances to derive working capital
5915    /// changes, producing a coherent cash flow statement tied to actual journal entries.
5916    fn build_cash_flow_from_trial_balances(
5917        current_tb: &[datasynth_generators::TrialBalanceEntry],
5918        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
5919        net_income: rust_decimal::Decimal,
5920    ) -> Vec<CashFlowItem> {
5921        use rust_decimal::Decimal;
5922
5923        // Helper: aggregate a TB by category and return net (debit - credit)
5924        let aggregate =
5925            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
5926                let mut map: HashMap<String, Decimal> = HashMap::new();
5927                for entry in tb {
5928                    let net = entry.debit_balance - entry.credit_balance;
5929                    *map.entry(entry.category.clone()).or_default() += net;
5930                }
5931                map
5932            };
5933
5934        let current = aggregate(current_tb);
5935        let prior = prior_tb.map(aggregate);
5936
5937        // Get balance for a category, defaulting to zero
5938        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
5939            *map.get(key).unwrap_or(&Decimal::ZERO)
5940        };
5941
5942        // Compute change: current - prior (or current if no prior)
5943        let change = |key: &str| -> Decimal {
5944            let curr = get(&current, key);
5945            match &prior {
5946                Some(p) => curr - get(p, key),
5947                None => curr,
5948            }
5949        };
5950
5951        // Operating activities (indirect method)
5952        // Depreciation add-back: approximate from FixedAssets decrease
5953        let fixed_asset_change = change("FixedAssets");
5954        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
5955            -fixed_asset_change
5956        } else {
5957            Decimal::ZERO
5958        };
5959
5960        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
5961        let ar_change = change("Receivables");
5962        let inventory_change = change("Inventory");
5963        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
5964        let ap_change = change("Payables");
5965        let accrued_change = change("AccruedLiabilities");
5966
5967        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
5968            + (-ap_change)
5969            + (-accrued_change);
5970
5971        // Investing activities
5972        let capex = if fixed_asset_change > Decimal::ZERO {
5973            -fixed_asset_change
5974        } else {
5975            Decimal::ZERO
5976        };
5977        let investing_cf = capex;
5978
5979        // Financing activities
5980        let debt_change = -change("LongTermDebt");
5981        let equity_change = -change("Equity");
5982        let financing_cf = debt_change + equity_change;
5983
5984        let net_change = operating_cf + investing_cf + financing_cf;
5985
5986        vec![
5987            CashFlowItem {
5988                item_code: "CF-NI".to_string(),
5989                label: "Net Income".to_string(),
5990                category: CashFlowCategory::Operating,
5991                amount: net_income,
5992                amount_prior: None,
5993                sort_order: 1,
5994                is_total: false,
5995            },
5996            CashFlowItem {
5997                item_code: "CF-DEP".to_string(),
5998                label: "Depreciation & Amortization".to_string(),
5999                category: CashFlowCategory::Operating,
6000                amount: depreciation_addback,
6001                amount_prior: None,
6002                sort_order: 2,
6003                is_total: false,
6004            },
6005            CashFlowItem {
6006                item_code: "CF-AR".to_string(),
6007                label: "Change in Accounts Receivable".to_string(),
6008                category: CashFlowCategory::Operating,
6009                amount: -ar_change,
6010                amount_prior: None,
6011                sort_order: 3,
6012                is_total: false,
6013            },
6014            CashFlowItem {
6015                item_code: "CF-AP".to_string(),
6016                label: "Change in Accounts Payable".to_string(),
6017                category: CashFlowCategory::Operating,
6018                amount: -ap_change,
6019                amount_prior: None,
6020                sort_order: 4,
6021                is_total: false,
6022            },
6023            CashFlowItem {
6024                item_code: "CF-INV".to_string(),
6025                label: "Change in Inventory".to_string(),
6026                category: CashFlowCategory::Operating,
6027                amount: -inventory_change,
6028                amount_prior: None,
6029                sort_order: 5,
6030                is_total: false,
6031            },
6032            CashFlowItem {
6033                item_code: "CF-OP".to_string(),
6034                label: "Net Cash from Operating Activities".to_string(),
6035                category: CashFlowCategory::Operating,
6036                amount: operating_cf,
6037                amount_prior: None,
6038                sort_order: 6,
6039                is_total: true,
6040            },
6041            CashFlowItem {
6042                item_code: "CF-CAPEX".to_string(),
6043                label: "Capital Expenditures".to_string(),
6044                category: CashFlowCategory::Investing,
6045                amount: capex,
6046                amount_prior: None,
6047                sort_order: 7,
6048                is_total: false,
6049            },
6050            CashFlowItem {
6051                item_code: "CF-INV-T".to_string(),
6052                label: "Net Cash from Investing Activities".to_string(),
6053                category: CashFlowCategory::Investing,
6054                amount: investing_cf,
6055                amount_prior: None,
6056                sort_order: 8,
6057                is_total: true,
6058            },
6059            CashFlowItem {
6060                item_code: "CF-DEBT".to_string(),
6061                label: "Net Borrowings / (Repayments)".to_string(),
6062                category: CashFlowCategory::Financing,
6063                amount: debt_change,
6064                amount_prior: None,
6065                sort_order: 9,
6066                is_total: false,
6067            },
6068            CashFlowItem {
6069                item_code: "CF-EQ".to_string(),
6070                label: "Equity Changes".to_string(),
6071                category: CashFlowCategory::Financing,
6072                amount: equity_change,
6073                amount_prior: None,
6074                sort_order: 10,
6075                is_total: false,
6076            },
6077            CashFlowItem {
6078                item_code: "CF-FIN-T".to_string(),
6079                label: "Net Cash from Financing Activities".to_string(),
6080                category: CashFlowCategory::Financing,
6081                amount: financing_cf,
6082                amount_prior: None,
6083                sort_order: 11,
6084                is_total: true,
6085            },
6086            CashFlowItem {
6087                item_code: "CF-NET".to_string(),
6088                label: "Net Change in Cash".to_string(),
6089                category: CashFlowCategory::Operating,
6090                amount: net_change,
6091                amount_prior: None,
6092                sort_order: 12,
6093                is_total: true,
6094            },
6095        ]
6096    }
6097
6098    /// Calculate net income from a set of trial balance entries.
6099    ///
6100    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
6101    fn calculate_net_income_from_tb(
6102        tb: &[datasynth_generators::TrialBalanceEntry],
6103    ) -> rust_decimal::Decimal {
6104        use rust_decimal::Decimal;
6105
6106        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
6107        for entry in tb {
6108            let net = entry.debit_balance - entry.credit_balance;
6109            *aggregated.entry(entry.category.clone()).or_default() += net;
6110        }
6111
6112        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
6113        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
6114        let opex = *aggregated
6115            .get("OperatingExpenses")
6116            .unwrap_or(&Decimal::ZERO);
6117        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
6118        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
6119
6120        // revenue is negative (credit-normal), expenses are positive (debit-normal)
6121        // other_income is typically negative (credit), other_expenses is typically positive
6122        let operating_income = revenue - cogs - opex - other_expenses - other_income;
6123        let tax_rate = Decimal::new(25, 2); // 0.25
6124        let tax = operating_income * tax_rate;
6125        operating_income - tax
6126    }
6127
/// Translate a GL account code into the category label used by
/// FinancialStatementGenerator.
///
/// Only the first two characters of `code` are inspected. They select one of:
/// Cash, Receivables, Inventory, FixedAssets, Payables, AccruedLiabilities,
/// LongTermDebt, Equity, Revenue, CostOfSales, OperatingExpenses, OtherIncome,
/// OtherExpenses. Any code that is shorter than two characters, or whose
/// prefix is not listed below, is classified as `OperatingExpenses`.
fn category_from_account_code(code: &str) -> String {
    let mut leading = code.chars();
    let label = match (leading.next(), leading.next()) {
        // 1x — assets
        (Some('1'), Some('0')) => "Cash",
        (Some('1'), Some('1')) => "Receivables",
        (Some('1'), Some('2'..='4')) => "Inventory",
        (Some('1'), Some('5'..='9')) => "FixedAssets",
        // 2x — liabilities
        (Some('2'), Some('0')) => "Payables",
        (Some('2'), Some('1'..='4')) => "AccruedLiabilities",
        (Some('2'), Some('5'..='9')) => "LongTermDebt",
        // 3x — equity
        (Some('3'), Some('0'..='9')) => "Equity",
        // 4x-8x — income statement
        (Some('4'), Some('0'..='4')) => "Revenue",
        (Some('5'), Some('0'..='2')) => "CostOfSales",
        (Some('6'), Some('0'..='9')) => "OperatingExpenses",
        (Some('7'), Some('0'..='4')) => "OtherIncome",
        (Some('8'), Some('0'..='9')) => "OtherExpenses",
        // Everything else (including gaps such as 45-49 or 9x) is operating expense.
        _ => "OperatingExpenses",
    };
    label.to_owned()
}
6156
6157    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
6158    fn phase_hr_data(
6159        &mut self,
6160        stats: &mut EnhancedGenerationStatistics,
6161    ) -> SynthResult<HrSnapshot> {
6162        if !self.phase_config.generate_hr {
6163            debug!("Phase 16: Skipped (HR generation disabled)");
6164            return Ok(HrSnapshot::default());
6165        }
6166
6167        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
6168
6169        let seed = self.seed;
6170        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6171            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6172        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6173        let company_code = self
6174            .config
6175            .companies
6176            .first()
6177            .map(|c| c.code.as_str())
6178            .unwrap_or("1000");
6179        let currency = self
6180            .config
6181            .companies
6182            .first()
6183            .map(|c| c.currency.as_str())
6184            .unwrap_or("USD");
6185
6186        let employee_ids: Vec<String> = self
6187            .master_data
6188            .employees
6189            .iter()
6190            .map(|e| e.employee_id.clone())
6191            .collect();
6192
6193        if employee_ids.is_empty() {
6194            debug!("Phase 16: Skipped (no employees available)");
6195            return Ok(HrSnapshot::default());
6196        }
6197
6198        // Extract cost-center pool from master data employees for cross-reference
6199        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
6200        let cost_center_ids: Vec<String> = self
6201            .master_data
6202            .employees
6203            .iter()
6204            .filter_map(|e| e.cost_center.clone())
6205            .collect::<std::collections::HashSet<_>>()
6206            .into_iter()
6207            .collect();
6208
6209        let mut snapshot = HrSnapshot::default();
6210
6211        // Generate payroll runs (one per month)
6212        if self.config.hr.payroll.enabled {
6213            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
6214                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6215
6216            // Look up country pack for payroll deductions and labels
6217            let payroll_pack = self.primary_pack();
6218
6219            // Store the pack on the generator so generate() resolves
6220            // localized deduction rates and labels from it.
6221            payroll_gen.set_country_pack(payroll_pack.clone());
6222
6223            let employees_with_salary: Vec<(
6224                String,
6225                rust_decimal::Decimal,
6226                Option<String>,
6227                Option<String>,
6228            )> = self
6229                .master_data
6230                .employees
6231                .iter()
6232                .map(|e| {
6233                    // Use the employee's actual annual base salary.
6234                    // Fall back to $60,000 / yr if somehow zero.
6235                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
6236                        e.base_salary
6237                    } else {
6238                        rust_decimal::Decimal::from(60_000)
6239                    };
6240                    (
6241                        e.employee_id.clone(),
6242                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
6243                        e.cost_center.clone(),
6244                        e.department_id.clone(),
6245                    )
6246                })
6247                .collect();
6248
6249            // Use generate_with_changes when employee change history is available
6250            // so that salary adjustments, transfers, etc. are reflected in payroll.
6251            let change_history = &self.master_data.employee_change_history;
6252            let has_changes = !change_history.is_empty();
6253            if has_changes {
6254                debug!(
6255                    "Payroll will incorporate {} employee change events",
6256                    change_history.len()
6257                );
6258            }
6259
6260            for month in 0..self.config.global.period_months {
6261                let period_start = start_date + chrono::Months::new(month);
6262                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
6263                let (run, items) = if has_changes {
6264                    payroll_gen.generate_with_changes(
6265                        company_code,
6266                        &employees_with_salary,
6267                        period_start,
6268                        period_end,
6269                        currency,
6270                        change_history,
6271                    )
6272                } else {
6273                    payroll_gen.generate(
6274                        company_code,
6275                        &employees_with_salary,
6276                        period_start,
6277                        period_end,
6278                        currency,
6279                    )
6280                };
6281                snapshot.payroll_runs.push(run);
6282                snapshot.payroll_run_count += 1;
6283                snapshot.payroll_line_item_count += items.len();
6284                snapshot.payroll_line_items.extend(items);
6285            }
6286        }
6287
6288        // Generate time entries
6289        if self.config.hr.time_attendance.enabled {
6290            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
6291                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6292            let entries = time_gen.generate(
6293                &employee_ids,
6294                start_date,
6295                end_date,
6296                &self.config.hr.time_attendance,
6297            );
6298            snapshot.time_entry_count = entries.len();
6299            snapshot.time_entries = entries;
6300        }
6301
6302        // Generate expense reports
6303        if self.config.hr.expenses.enabled {
6304            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
6305                .with_pools(employee_ids.clone(), cost_center_ids.clone());
6306            expense_gen.set_country_pack(self.primary_pack().clone());
6307            let company_currency = self
6308                .config
6309                .companies
6310                .first()
6311                .map(|c| c.currency.as_str())
6312                .unwrap_or("USD");
6313            let reports = expense_gen.generate_with_currency(
6314                &employee_ids,
6315                start_date,
6316                end_date,
6317                &self.config.hr.expenses,
6318                company_currency,
6319            );
6320            snapshot.expense_report_count = reports.len();
6321            snapshot.expense_reports = reports;
6322        }
6323
6324        // Generate benefit enrollments (gated on payroll, since benefits require employees)
6325        if self.config.hr.payroll.enabled {
6326            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
6327            let employee_pairs: Vec<(String, String)> = self
6328                .master_data
6329                .employees
6330                .iter()
6331                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
6332                .collect();
6333            let enrollments =
6334                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
6335            snapshot.benefit_enrollment_count = enrollments.len();
6336            snapshot.benefit_enrollments = enrollments;
6337        }
6338
6339        // Generate defined benefit pension plans (IAS 19 / ASC 715)
6340        if self.phase_config.generate_hr {
6341            let entity_name = self
6342                .config
6343                .companies
6344                .first()
6345                .map(|c| c.name.as_str())
6346                .unwrap_or("Entity");
6347            let period_months = self.config.global.period_months;
6348            let period_label = {
6349                let y = start_date.year();
6350                let m = start_date.month();
6351                if period_months >= 12 {
6352                    format!("FY{y}")
6353                } else {
6354                    format!("{y}-{m:02}")
6355                }
6356            };
6357            let reporting_date =
6358                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6359
6360            // Compute average annual salary from actual payroll data when available.
6361            // PayrollRun.total_gross covers all employees for one pay period; we sum
6362            // across all runs and divide by employee_count to get per-employee total,
6363            // then annualise for sub-annual periods.
6364            let avg_salary: Option<rust_decimal::Decimal> = {
6365                let employee_count = employee_ids.len();
6366                if self.config.hr.payroll.enabled
6367                    && employee_count > 0
6368                    && !snapshot.payroll_runs.is_empty()
6369                {
6370                    // Sum total gross pay across all payroll runs for this company
6371                    let total_gross: rust_decimal::Decimal = snapshot
6372                        .payroll_runs
6373                        .iter()
6374                        .filter(|r| r.company_code == company_code)
6375                        .map(|r| r.total_gross)
6376                        .sum();
6377                    if total_gross > rust_decimal::Decimal::ZERO {
6378                        // Annualise: total_gross covers `period_months` months of pay
6379                        let annual_total = if period_months > 0 && period_months < 12 {
6380                            total_gross * rust_decimal::Decimal::from(12u32)
6381                                / rust_decimal::Decimal::from(period_months)
6382                        } else {
6383                            total_gross
6384                        };
6385                        Some(
6386                            (annual_total / rust_decimal::Decimal::from(employee_count))
6387                                .round_dp(2),
6388                        )
6389                    } else {
6390                        None
6391                    }
6392                } else {
6393                    None
6394                }
6395            };
6396
6397            let mut pension_gen =
6398                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
6399            let pension_snap = pension_gen.generate(
6400                company_code,
6401                entity_name,
6402                &period_label,
6403                reporting_date,
6404                employee_ids.len(),
6405                currency,
6406                avg_salary,
6407                period_months,
6408            );
6409            snapshot.pension_plan_count = pension_snap.plans.len();
6410            snapshot.pension_plans = pension_snap.plans;
6411            snapshot.pension_obligations = pension_snap.obligations;
6412            snapshot.pension_plan_assets = pension_snap.plan_assets;
6413            snapshot.pension_disclosures = pension_snap.disclosures;
6414            // Pension JEs are returned here so they can be added to entries
6415            // in the caller (stored temporarily on snapshot for transfer).
6416            // We embed them in the hr snapshot for simplicity; the orchestrator
6417            // will extract and extend `entries`.
6418            snapshot.pension_journal_entries = pension_snap.journal_entries;
6419        }
6420
6421        // Generate stock-based compensation (ASC 718 / IFRS 2)
6422        if self.phase_config.generate_hr && !employee_ids.is_empty() {
6423            let period_months = self.config.global.period_months;
6424            let period_label = {
6425                let y = start_date.year();
6426                let m = start_date.month();
6427                if period_months >= 12 {
6428                    format!("FY{y}")
6429                } else {
6430                    format!("{y}-{m:02}")
6431                }
6432            };
6433            let reporting_date =
6434                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
6435
6436            let mut stock_comp_gen =
6437                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
6438            let stock_snap = stock_comp_gen.generate(
6439                company_code,
6440                &employee_ids,
6441                start_date,
6442                &period_label,
6443                reporting_date,
6444                currency,
6445            );
6446            snapshot.stock_grant_count = stock_snap.grants.len();
6447            snapshot.stock_grants = stock_snap.grants;
6448            snapshot.stock_comp_expenses = stock_snap.expenses;
6449            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
6450        }
6451
6452        stats.payroll_run_count = snapshot.payroll_run_count;
6453        stats.time_entry_count = snapshot.time_entry_count;
6454        stats.expense_report_count = snapshot.expense_report_count;
6455        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
6456        stats.pension_plan_count = snapshot.pension_plan_count;
6457        stats.stock_grant_count = snapshot.stock_grant_count;
6458
6459        info!(
6460            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
6461            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
6462            snapshot.time_entry_count, snapshot.expense_report_count,
6463            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
6464            snapshot.stock_grant_count
6465        );
6466        self.check_resources_with_log("post-hr")?;
6467
6468        Ok(snapshot)
6469    }
6470
6471    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
6472    fn phase_accounting_standards(
6473        &mut self,
6474        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
6475        journal_entries: &[JournalEntry],
6476        stats: &mut EnhancedGenerationStatistics,
6477    ) -> SynthResult<AccountingStandardsSnapshot> {
6478        if !self.phase_config.generate_accounting_standards {
6479            debug!("Phase 17: Skipped (accounting standards generation disabled)");
6480            return Ok(AccountingStandardsSnapshot::default());
6481        }
6482        info!("Phase 17: Generating Accounting Standards Data");
6483
6484        let seed = self.seed;
6485        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6486            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6487        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6488        let company_code = self
6489            .config
6490            .companies
6491            .first()
6492            .map(|c| c.code.as_str())
6493            .unwrap_or("1000");
6494        let currency = self
6495            .config
6496            .companies
6497            .first()
6498            .map(|c| c.currency.as_str())
6499            .unwrap_or("USD");
6500
6501        // Convert config framework to standards framework.
6502        // If the user explicitly set a framework in the YAML config, use that.
6503        // Otherwise, fall back to the country pack's accounting.framework field,
6504        // and if that is also absent or unrecognised, default to US GAAP.
6505        let framework = match self.config.accounting_standards.framework {
6506            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
6507                datasynth_standards::framework::AccountingFramework::UsGaap
6508            }
6509            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
6510                datasynth_standards::framework::AccountingFramework::Ifrs
6511            }
6512            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
6513                datasynth_standards::framework::AccountingFramework::DualReporting
6514            }
6515            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
6516                datasynth_standards::framework::AccountingFramework::FrenchGaap
6517            }
6518            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
6519                datasynth_standards::framework::AccountingFramework::GermanGaap
6520            }
6521            None => {
6522                // Derive framework from the primary company's country pack
6523                let pack = self.primary_pack();
6524                let pack_fw = pack.accounting.framework.as_str();
6525                match pack_fw {
6526                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
6527                    "dual_reporting" => {
6528                        datasynth_standards::framework::AccountingFramework::DualReporting
6529                    }
6530                    "french_gaap" => {
6531                        datasynth_standards::framework::AccountingFramework::FrenchGaap
6532                    }
6533                    "german_gaap" | "hgb" => {
6534                        datasynth_standards::framework::AccountingFramework::GermanGaap
6535                    }
6536                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
6537                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
6538                }
6539            }
6540        };
6541
6542        let mut snapshot = AccountingStandardsSnapshot::default();
6543
6544        // Revenue recognition
6545        if self.config.accounting_standards.revenue_recognition.enabled {
6546            let customer_ids: Vec<String> = self
6547                .master_data
6548                .customers
6549                .iter()
6550                .map(|c| c.customer_id.clone())
6551                .collect();
6552
6553            if !customer_ids.is_empty() {
6554                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
6555                let contracts = rev_gen.generate(
6556                    company_code,
6557                    &customer_ids,
6558                    start_date,
6559                    end_date,
6560                    currency,
6561                    &self.config.accounting_standards.revenue_recognition,
6562                    framework,
6563                );
6564                snapshot.revenue_contract_count = contracts.len();
6565                snapshot.contracts = contracts;
6566            }
6567        }
6568
6569        // Impairment testing
6570        if self.config.accounting_standards.impairment.enabled {
6571            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
6572                .master_data
6573                .assets
6574                .iter()
6575                .map(|a| {
6576                    (
6577                        a.asset_id.clone(),
6578                        a.description.clone(),
6579                        a.acquisition_cost,
6580                    )
6581                })
6582                .collect();
6583
6584            if !asset_data.is_empty() {
6585                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
6586                let tests = imp_gen.generate(
6587                    company_code,
6588                    &asset_data,
6589                    end_date,
6590                    &self.config.accounting_standards.impairment,
6591                    framework,
6592                );
6593                snapshot.impairment_test_count = tests.len();
6594                snapshot.impairment_tests = tests;
6595            }
6596        }
6597
6598        // Business combinations (IFRS 3 / ASC 805)
6599        if self
6600            .config
6601            .accounting_standards
6602            .business_combinations
6603            .enabled
6604        {
6605            let bc_config = &self.config.accounting_standards.business_combinations;
6606            let framework_str = match framework {
6607                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6608                _ => "US_GAAP",
6609            };
6610            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
6611            let bc_snap = bc_gen.generate(
6612                company_code,
6613                currency,
6614                start_date,
6615                end_date,
6616                bc_config.acquisition_count,
6617                framework_str,
6618            );
6619            snapshot.business_combination_count = bc_snap.combinations.len();
6620            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
6621            snapshot.business_combinations = bc_snap.combinations;
6622        }
6623
6624        // Expected Credit Loss (IFRS 9 / ASC 326)
6625        if self
6626            .config
6627            .accounting_standards
6628            .expected_credit_loss
6629            .enabled
6630        {
6631            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
6632            let framework_str = match framework {
6633                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
6634                _ => "ASC_326",
6635            };
6636
6637            // Use AR aging data from the subledger snapshot if available;
6638            // otherwise generate synthetic bucket exposures.
6639            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6640
6641            let mut ecl_gen = EclGenerator::new(seed + 43);
6642
6643            // Collect combined bucket totals across all company AR aging reports.
6644            let bucket_exposures: Vec<(
6645                datasynth_core::models::subledger::ar::AgingBucket,
6646                rust_decimal::Decimal,
6647            )> = if ar_aging_reports.is_empty() {
6648                // No AR aging data — synthesise plausible bucket exposures.
6649                use datasynth_core::models::subledger::ar::AgingBucket;
6650                vec![
6651                    (
6652                        AgingBucket::Current,
6653                        rust_decimal::Decimal::from(500_000_u32),
6654                    ),
6655                    (
6656                        AgingBucket::Days1To30,
6657                        rust_decimal::Decimal::from(120_000_u32),
6658                    ),
6659                    (
6660                        AgingBucket::Days31To60,
6661                        rust_decimal::Decimal::from(45_000_u32),
6662                    ),
6663                    (
6664                        AgingBucket::Days61To90,
6665                        rust_decimal::Decimal::from(15_000_u32),
6666                    ),
6667                    (
6668                        AgingBucket::Over90Days,
6669                        rust_decimal::Decimal::from(8_000_u32),
6670                    ),
6671                ]
6672            } else {
6673                use datasynth_core::models::subledger::ar::AgingBucket;
6674                // Sum bucket totals from all reports.
6675                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
6676                    std::collections::HashMap::new();
6677                for report in ar_aging_reports {
6678                    for (bucket, amount) in &report.bucket_totals {
6679                        *totals.entry(*bucket).or_default() += amount;
6680                    }
6681                }
6682                AgingBucket::all()
6683                    .into_iter()
6684                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
6685                    .collect()
6686            };
6687
6688            let ecl_snap = ecl_gen.generate(
6689                company_code,
6690                end_date,
6691                &bucket_exposures,
6692                ecl_config,
6693                &period_label,
6694                framework_str,
6695            );
6696
6697            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
6698            snapshot.ecl_models = ecl_snap.ecl_models;
6699            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
6700            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
6701        }
6702
6703        // Provisions and contingencies (IAS 37 / ASC 450)
6704        {
6705            let framework_str = match framework {
6706                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
6707                _ => "US_GAAP",
6708            };
6709
6710            // Compute actual revenue from the journal entries generated so far.
6711            // The `journal_entries` slice passed to this phase contains all GL entries
6712            // up to and including Period Close. Fall back to a minimum of 100_000 to
6713            // avoid degenerate zero-based provision amounts on first-period datasets.
6714            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
6715                .max(rust_decimal::Decimal::from(100_000_u32));
6716
6717            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6718
6719            let mut prov_gen = ProvisionGenerator::new(seed + 44);
6720            let prov_snap = prov_gen.generate(
6721                company_code,
6722                currency,
6723                revenue_proxy,
6724                end_date,
6725                &period_label,
6726                framework_str,
6727                None, // prior_opening: no carry-forward data in single-period runs
6728            );
6729
6730            snapshot.provision_count = prov_snap.provisions.len();
6731            snapshot.provisions = prov_snap.provisions;
6732            snapshot.provision_movements = prov_snap.movements;
6733            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
6734            snapshot.provision_journal_entries = prov_snap.journal_entries;
6735        }
6736
6737        // IAS 21 Functional Currency Translation
6738        // For each company whose functional currency differs from the presentation
6739        // currency, generate a CurrencyTranslationResult with CTA (OCI).
6740        {
6741            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
6742
6743            let presentation_currency = self
6744                .config
6745                .global
6746                .presentation_currency
6747                .clone()
6748                .unwrap_or_else(|| self.config.global.group_currency.clone());
6749
6750            // Build a minimal rate table populated with approximate rates from
6751            // the FX model base rates (USD-based) so we can do the translation.
6752            let mut rate_table = FxRateTable::new(&presentation_currency);
6753
6754            // Populate with base rates against USD; if presentation_currency is
6755            // not USD we do a best-effort two-step conversion using the table's
6756            // triangulation support.
6757            let base_rates = base_rates_usd();
6758            for (ccy, rate) in &base_rates {
6759                rate_table.add_rate(FxRate::new(
6760                    ccy,
6761                    "USD",
6762                    RateType::Closing,
6763                    end_date,
6764                    *rate,
6765                    "SYNTHETIC",
6766                ));
6767                // Average rate = 98% of closing (approximation).
6768                // 0.98 = 98/100 = Decimal::new(98, 2)
6769                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
6770                rate_table.add_rate(FxRate::new(
6771                    ccy,
6772                    "USD",
6773                    RateType::Average,
6774                    end_date,
6775                    avg,
6776                    "SYNTHETIC",
6777                ));
6778            }
6779
6780            let mut translation_results = Vec::new();
6781            for company in &self.config.companies {
6782                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
6783                // to ensure the translation produces non-trivial CTA amounts.
6784                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
6785                    .max(rust_decimal::Decimal::from(100_000_u32));
6786
6787                let func_ccy = company
6788                    .functional_currency
6789                    .clone()
6790                    .unwrap_or_else(|| company.currency.clone());
6791
6792                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
6793                    &company.code,
6794                    &func_ccy,
6795                    &presentation_currency,
6796                    &ias21_period_label,
6797                    end_date,
6798                    company_revenue,
6799                    &rate_table,
6800                );
6801                translation_results.push(result);
6802            }
6803
6804            snapshot.currency_translation_count = translation_results.len();
6805            snapshot.currency_translation_results = translation_results;
6806        }
6807
6808        stats.revenue_contract_count = snapshot.revenue_contract_count;
6809        stats.impairment_test_count = snapshot.impairment_test_count;
6810        stats.business_combination_count = snapshot.business_combination_count;
6811        stats.ecl_model_count = snapshot.ecl_model_count;
6812        stats.provision_count = snapshot.provision_count;
6813
6814        info!(
6815            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations",
6816            snapshot.revenue_contract_count,
6817            snapshot.impairment_test_count,
6818            snapshot.business_combination_count,
6819            snapshot.ecl_model_count,
6820            snapshot.provision_count,
6821            snapshot.currency_translation_count
6822        );
6823        self.check_resources_with_log("post-accounting-standards")?;
6824
6825        Ok(snapshot)
6826    }
6827
6828    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
6829    fn phase_manufacturing(
6830        &mut self,
6831        stats: &mut EnhancedGenerationStatistics,
6832    ) -> SynthResult<ManufacturingSnapshot> {
6833        if !self.phase_config.generate_manufacturing {
6834            debug!("Phase 18: Skipped (manufacturing generation disabled)");
6835            return Ok(ManufacturingSnapshot::default());
6836        }
6837        info!("Phase 18: Generating Manufacturing Data");
6838
6839        let seed = self.seed;
6840        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6841            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6842        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6843        let company_code = self
6844            .config
6845            .companies
6846            .first()
6847            .map(|c| c.code.as_str())
6848            .unwrap_or("1000");
6849
6850        let material_data: Vec<(String, String)> = self
6851            .master_data
6852            .materials
6853            .iter()
6854            .map(|m| (m.material_id.clone(), m.description.clone()))
6855            .collect();
6856
6857        if material_data.is_empty() {
6858            debug!("Phase 18: Skipped (no materials available)");
6859            return Ok(ManufacturingSnapshot::default());
6860        }
6861
6862        let mut snapshot = ManufacturingSnapshot::default();
6863
6864        // Generate production orders
6865        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
6866        let production_orders = prod_gen.generate(
6867            company_code,
6868            &material_data,
6869            start_date,
6870            end_date,
6871            &self.config.manufacturing.production_orders,
6872            &self.config.manufacturing.costing,
6873            &self.config.manufacturing.routing,
6874        );
6875        snapshot.production_order_count = production_orders.len();
6876
6877        // Generate quality inspections from production orders
6878        let inspection_data: Vec<(String, String, String)> = production_orders
6879            .iter()
6880            .map(|po| {
6881                (
6882                    po.order_id.clone(),
6883                    po.material_id.clone(),
6884                    po.material_description.clone(),
6885                )
6886            })
6887            .collect();
6888
6889        snapshot.production_orders = production_orders;
6890
6891        if !inspection_data.is_empty() {
6892            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
6893            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
6894            snapshot.quality_inspection_count = inspections.len();
6895            snapshot.quality_inspections = inspections;
6896        }
6897
6898        // Generate cycle counts (one per month)
6899        let storage_locations: Vec<(String, String)> = material_data
6900            .iter()
6901            .enumerate()
6902            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
6903            .collect();
6904
6905        let employee_ids: Vec<String> = self
6906            .master_data
6907            .employees
6908            .iter()
6909            .map(|e| e.employee_id.clone())
6910            .collect();
6911        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
6912            .with_employee_pool(employee_ids);
6913        let mut cycle_count_total = 0usize;
6914        for month in 0..self.config.global.period_months {
6915            let count_date = start_date + chrono::Months::new(month);
6916            let items_per_count = storage_locations.len().clamp(10, 50);
6917            let cc = cc_gen.generate(
6918                company_code,
6919                &storage_locations,
6920                count_date,
6921                items_per_count,
6922            );
6923            snapshot.cycle_counts.push(cc);
6924            cycle_count_total += 1;
6925        }
6926        snapshot.cycle_count_count = cycle_count_total;
6927
6928        // Generate BOM components
6929        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
6930        let bom_components = bom_gen.generate(company_code, &material_data);
6931        snapshot.bom_component_count = bom_components.len();
6932        snapshot.bom_components = bom_components;
6933
6934        // Generate inventory movements — link GoodsIssue movements to real production order IDs
6935        let currency = self
6936            .config
6937            .companies
6938            .first()
6939            .map(|c| c.currency.as_str())
6940            .unwrap_or("USD");
6941        let production_order_ids: Vec<String> = snapshot
6942            .production_orders
6943            .iter()
6944            .map(|po| po.order_id.clone())
6945            .collect();
6946        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
6947        let inventory_movements = inv_mov_gen.generate_with_production_orders(
6948            company_code,
6949            &material_data,
6950            start_date,
6951            end_date,
6952            2,
6953            currency,
6954            &production_order_ids,
6955        );
6956        snapshot.inventory_movement_count = inventory_movements.len();
6957        snapshot.inventory_movements = inventory_movements;
6958
6959        stats.production_order_count = snapshot.production_order_count;
6960        stats.quality_inspection_count = snapshot.quality_inspection_count;
6961        stats.cycle_count_count = snapshot.cycle_count_count;
6962        stats.bom_component_count = snapshot.bom_component_count;
6963        stats.inventory_movement_count = snapshot.inventory_movement_count;
6964
6965        info!(
6966            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
6967            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
6968            snapshot.bom_component_count, snapshot.inventory_movement_count
6969        );
6970        self.check_resources_with_log("post-manufacturing")?;
6971
6972        Ok(snapshot)
6973    }
6974
6975    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
6976    fn phase_sales_kpi_budgets(
6977        &mut self,
6978        coa: &Arc<ChartOfAccounts>,
6979        financial_reporting: &FinancialReportingSnapshot,
6980        stats: &mut EnhancedGenerationStatistics,
6981    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
6982        if !self.phase_config.generate_sales_kpi_budgets {
6983            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
6984            return Ok(SalesKpiBudgetsSnapshot::default());
6985        }
6986        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
6987
6988        let seed = self.seed;
6989        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6990            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6991        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
6992        let company_code = self
6993            .config
6994            .companies
6995            .first()
6996            .map(|c| c.code.as_str())
6997            .unwrap_or("1000");
6998
6999        let mut snapshot = SalesKpiBudgetsSnapshot::default();
7000
7001        // Sales Quotes
7002        if self.config.sales_quotes.enabled {
7003            let customer_data: Vec<(String, String)> = self
7004                .master_data
7005                .customers
7006                .iter()
7007                .map(|c| (c.customer_id.clone(), c.name.clone()))
7008                .collect();
7009            let material_data: Vec<(String, String)> = self
7010                .master_data
7011                .materials
7012                .iter()
7013                .map(|m| (m.material_id.clone(), m.description.clone()))
7014                .collect();
7015
7016            if !customer_data.is_empty() && !material_data.is_empty() {
7017                let employee_ids: Vec<String> = self
7018                    .master_data
7019                    .employees
7020                    .iter()
7021                    .map(|e| e.employee_id.clone())
7022                    .collect();
7023                let customer_ids: Vec<String> = self
7024                    .master_data
7025                    .customers
7026                    .iter()
7027                    .map(|c| c.customer_id.clone())
7028                    .collect();
7029                let company_currency = self
7030                    .config
7031                    .companies
7032                    .first()
7033                    .map(|c| c.currency.as_str())
7034                    .unwrap_or("USD");
7035
7036                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
7037                    .with_pools(employee_ids, customer_ids);
7038                let quotes = quote_gen.generate_with_currency(
7039                    company_code,
7040                    &customer_data,
7041                    &material_data,
7042                    start_date,
7043                    end_date,
7044                    &self.config.sales_quotes,
7045                    company_currency,
7046                );
7047                snapshot.sales_quote_count = quotes.len();
7048                snapshot.sales_quotes = quotes;
7049            }
7050        }
7051
7052        // Management KPIs
7053        if self.config.financial_reporting.management_kpis.enabled {
7054            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
7055            let mut kpis = kpi_gen.generate(
7056                company_code,
7057                start_date,
7058                end_date,
7059                &self.config.financial_reporting.management_kpis,
7060            );
7061
7062            // Override financial KPIs with actual data from financial statements
7063            {
7064                use rust_decimal::Decimal;
7065
7066                if let Some(income_stmt) =
7067                    financial_reporting.financial_statements.iter().find(|fs| {
7068                        fs.statement_type == StatementType::IncomeStatement
7069                            && fs.company_code == company_code
7070                    })
7071                {
7072                    // Extract revenue and COGS from income statement line items
7073                    let total_revenue: Decimal = income_stmt
7074                        .line_items
7075                        .iter()
7076                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
7077                        .map(|li| li.amount)
7078                        .sum();
7079                    let total_cogs: Decimal = income_stmt
7080                        .line_items
7081                        .iter()
7082                        .filter(|li| {
7083                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
7084                                && !li.is_total
7085                        })
7086                        .map(|li| li.amount.abs())
7087                        .sum();
7088                    let total_opex: Decimal = income_stmt
7089                        .line_items
7090                        .iter()
7091                        .filter(|li| {
7092                            li.section.contains("Expense")
7093                                && !li.is_total
7094                                && !li.section.contains("Cost")
7095                        })
7096                        .map(|li| li.amount.abs())
7097                        .sum();
7098
7099                    if total_revenue > Decimal::ZERO {
7100                        let hundred = Decimal::from(100);
7101                        let gross_margin_pct =
7102                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
7103                        let operating_income = total_revenue - total_cogs - total_opex;
7104                        let op_margin_pct =
7105                            (operating_income * hundred / total_revenue).round_dp(2);
7106
7107                        // Override gross margin and operating margin KPIs
7108                        for kpi in &mut kpis {
7109                            if kpi.name == "Gross Margin" {
7110                                kpi.value = gross_margin_pct;
7111                            } else if kpi.name == "Operating Margin" {
7112                                kpi.value = op_margin_pct;
7113                            }
7114                        }
7115                    }
7116                }
7117
7118                // Override Current Ratio from balance sheet
7119                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
7120                    fs.statement_type == StatementType::BalanceSheet
7121                        && fs.company_code == company_code
7122                }) {
7123                    let current_assets: Decimal = bs
7124                        .line_items
7125                        .iter()
7126                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
7127                        .map(|li| li.amount)
7128                        .sum();
7129                    let current_liabilities: Decimal = bs
7130                        .line_items
7131                        .iter()
7132                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
7133                        .map(|li| li.amount.abs())
7134                        .sum();
7135
7136                    if current_liabilities > Decimal::ZERO {
7137                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
7138                        for kpi in &mut kpis {
7139                            if kpi.name == "Current Ratio" {
7140                                kpi.value = current_ratio;
7141                            }
7142                        }
7143                    }
7144                }
7145            }
7146
7147            snapshot.kpi_count = kpis.len();
7148            snapshot.kpis = kpis;
7149        }
7150
7151        // Budgets
7152        if self.config.financial_reporting.budgets.enabled {
7153            let account_data: Vec<(String, String)> = coa
7154                .accounts
7155                .iter()
7156                .map(|a| (a.account_number.clone(), a.short_description.clone()))
7157                .collect();
7158
7159            if !account_data.is_empty() {
7160                let fiscal_year = start_date.year() as u32;
7161                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
7162                let budget = budget_gen.generate(
7163                    company_code,
7164                    fiscal_year,
7165                    &account_data,
7166                    &self.config.financial_reporting.budgets,
7167                );
7168                snapshot.budget_line_count = budget.line_items.len();
7169                snapshot.budgets.push(budget);
7170            }
7171        }
7172
7173        stats.sales_quote_count = snapshot.sales_quote_count;
7174        stats.kpi_count = snapshot.kpi_count;
7175        stats.budget_line_count = snapshot.budget_line_count;
7176
7177        info!(
7178            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
7179            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
7180        );
7181        self.check_resources_with_log("post-sales-kpi-budgets")?;
7182
7183        Ok(snapshot)
7184    }
7185
7186    /// Compute pre-tax income for a single company from actual journal entries.
7187    ///
7188    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
7189    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
7190    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
7191    /// and the period-close engine so that all three use a consistent definition.
7192    fn compute_pre_tax_income(
7193        company_code: &str,
7194        journal_entries: &[JournalEntry],
7195    ) -> rust_decimal::Decimal {
7196        use datasynth_core::accounts::AccountCategory;
7197        use rust_decimal::Decimal;
7198
7199        let mut total_revenue = Decimal::ZERO;
7200        let mut total_expenses = Decimal::ZERO;
7201
7202        for je in journal_entries {
7203            if je.header.company_code != company_code {
7204                continue;
7205            }
7206            for line in &je.lines {
7207                let cat = AccountCategory::from_account(&line.gl_account);
7208                match cat {
7209                    AccountCategory::Revenue => {
7210                        total_revenue += line.credit_amount - line.debit_amount;
7211                    }
7212                    AccountCategory::Cogs
7213                    | AccountCategory::OperatingExpense
7214                    | AccountCategory::OtherIncomeExpense => {
7215                        total_expenses += line.debit_amount - line.credit_amount;
7216                    }
7217                    _ => {}
7218                }
7219            }
7220        }
7221
7222        let pti = (total_revenue - total_expenses).round_dp(2);
7223        if pti == rust_decimal::Decimal::ZERO {
7224            // No income statement activity yet — fall back to a synthetic value so the
7225            // tax provision generator can still produce meaningful output.
7226            rust_decimal::Decimal::from(1_000_000u32)
7227        } else {
7228            pti
7229        }
7230    }
7231
7232    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
7233    fn phase_tax_generation(
7234        &mut self,
7235        document_flows: &DocumentFlowSnapshot,
7236        journal_entries: &[JournalEntry],
7237        stats: &mut EnhancedGenerationStatistics,
7238    ) -> SynthResult<TaxSnapshot> {
7239        if !self.phase_config.generate_tax {
7240            debug!("Phase 20: Skipped (tax generation disabled)");
7241            return Ok(TaxSnapshot::default());
7242        }
7243        info!("Phase 20: Generating Tax Data");
7244
7245        let seed = self.seed;
7246        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7247            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7248        let fiscal_year = start_date.year();
7249        let company_code = self
7250            .config
7251            .companies
7252            .first()
7253            .map(|c| c.code.as_str())
7254            .unwrap_or("1000");
7255
7256        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
7257            seed + 370,
7258            self.config.tax.clone(),
7259        );
7260
7261        let pack = self.primary_pack().clone();
7262        let (jurisdictions, codes) =
7263            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
7264
7265        // Generate tax provisions for each company
7266        let mut provisions = Vec::new();
7267        if self.config.tax.provisions.enabled {
7268            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
7269            for company in &self.config.companies {
7270                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
7271                let statutory_rate = rust_decimal::Decimal::new(
7272                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
7273                    2,
7274                );
7275                let provision = provision_gen.generate(
7276                    &company.code,
7277                    start_date,
7278                    pre_tax_income,
7279                    statutory_rate,
7280                );
7281                provisions.push(provision);
7282            }
7283        }
7284
7285        // Generate tax lines from document invoices
7286        let mut tax_lines = Vec::new();
7287        if !codes.is_empty() {
7288            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
7289                datasynth_generators::TaxLineGeneratorConfig::default(),
7290                codes.clone(),
7291                seed + 372,
7292            );
7293
7294            // Tax lines from vendor invoices (input tax)
7295            // Use the first company's country as buyer country
7296            let buyer_country = self
7297                .config
7298                .companies
7299                .first()
7300                .map(|c| c.country.as_str())
7301                .unwrap_or("US");
7302            for vi in &document_flows.vendor_invoices {
7303                let lines = tax_line_gen.generate_for_document(
7304                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
7305                    &vi.header.document_id,
7306                    buyer_country, // seller approx same country
7307                    buyer_country,
7308                    vi.payable_amount,
7309                    vi.header.document_date,
7310                    None,
7311                );
7312                tax_lines.extend(lines);
7313            }
7314
7315            // Tax lines from customer invoices (output tax)
7316            for ci in &document_flows.customer_invoices {
7317                let lines = tax_line_gen.generate_for_document(
7318                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
7319                    &ci.header.document_id,
7320                    buyer_country, // seller is the company
7321                    buyer_country,
7322                    ci.total_gross_amount,
7323                    ci.header.document_date,
7324                    None,
7325                );
7326                tax_lines.extend(lines);
7327            }
7328        }
7329
7330        // Generate deferred tax data (IAS 12 / ASC 740) for each company
7331        let deferred_tax = {
7332            let companies: Vec<(&str, &str)> = self
7333                .config
7334                .companies
7335                .iter()
7336                .map(|c| (c.code.as_str(), c.country.as_str()))
7337                .collect();
7338            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
7339            deferred_gen.generate(&companies, start_date, journal_entries)
7340        };
7341
7342        // Build a document_id → posting_date map so each tax JE uses its
7343        // source document's date rather than a blanket period-end date.
7344        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
7345            std::collections::HashMap::new();
7346        for vi in &document_flows.vendor_invoices {
7347            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
7348        }
7349        for ci in &document_flows.customer_invoices {
7350            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
7351        }
7352
7353        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
7354        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7355        let tax_posting_journal_entries = if !tax_lines.is_empty() {
7356            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
7357                &tax_lines,
7358                company_code,
7359                &doc_dates,
7360                end_date,
7361            );
7362            debug!("Generated {} tax posting JEs", jes.len());
7363            jes
7364        } else {
7365            Vec::new()
7366        };
7367
7368        let snapshot = TaxSnapshot {
7369            jurisdiction_count: jurisdictions.len(),
7370            code_count: codes.len(),
7371            jurisdictions,
7372            codes,
7373            tax_provisions: provisions,
7374            tax_lines,
7375            tax_returns: Vec::new(),
7376            withholding_records: Vec::new(),
7377            tax_anomaly_labels: Vec::new(),
7378            deferred_tax,
7379            tax_posting_journal_entries,
7380        };
7381
7382        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
7383        stats.tax_code_count = snapshot.code_count;
7384        stats.tax_provision_count = snapshot.tax_provisions.len();
7385        stats.tax_line_count = snapshot.tax_lines.len();
7386
7387        info!(
7388            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
7389            snapshot.jurisdiction_count,
7390            snapshot.code_count,
7391            snapshot.tax_provisions.len(),
7392            snapshot.deferred_tax.temporary_differences.len(),
7393            snapshot.deferred_tax.journal_entries.len(),
7394            snapshot.tax_posting_journal_entries.len(),
7395        );
7396        self.check_resources_with_log("post-tax")?;
7397
7398        Ok(snapshot)
7399    }
7400
7401    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
7402    fn phase_esg_generation(
7403        &mut self,
7404        document_flows: &DocumentFlowSnapshot,
7405        manufacturing: &ManufacturingSnapshot,
7406        stats: &mut EnhancedGenerationStatistics,
7407    ) -> SynthResult<EsgSnapshot> {
7408        if !self.phase_config.generate_esg {
7409            debug!("Phase 21: Skipped (ESG generation disabled)");
7410            return Ok(EsgSnapshot::default());
7411        }
7412        let degradation = self.check_resources()?;
7413        if degradation >= DegradationLevel::Reduced {
7414            debug!(
7415                "Phase skipped due to resource pressure (degradation: {:?})",
7416                degradation
7417            );
7418            return Ok(EsgSnapshot::default());
7419        }
7420        info!("Phase 21: Generating ESG Data");
7421
7422        let seed = self.seed;
7423        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7424            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7425        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7426        let entity_id = self
7427            .config
7428            .companies
7429            .first()
7430            .map(|c| c.code.as_str())
7431            .unwrap_or("1000");
7432
7433        let esg_cfg = &self.config.esg;
7434        let mut snapshot = EsgSnapshot::default();
7435
7436        // Energy consumption (feeds into scope 1 & 2 emissions)
7437        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
7438            esg_cfg.environmental.energy.clone(),
7439            seed + 80,
7440        );
7441        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
7442
7443        // Water usage
7444        let facility_count = esg_cfg.environmental.energy.facility_count;
7445        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
7446        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
7447
7448        // Waste
7449        let mut waste_gen = datasynth_generators::WasteGenerator::new(
7450            seed + 82,
7451            esg_cfg.environmental.waste.diversion_target,
7452            facility_count,
7453        );
7454        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
7455
7456        // Emissions (scope 1, 2, 3)
7457        let mut emission_gen =
7458            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
7459
7460        // Build EnergyInput from energy_records
7461        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
7462            .iter()
7463            .map(|e| datasynth_generators::EnergyInput {
7464                facility_id: e.facility_id.clone(),
7465                energy_type: match e.energy_source {
7466                    EnergySourceType::NaturalGas => {
7467                        datasynth_generators::EnergyInputType::NaturalGas
7468                    }
7469                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
7470                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
7471                    _ => datasynth_generators::EnergyInputType::Electricity,
7472                },
7473                consumption_kwh: e.consumption_kwh,
7474                period: e.period,
7475            })
7476            .collect();
7477
7478        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
7479        if !manufacturing.production_orders.is_empty() {
7480            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
7481                &manufacturing.production_orders,
7482                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
7483                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
7484            );
7485            if !mfg_energy.is_empty() {
7486                info!(
7487                    "ESG: {} energy inputs derived from {} production orders",
7488                    mfg_energy.len(),
7489                    manufacturing.production_orders.len(),
7490                );
7491                energy_inputs.extend(mfg_energy);
7492            }
7493        }
7494
7495        let mut emissions = Vec::new();
7496        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
7497        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
7498
7499        // Scope 3: use vendor spend data from actual payments
7500        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
7501            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7502            for payment in &document_flows.payments {
7503                if payment.is_vendor {
7504                    *totals
7505                        .entry(payment.business_partner_id.clone())
7506                        .or_default() += payment.amount;
7507                }
7508            }
7509            totals
7510        };
7511        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
7512            .master_data
7513            .vendors
7514            .iter()
7515            .map(|v| {
7516                let spend = vendor_payment_totals
7517                    .get(&v.vendor_id)
7518                    .copied()
7519                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
7520                datasynth_generators::VendorSpendInput {
7521                    vendor_id: v.vendor_id.clone(),
7522                    category: format!("{:?}", v.vendor_type).to_lowercase(),
7523                    spend,
7524                    country: v.country.clone(),
7525                }
7526            })
7527            .collect();
7528        if !vendor_spend.is_empty() {
7529            emissions.extend(emission_gen.generate_scope3_purchased_goods(
7530                entity_id,
7531                &vendor_spend,
7532                start_date,
7533                end_date,
7534            ));
7535        }
7536
7537        // Business travel & commuting (scope 3)
7538        let headcount = self.master_data.employees.len() as u32;
7539        if headcount > 0 {
7540            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
7541            emissions.extend(emission_gen.generate_scope3_business_travel(
7542                entity_id,
7543                travel_spend,
7544                start_date,
7545            ));
7546            emissions
7547                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
7548        }
7549
7550        snapshot.emission_count = emissions.len();
7551        snapshot.emissions = emissions;
7552        snapshot.energy = energy_records;
7553
7554        // Social: Workforce diversity, pay equity, safety
7555        let mut workforce_gen =
7556            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
7557        let total_headcount = headcount.max(100);
7558        snapshot.diversity =
7559            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
7560        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
7561
7562        // v2.4: Derive additional workforce diversity metrics from actual employee data
7563        if !self.master_data.employees.is_empty() {
7564            let hr_diversity = workforce_gen.generate_diversity_from_employees(
7565                entity_id,
7566                &self.master_data.employees,
7567                end_date,
7568            );
7569            if !hr_diversity.is_empty() {
7570                info!(
7571                    "ESG: {} diversity metrics derived from {} actual employees",
7572                    hr_diversity.len(),
7573                    self.master_data.employees.len(),
7574                );
7575                snapshot.diversity.extend(hr_diversity);
7576            }
7577        }
7578
7579        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
7580            entity_id,
7581            facility_count,
7582            start_date,
7583            end_date,
7584        );
7585
7586        // Compute safety metrics
7587        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
7588        let safety_metric = workforce_gen.compute_safety_metrics(
7589            entity_id,
7590            &snapshot.safety_incidents,
7591            total_hours,
7592            start_date,
7593        );
7594        snapshot.safety_metrics = vec![safety_metric];
7595
7596        // Governance
7597        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
7598            seed + 85,
7599            esg_cfg.governance.board_size,
7600            esg_cfg.governance.independence_target,
7601        );
7602        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
7603
7604        // Supplier ESG assessments
7605        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
7606            esg_cfg.supply_chain_esg.clone(),
7607            seed + 86,
7608        );
7609        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
7610            .master_data
7611            .vendors
7612            .iter()
7613            .map(|v| datasynth_generators::VendorInput {
7614                vendor_id: v.vendor_id.clone(),
7615                country: v.country.clone(),
7616                industry: format!("{:?}", v.vendor_type).to_lowercase(),
7617                quality_score: None,
7618            })
7619            .collect();
7620        snapshot.supplier_assessments =
7621            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
7622
7623        // Disclosures
7624        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
7625            seed + 87,
7626            esg_cfg.reporting.clone(),
7627            esg_cfg.climate_scenarios.clone(),
7628        );
7629        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
7630        snapshot.disclosures = disclosure_gen.generate_disclosures(
7631            entity_id,
7632            &snapshot.materiality,
7633            start_date,
7634            end_date,
7635        );
7636        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
7637        snapshot.disclosure_count = snapshot.disclosures.len();
7638
7639        // Anomaly injection
7640        if esg_cfg.anomaly_rate > 0.0 {
7641            let mut anomaly_injector =
7642                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
7643            let mut labels = Vec::new();
7644            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
7645            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
7646            labels.extend(
7647                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
7648            );
7649            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
7650            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
7651            snapshot.anomaly_labels = labels;
7652        }
7653
7654        stats.esg_emission_count = snapshot.emission_count;
7655        stats.esg_disclosure_count = snapshot.disclosure_count;
7656
7657        info!(
7658            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
7659            snapshot.emission_count,
7660            snapshot.disclosure_count,
7661            snapshot.supplier_assessments.len()
7662        );
7663        self.check_resources_with_log("post-esg")?;
7664
7665        Ok(snapshot)
7666    }
7667
7668    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
7669    fn phase_treasury_data(
7670        &mut self,
7671        document_flows: &DocumentFlowSnapshot,
7672        subledger: &SubledgerSnapshot,
7673        intercompany: &IntercompanySnapshot,
7674        stats: &mut EnhancedGenerationStatistics,
7675    ) -> SynthResult<TreasurySnapshot> {
7676        if !self.phase_config.generate_treasury {
7677            debug!("Phase 22: Skipped (treasury generation disabled)");
7678            return Ok(TreasurySnapshot::default());
7679        }
7680        let degradation = self.check_resources()?;
7681        if degradation >= DegradationLevel::Reduced {
7682            debug!(
7683                "Phase skipped due to resource pressure (degradation: {:?})",
7684                degradation
7685            );
7686            return Ok(TreasurySnapshot::default());
7687        }
7688        info!("Phase 22: Generating Treasury Data");
7689
7690        let seed = self.seed;
7691        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7692            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7693        let currency = self
7694            .config
7695            .companies
7696            .first()
7697            .map(|c| c.currency.as_str())
7698            .unwrap_or("USD");
7699        let entity_id = self
7700            .config
7701            .companies
7702            .first()
7703            .map(|c| c.code.as_str())
7704            .unwrap_or("1000");
7705
7706        let mut snapshot = TreasurySnapshot::default();
7707
7708        // Generate debt instruments
7709        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
7710            self.config.treasury.debt.clone(),
7711            seed + 90,
7712        );
7713        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
7714
7715        // Generate hedging instruments (IR swaps for floating-rate debt)
7716        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
7717            self.config.treasury.hedging.clone(),
7718            seed + 91,
7719        );
7720        for debt in &snapshot.debt_instruments {
7721            if debt.rate_type == InterestRateType::Variable {
7722                let swap = hedge_gen.generate_ir_swap(
7723                    currency,
7724                    debt.principal,
7725                    debt.origination_date,
7726                    debt.maturity_date,
7727                );
7728                snapshot.hedging_instruments.push(swap);
7729            }
7730        }
7731
7732        // Build FX exposures from foreign-currency payments and generate
7733        // FX forwards + hedge relationship designations via generate() API.
7734        {
7735            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
7736            for payment in &document_flows.payments {
7737                if payment.currency != currency {
7738                    let entry = fx_map
7739                        .entry(payment.currency.clone())
7740                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
7741                    entry.0 += payment.amount;
7742                    // Use the latest settlement date among grouped payments
7743                    if payment.header.document_date > entry.1 {
7744                        entry.1 = payment.header.document_date;
7745                    }
7746                }
7747            }
7748            if !fx_map.is_empty() {
7749                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
7750                    .into_iter()
7751                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
7752                        datasynth_generators::treasury::FxExposure {
7753                            currency_pair: format!("{foreign_ccy}/{currency}"),
7754                            foreign_currency: foreign_ccy,
7755                            net_amount,
7756                            settlement_date,
7757                            description: "AP payment FX exposure".to_string(),
7758                        }
7759                    })
7760                    .collect();
7761                let (fx_instruments, fx_relationships) =
7762                    hedge_gen.generate(start_date, &fx_exposures);
7763                snapshot.hedging_instruments.extend(fx_instruments);
7764                snapshot.hedge_relationships.extend(fx_relationships);
7765            }
7766        }
7767
7768        // Inject anomalies if configured
7769        if self.config.treasury.anomaly_rate > 0.0 {
7770            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
7771                seed + 92,
7772                self.config.treasury.anomaly_rate,
7773            );
7774            let mut labels = Vec::new();
7775            labels.extend(
7776                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
7777            );
7778            snapshot.treasury_anomaly_labels = labels;
7779        }
7780
7781        // Generate cash positions from payment flows
7782        if self.config.treasury.cash_positioning.enabled {
7783            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
7784
7785            // AP payments as outflows
7786            for payment in &document_flows.payments {
7787                cash_flows.push(datasynth_generators::treasury::CashFlow {
7788                    date: payment.header.document_date,
7789                    account_id: format!("{entity_id}-MAIN"),
7790                    amount: payment.amount,
7791                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
7792                });
7793            }
7794
7795            // Customer receipts (from O2C chains) as inflows
7796            for chain in &document_flows.o2c_chains {
7797                if let Some(ref receipt) = chain.customer_receipt {
7798                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7799                        date: receipt.header.document_date,
7800                        account_id: format!("{entity_id}-MAIN"),
7801                        amount: receipt.amount,
7802                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7803                    });
7804                }
7805                // Remainder receipts (follow-up to partial payments)
7806                for receipt in &chain.remainder_receipts {
7807                    cash_flows.push(datasynth_generators::treasury::CashFlow {
7808                        date: receipt.header.document_date,
7809                        account_id: format!("{entity_id}-MAIN"),
7810                        amount: receipt.amount,
7811                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
7812                    });
7813                }
7814            }
7815
7816            if !cash_flows.is_empty() {
7817                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
7818                    self.config.treasury.cash_positioning.clone(),
7819                    seed + 93,
7820                );
7821                let account_id = format!("{entity_id}-MAIN");
7822                snapshot.cash_positions = cash_gen.generate(
7823                    entity_id,
7824                    &account_id,
7825                    currency,
7826                    &cash_flows,
7827                    start_date,
7828                    start_date + chrono::Months::new(self.config.global.period_months),
7829                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
7830                );
7831            }
7832        }
7833
7834        // Generate cash forecasts from AR/AP aging
7835        if self.config.treasury.cash_forecasting.enabled {
7836            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7837
7838            // Build AR aging items from subledger AR invoices
7839            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
7840                .ar_invoices
7841                .iter()
7842                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7843                .map(|inv| {
7844                    let days_past_due = if inv.due_date < end_date {
7845                        (end_date - inv.due_date).num_days().max(0) as u32
7846                    } else {
7847                        0
7848                    };
7849                    datasynth_generators::treasury::ArAgingItem {
7850                        expected_date: inv.due_date,
7851                        amount: inv.amount_remaining,
7852                        days_past_due,
7853                        document_id: inv.invoice_number.clone(),
7854                    }
7855                })
7856                .collect();
7857
7858            // Build AP aging items from subledger AP invoices
7859            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
7860                .ap_invoices
7861                .iter()
7862                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
7863                .map(|inv| datasynth_generators::treasury::ApAgingItem {
7864                    payment_date: inv.due_date,
7865                    amount: inv.amount_remaining,
7866                    document_id: inv.invoice_number.clone(),
7867                })
7868                .collect();
7869
7870            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
7871                self.config.treasury.cash_forecasting.clone(),
7872                seed + 94,
7873            );
7874            let forecast = forecast_gen.generate(
7875                entity_id,
7876                currency,
7877                end_date,
7878                &ar_items,
7879                &ap_items,
7880                &[], // scheduled disbursements - empty for now
7881            );
7882            snapshot.cash_forecasts.push(forecast);
7883        }
7884
7885        // Generate cash pools and sweeps
7886        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
7887            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7888            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
7889                self.config.treasury.cash_pooling.clone(),
7890                seed + 95,
7891            );
7892
7893            // Create a pool from available accounts
7894            let account_ids: Vec<String> = snapshot
7895                .cash_positions
7896                .iter()
7897                .map(|cp| cp.bank_account_id.clone())
7898                .collect::<std::collections::HashSet<_>>()
7899                .into_iter()
7900                .collect();
7901
7902            if let Some(pool) =
7903                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
7904            {
7905                // Generate sweeps - build participant balances from last cash position per account
7906                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
7907                for cp in &snapshot.cash_positions {
7908                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
7909                }
7910
7911                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
7912                    latest_balances
7913                        .into_iter()
7914                        .filter(|(id, _)| pool.participant_accounts.contains(id))
7915                        .map(
7916                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
7917                                account_id: id,
7918                                balance,
7919                            },
7920                        )
7921                        .collect();
7922
7923                let sweeps =
7924                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
7925                snapshot.cash_pool_sweeps = sweeps;
7926                snapshot.cash_pools.push(pool);
7927            }
7928        }
7929
7930        // Generate bank guarantees
7931        if self.config.treasury.bank_guarantees.enabled {
7932            let vendor_names: Vec<String> = self
7933                .master_data
7934                .vendors
7935                .iter()
7936                .map(|v| v.name.clone())
7937                .collect();
7938            if !vendor_names.is_empty() {
7939                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
7940                    self.config.treasury.bank_guarantees.clone(),
7941                    seed + 96,
7942                );
7943                snapshot.bank_guarantees =
7944                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
7945            }
7946        }
7947
7948        // Generate netting runs from intercompany matched pairs
7949        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
7950            let entity_ids: Vec<String> = self
7951                .config
7952                .companies
7953                .iter()
7954                .map(|c| c.code.clone())
7955                .collect();
7956            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
7957                .matched_pairs
7958                .iter()
7959                .map(|mp| {
7960                    (
7961                        mp.seller_company.clone(),
7962                        mp.buyer_company.clone(),
7963                        mp.amount,
7964                    )
7965                })
7966                .collect();
7967            if entity_ids.len() >= 2 {
7968                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
7969                    self.config.treasury.netting.clone(),
7970                    seed + 97,
7971                );
7972                snapshot.netting_runs = netting_gen.generate(
7973                    &entity_ids,
7974                    currency,
7975                    start_date,
7976                    self.config.global.period_months,
7977                    &ic_amounts,
7978                );
7979            }
7980        }
7981
7982        // Generate treasury journal entries from the instruments we just created.
7983        {
7984            use datasynth_generators::treasury::TreasuryAccounting;
7985
7986            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7987            let mut treasury_jes = Vec::new();
7988
7989            // Debt interest accrual JEs
7990            if !snapshot.debt_instruments.is_empty() {
7991                let debt_jes =
7992                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
7993                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
7994                treasury_jes.extend(debt_jes);
7995            }
7996
7997            // Hedge mark-to-market JEs
7998            if !snapshot.hedging_instruments.is_empty() {
7999                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
8000                    &snapshot.hedging_instruments,
8001                    &snapshot.hedge_relationships,
8002                    end_date,
8003                    entity_id,
8004                );
8005                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
8006                treasury_jes.extend(hedge_jes);
8007            }
8008
8009            // Cash pool sweep JEs
8010            if !snapshot.cash_pool_sweeps.is_empty() {
8011                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
8012                    &snapshot.cash_pool_sweeps,
8013                    entity_id,
8014                );
8015                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
8016                treasury_jes.extend(sweep_jes);
8017            }
8018
8019            if !treasury_jes.is_empty() {
8020                debug!("Total treasury journal entries: {}", treasury_jes.len());
8021            }
8022            snapshot.journal_entries = treasury_jes;
8023        }
8024
8025        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
8026        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
8027        stats.cash_position_count = snapshot.cash_positions.len();
8028        stats.cash_forecast_count = snapshot.cash_forecasts.len();
8029        stats.cash_pool_count = snapshot.cash_pools.len();
8030
8031        info!(
8032            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
8033            snapshot.debt_instruments.len(),
8034            snapshot.hedging_instruments.len(),
8035            snapshot.cash_positions.len(),
8036            snapshot.cash_forecasts.len(),
8037            snapshot.cash_pools.len(),
8038            snapshot.bank_guarantees.len(),
8039            snapshot.netting_runs.len(),
8040            snapshot.journal_entries.len(),
8041        );
8042        self.check_resources_with_log("post-treasury")?;
8043
8044        Ok(snapshot)
8045    }
8046
8047    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
8048    fn phase_project_accounting(
8049        &mut self,
8050        document_flows: &DocumentFlowSnapshot,
8051        hr: &HrSnapshot,
8052        stats: &mut EnhancedGenerationStatistics,
8053    ) -> SynthResult<ProjectAccountingSnapshot> {
8054        if !self.phase_config.generate_project_accounting {
8055            debug!("Phase 23: Skipped (project accounting disabled)");
8056            return Ok(ProjectAccountingSnapshot::default());
8057        }
8058        let degradation = self.check_resources()?;
8059        if degradation >= DegradationLevel::Reduced {
8060            debug!(
8061                "Phase skipped due to resource pressure (degradation: {:?})",
8062                degradation
8063            );
8064            return Ok(ProjectAccountingSnapshot::default());
8065        }
8066        info!("Phase 23: Generating Project Accounting Data");
8067
8068        let seed = self.seed;
8069        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8070            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8071        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8072        let company_code = self
8073            .config
8074            .companies
8075            .first()
8076            .map(|c| c.code.as_str())
8077            .unwrap_or("1000");
8078
8079        let mut snapshot = ProjectAccountingSnapshot::default();
8080
8081        // Generate projects with WBS hierarchies
8082        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
8083            self.config.project_accounting.clone(),
8084            seed + 95,
8085        );
8086        let pool = project_gen.generate(company_code, start_date, end_date);
8087        snapshot.projects = pool.projects.clone();
8088
8089        // Link source documents to projects for cost allocation
8090        {
8091            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
8092                Vec::new();
8093
8094            // Time entries
8095            for te in &hr.time_entries {
8096                let total_hours = te.hours_regular + te.hours_overtime;
8097                if total_hours > 0.0 {
8098                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8099                        id: te.entry_id.clone(),
8100                        entity_id: company_code.to_string(),
8101                        date: te.date,
8102                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
8103                            .unwrap_or(rust_decimal::Decimal::ZERO),
8104                        source_type: CostSourceType::TimeEntry,
8105                        hours: Some(
8106                            rust_decimal::Decimal::from_f64_retain(total_hours)
8107                                .unwrap_or(rust_decimal::Decimal::ZERO),
8108                        ),
8109                    });
8110                }
8111            }
8112
8113            // Expense reports
8114            for er in &hr.expense_reports {
8115                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8116                    id: er.report_id.clone(),
8117                    entity_id: company_code.to_string(),
8118                    date: er.submission_date,
8119                    amount: er.total_amount,
8120                    source_type: CostSourceType::ExpenseReport,
8121                    hours: None,
8122                });
8123            }
8124
8125            // Purchase orders
8126            for po in &document_flows.purchase_orders {
8127                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8128                    id: po.header.document_id.clone(),
8129                    entity_id: company_code.to_string(),
8130                    date: po.header.document_date,
8131                    amount: po.total_net_amount,
8132                    source_type: CostSourceType::PurchaseOrder,
8133                    hours: None,
8134                });
8135            }
8136
8137            // Vendor invoices
8138            for vi in &document_flows.vendor_invoices {
8139                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
8140                    id: vi.header.document_id.clone(),
8141                    entity_id: company_code.to_string(),
8142                    date: vi.header.document_date,
8143                    amount: vi.payable_amount,
8144                    source_type: CostSourceType::VendorInvoice,
8145                    hours: None,
8146                });
8147            }
8148
8149            if !source_docs.is_empty() && !pool.projects.is_empty() {
8150                let mut cost_gen =
8151                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
8152                        self.config.project_accounting.cost_allocation.clone(),
8153                        seed + 99,
8154                    );
8155                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
8156            }
8157        }
8158
8159        // Generate change orders
8160        if self.config.project_accounting.change_orders.enabled {
8161            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
8162                self.config.project_accounting.change_orders.clone(),
8163                seed + 96,
8164            );
8165            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
8166        }
8167
8168        // Generate milestones
8169        if self.config.project_accounting.milestones.enabled {
8170            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
8171                self.config.project_accounting.milestones.clone(),
8172                seed + 97,
8173            );
8174            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
8175        }
8176
8177        // Generate earned value metrics (needs cost lines, so only if we have projects)
8178        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
8179            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
8180                self.config.project_accounting.earned_value.clone(),
8181                seed + 98,
8182            );
8183            snapshot.earned_value_metrics =
8184                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
8185        }
8186
8187        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
8188        if self.config.project_accounting.revenue_recognition.enabled
8189            && !snapshot.projects.is_empty()
8190            && !snapshot.cost_lines.is_empty()
8191        {
8192            use datasynth_generators::project_accounting::RevenueGenerator;
8193            let rev_config = self.config.project_accounting.revenue_recognition.clone();
8194            let avg_contract_value =
8195                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
8196                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
8197
8198            // Build contract value tuples: only customer-type projects get revenue recognition.
8199            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
8200            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
8201                snapshot
8202                    .projects
8203                    .iter()
8204                    .filter(|p| {
8205                        matches!(
8206                            p.project_type,
8207                            datasynth_core::models::ProjectType::Customer
8208                        )
8209                    })
8210                    .map(|p| {
8211                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
8212                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
8213                        // budget × 1.25 → contract value
8214                        } else {
8215                            avg_contract_value
8216                        };
8217                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
8218                        (p.project_id.clone(), cv, etc)
8219                    })
8220                    .collect();
8221
8222            if !contract_values.is_empty() {
8223                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
8224                snapshot.revenue_records = rev_gen.generate(
8225                    &snapshot.projects,
8226                    &snapshot.cost_lines,
8227                    &contract_values,
8228                    start_date,
8229                    end_date,
8230                );
8231                debug!(
8232                    "Generated {} revenue recognition records for {} customer projects",
8233                    snapshot.revenue_records.len(),
8234                    contract_values.len()
8235                );
8236            }
8237        }
8238
8239        stats.project_count = snapshot.projects.len();
8240        stats.project_change_order_count = snapshot.change_orders.len();
8241        stats.project_cost_line_count = snapshot.cost_lines.len();
8242
8243        info!(
8244            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
8245            snapshot.projects.len(),
8246            snapshot.change_orders.len(),
8247            snapshot.milestones.len(),
8248            snapshot.earned_value_metrics.len()
8249        );
8250        self.check_resources_with_log("post-project-accounting")?;
8251
8252        Ok(snapshot)
8253    }
8254
8255    /// Phase 24: Generate process evolution and organizational events.
8256    fn phase_evolution_events(
8257        &mut self,
8258        stats: &mut EnhancedGenerationStatistics,
8259    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
8260        if !self.phase_config.generate_evolution_events {
8261            debug!("Phase 24: Skipped (evolution events disabled)");
8262            return Ok((Vec::new(), Vec::new()));
8263        }
8264        info!("Phase 24: Generating Process Evolution + Organizational Events");
8265
8266        let seed = self.seed;
8267        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8268            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8269        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8270
8271        // Process evolution events
8272        let mut proc_gen =
8273            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
8274                seed + 100,
8275            );
8276        let process_events = proc_gen.generate_events(start_date, end_date);
8277
8278        // Organizational events
8279        let company_codes: Vec<String> = self
8280            .config
8281            .companies
8282            .iter()
8283            .map(|c| c.code.clone())
8284            .collect();
8285        let mut org_gen =
8286            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
8287                seed + 101,
8288            );
8289        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
8290
8291        stats.process_evolution_event_count = process_events.len();
8292        stats.organizational_event_count = org_events.len();
8293
8294        info!(
8295            "Evolution events generated: {} process evolution, {} organizational",
8296            process_events.len(),
8297            org_events.len()
8298        );
8299        self.check_resources_with_log("post-evolution-events")?;
8300
8301        Ok((process_events, org_events))
8302    }
8303
8304    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
8305    /// data recovery, and regulatory changes).
8306    fn phase_disruption_events(
8307        &self,
8308        stats: &mut EnhancedGenerationStatistics,
8309    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
8310        if !self.config.organizational_events.enabled {
8311            debug!("Phase 24b: Skipped (organizational events disabled)");
8312            return Ok(Vec::new());
8313        }
8314        info!("Phase 24b: Generating Disruption Events");
8315
8316        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8317            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8318        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8319
8320        let company_codes: Vec<String> = self
8321            .config
8322            .companies
8323            .iter()
8324            .map(|c| c.code.clone())
8325            .collect();
8326
8327        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
8328        let events = gen.generate(start_date, end_date, &company_codes);
8329
8330        stats.disruption_event_count = events.len();
8331        info!("Disruption events generated: {} events", events.len());
8332        self.check_resources_with_log("post-disruption-events")?;
8333
8334        Ok(events)
8335    }
8336
8337    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
8338    ///
8339    /// Produces paired examples where each pair contains the original clean JE
8340    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
8341    /// split transaction). Useful for training anomaly detection models with
8342    /// known ground truth.
8343    fn phase_counterfactuals(
8344        &self,
8345        journal_entries: &[JournalEntry],
8346        stats: &mut EnhancedGenerationStatistics,
8347    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
8348        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
8349            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
8350            return Ok(Vec::new());
8351        }
8352        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
8353
8354        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
8355
8356        let mut gen = CounterfactualGenerator::new(self.seed + 110);
8357
8358        // Rotating set of specs to produce diverse mutation types
8359        let specs = [
8360            CounterfactualSpec::ScaleAmount { factor: 2.5 },
8361            CounterfactualSpec::ShiftDate { days: -14 },
8362            CounterfactualSpec::SelfApprove,
8363            CounterfactualSpec::SplitTransaction { split_count: 3 },
8364        ];
8365
8366        let pairs: Vec<_> = journal_entries
8367            .iter()
8368            .enumerate()
8369            .map(|(i, je)| {
8370                let spec = &specs[i % specs.len()];
8371                gen.generate(je, spec)
8372            })
8373            .collect();
8374
8375        stats.counterfactual_pair_count = pairs.len();
8376        info!(
8377            "Counterfactual pairs generated: {} pairs from {} journal entries",
8378            pairs.len(),
8379            journal_entries.len()
8380        );
8381        self.check_resources_with_log("post-counterfactuals")?;
8382
8383        Ok(pairs)
8384    }
8385
8386    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
8387    ///
8388    /// Uses the anomaly labels (from Phase 8) to determine which documents are
8389    /// fraudulent, then generates probabilistic red flags on all chain documents.
8390    /// Non-fraud documents also receive red flags at a lower rate (false positives)
8391    /// to produce realistic ML training data.
8392    fn phase_red_flags(
8393        &self,
8394        anomaly_labels: &AnomalyLabels,
8395        document_flows: &DocumentFlowSnapshot,
8396        stats: &mut EnhancedGenerationStatistics,
8397    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
8398        if !self.config.fraud.enabled {
8399            debug!("Phase 26: Skipped (fraud generation disabled)");
8400            return Ok(Vec::new());
8401        }
8402        info!("Phase 26: Generating Fraud Red-Flag Indicators");
8403
8404        use datasynth_generators::fraud::RedFlagGenerator;
8405
8406        let generator = RedFlagGenerator::new();
8407        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
8408
8409        // Build a set of document IDs that are known-fraudulent from anomaly labels.
8410        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
8411            .labels
8412            .iter()
8413            .filter(|label| label.anomaly_type.is_intentional())
8414            .map(|label| label.document_id.as_str())
8415            .collect();
8416
8417        let mut flags = Vec::new();
8418
8419        // Iterate P2P chains: use the purchase order document ID as the chain key.
8420        for chain in &document_flows.p2p_chains {
8421            let doc_id = &chain.purchase_order.header.document_id;
8422            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8423            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8424        }
8425
8426        // Iterate O2C chains: use the sales order document ID as the chain key.
8427        for chain in &document_flows.o2c_chains {
8428            let doc_id = &chain.sales_order.header.document_id;
8429            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
8430            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
8431        }
8432
8433        stats.red_flag_count = flags.len();
8434        info!(
8435            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
8436            flags.len(),
8437            document_flows.p2p_chains.len(),
8438            document_flows.o2c_chains.len(),
8439            fraud_doc_ids.len()
8440        );
8441        self.check_resources_with_log("post-red-flags")?;
8442
8443        Ok(flags)
8444    }
8445
8446    /// Phase 26b: Generate collusion rings from employee/vendor pools.
8447    ///
8448    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
8449    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
8450    /// advance them over the simulation period.
8451    fn phase_collusion_rings(
8452        &mut self,
8453        stats: &mut EnhancedGenerationStatistics,
8454    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
8455        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
8456            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
8457            return Ok(Vec::new());
8458        }
8459        info!("Phase 26b: Generating Collusion Rings");
8460
8461        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8462            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8463        let months = self.config.global.period_months;
8464
8465        let employee_ids: Vec<String> = self
8466            .master_data
8467            .employees
8468            .iter()
8469            .map(|e| e.employee_id.clone())
8470            .collect();
8471        let vendor_ids: Vec<String> = self
8472            .master_data
8473            .vendors
8474            .iter()
8475            .map(|v| v.vendor_id.clone())
8476            .collect();
8477
8478        let mut generator =
8479            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
8480        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
8481
8482        stats.collusion_ring_count = rings.len();
8483        info!(
8484            "Collusion rings generated: {} rings, total members: {}",
8485            rings.len(),
8486            rings
8487                .iter()
8488                .map(datasynth_generators::fraud::CollusionRing::size)
8489                .sum::<usize>()
8490        );
8491        self.check_resources_with_log("post-collusion-rings")?;
8492
8493        Ok(rings)
8494    }
8495
8496    /// Phase 27: Generate bi-temporal version chains for vendor entities.
8497    ///
8498    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
8499    /// master data changes over time, supporting bi-temporal audit queries.
8500    fn phase_temporal_attributes(
8501        &mut self,
8502        stats: &mut EnhancedGenerationStatistics,
8503    ) -> SynthResult<
8504        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
8505    > {
8506        if !self.config.temporal_attributes.enabled {
8507            debug!("Phase 27: Skipped (temporal attributes disabled)");
8508            return Ok(Vec::new());
8509        }
8510        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
8511
8512        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8513            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8514
8515        // Build a TemporalAttributeConfig from the user's config.
8516        // Since Phase 27 is already gated on temporal_attributes.enabled,
8517        // default to enabling version chains so users get actual mutations.
8518        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
8519            || self.config.temporal_attributes.enabled;
8520        let temporal_config = {
8521            let ta = &self.config.temporal_attributes;
8522            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
8523                .enabled(ta.enabled)
8524                .closed_probability(ta.valid_time.closed_probability)
8525                .avg_validity_days(ta.valid_time.avg_validity_days)
8526                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
8527                .with_version_chains(if generate_version_chains {
8528                    ta.avg_versions_per_entity
8529                } else {
8530                    1.0
8531                })
8532                .build()
8533        };
8534        // Apply backdating settings if configured
8535        let temporal_config = if self
8536            .config
8537            .temporal_attributes
8538            .transaction_time
8539            .allow_backdating
8540        {
8541            let mut c = temporal_config;
8542            c.transaction_time.allow_backdating = true;
8543            c.transaction_time.backdating_probability = self
8544                .config
8545                .temporal_attributes
8546                .transaction_time
8547                .backdating_probability;
8548            c.transaction_time.max_backdate_days = self
8549                .config
8550                .temporal_attributes
8551                .transaction_time
8552                .max_backdate_days;
8553            c
8554        } else {
8555            temporal_config
8556        };
8557        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
8558            temporal_config,
8559            self.seed + 130,
8560            start_date,
8561        );
8562
8563        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
8564            self.seed + 130,
8565            datasynth_core::GeneratorType::Vendor,
8566        );
8567
8568        let chains: Vec<_> = self
8569            .master_data
8570            .vendors
8571            .iter()
8572            .map(|vendor| {
8573                let id = uuid_factory.next();
8574                gen.generate_version_chain(vendor.clone(), id)
8575            })
8576            .collect();
8577
8578        stats.temporal_version_chain_count = chains.len();
8579        info!("Temporal version chains generated: {} chains", chains.len());
8580        self.check_resources_with_log("post-temporal-attributes")?;
8581
8582        Ok(chains)
8583    }
8584
8585    /// Phase 28: Build entity relationship graph and cross-process links.
8586    ///
8587    /// Part 1 (gated on `relationship_strength.enabled`): builds an
8588    /// `EntityGraph` from master-data vendor/customer entities and
8589    /// journal-entry-derived transaction summaries.
8590    ///
8591    /// Part 2 (gated on `cross_process_links.enabled`): extracts
8592    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
8593    /// generates inventory-movement cross-process links.
8594    fn phase_entity_relationships(
8595        &self,
8596        journal_entries: &[JournalEntry],
8597        document_flows: &DocumentFlowSnapshot,
8598        stats: &mut EnhancedGenerationStatistics,
8599    ) -> SynthResult<(
8600        Option<datasynth_core::models::EntityGraph>,
8601        Vec<datasynth_core::models::CrossProcessLink>,
8602    )> {
8603        use datasynth_generators::relationships::{
8604            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
8605            TransactionSummary,
8606        };
8607
8608        let rs_enabled = self.config.relationship_strength.enabled;
8609        let cpl_enabled = self.config.cross_process_links.enabled
8610            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
8611
8612        if !rs_enabled && !cpl_enabled {
8613            debug!(
8614                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
8615            );
8616            return Ok((None, Vec::new()));
8617        }
8618
8619        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
8620
8621        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8622            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8623
8624        let company_code = self
8625            .config
8626            .companies
8627            .first()
8628            .map(|c| c.code.as_str())
8629            .unwrap_or("1000");
8630
8631        // Build the generator with matching config flags
8632        let gen_config = EntityGraphConfig {
8633            enabled: rs_enabled,
8634            cross_process: datasynth_generators::relationships::CrossProcessConfig {
8635                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
8636                enable_return_flows: false,
8637                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
8638                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
8639                // Use higher link rate for small datasets to avoid probabilistic empty results
8640                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
8641                    1.0
8642                } else {
8643                    0.30
8644                },
8645                ..Default::default()
8646            },
8647            strength_config: datasynth_generators::relationships::StrengthConfig {
8648                transaction_volume_weight: self
8649                    .config
8650                    .relationship_strength
8651                    .calculation
8652                    .transaction_volume_weight,
8653                transaction_count_weight: self
8654                    .config
8655                    .relationship_strength
8656                    .calculation
8657                    .transaction_count_weight,
8658                duration_weight: self
8659                    .config
8660                    .relationship_strength
8661                    .calculation
8662                    .relationship_duration_weight,
8663                recency_weight: self.config.relationship_strength.calculation.recency_weight,
8664                mutual_connections_weight: self
8665                    .config
8666                    .relationship_strength
8667                    .calculation
8668                    .mutual_connections_weight,
8669                recency_half_life_days: self
8670                    .config
8671                    .relationship_strength
8672                    .calculation
8673                    .recency_half_life_days,
8674            },
8675            ..Default::default()
8676        };
8677
8678        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
8679
8680        // --- Part 1: Entity Relationship Graph ---
8681        let entity_graph = if rs_enabled {
8682            // Build EntitySummary lists from master data
8683            let vendor_summaries: Vec<EntitySummary> = self
8684                .master_data
8685                .vendors
8686                .iter()
8687                .map(|v| {
8688                    EntitySummary::new(
8689                        &v.vendor_id,
8690                        &v.name,
8691                        datasynth_core::models::GraphEntityType::Vendor,
8692                        start_date,
8693                    )
8694                })
8695                .collect();
8696
8697            let customer_summaries: Vec<EntitySummary> = self
8698                .master_data
8699                .customers
8700                .iter()
8701                .map(|c| {
8702                    EntitySummary::new(
8703                        &c.customer_id,
8704                        &c.name,
8705                        datasynth_core::models::GraphEntityType::Customer,
8706                        start_date,
8707                    )
8708                })
8709                .collect();
8710
8711            // Build transaction summaries from journal entries.
8712            // Key = (company_code, trading_partner) for entries that have a
8713            // trading partner.  This captures intercompany flows and any JE
8714            // whose line items carry a trading_partner reference.
8715            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
8716                std::collections::HashMap::new();
8717
8718            for je in journal_entries {
8719                let cc = je.header.company_code.clone();
8720                let posting_date = je.header.posting_date;
8721                for line in &je.lines {
8722                    if let Some(ref tp) = line.trading_partner {
8723                        let amount = if line.debit_amount > line.credit_amount {
8724                            line.debit_amount
8725                        } else {
8726                            line.credit_amount
8727                        };
8728                        let entry = txn_summaries
8729                            .entry((cc.clone(), tp.clone()))
8730                            .or_insert_with(|| TransactionSummary {
8731                                total_volume: rust_decimal::Decimal::ZERO,
8732                                transaction_count: 0,
8733                                first_transaction_date: posting_date,
8734                                last_transaction_date: posting_date,
8735                                related_entities: std::collections::HashSet::new(),
8736                            });
8737                        entry.total_volume += amount;
8738                        entry.transaction_count += 1;
8739                        if posting_date < entry.first_transaction_date {
8740                            entry.first_transaction_date = posting_date;
8741                        }
8742                        if posting_date > entry.last_transaction_date {
8743                            entry.last_transaction_date = posting_date;
8744                        }
8745                        entry.related_entities.insert(cc.clone());
8746                    }
8747                }
8748            }
8749
8750            // Also extract transaction relationships from document flow chains.
8751            // P2P chains: Company → Vendor relationships
8752            for chain in &document_flows.p2p_chains {
8753                let cc = chain.purchase_order.header.company_code.clone();
8754                let vendor_id = chain.purchase_order.vendor_id.clone();
8755                let po_date = chain.purchase_order.header.document_date;
8756                let amount = chain.purchase_order.total_net_amount;
8757
8758                let entry = txn_summaries
8759                    .entry((cc.clone(), vendor_id))
8760                    .or_insert_with(|| TransactionSummary {
8761                        total_volume: rust_decimal::Decimal::ZERO,
8762                        transaction_count: 0,
8763                        first_transaction_date: po_date,
8764                        last_transaction_date: po_date,
8765                        related_entities: std::collections::HashSet::new(),
8766                    });
8767                entry.total_volume += amount;
8768                entry.transaction_count += 1;
8769                if po_date < entry.first_transaction_date {
8770                    entry.first_transaction_date = po_date;
8771                }
8772                if po_date > entry.last_transaction_date {
8773                    entry.last_transaction_date = po_date;
8774                }
8775                entry.related_entities.insert(cc);
8776            }
8777
8778            // O2C chains: Company → Customer relationships
8779            for chain in &document_flows.o2c_chains {
8780                let cc = chain.sales_order.header.company_code.clone();
8781                let customer_id = chain.sales_order.customer_id.clone();
8782                let so_date = chain.sales_order.header.document_date;
8783                let amount = chain.sales_order.total_net_amount;
8784
8785                let entry = txn_summaries
8786                    .entry((cc.clone(), customer_id))
8787                    .or_insert_with(|| TransactionSummary {
8788                        total_volume: rust_decimal::Decimal::ZERO,
8789                        transaction_count: 0,
8790                        first_transaction_date: so_date,
8791                        last_transaction_date: so_date,
8792                        related_entities: std::collections::HashSet::new(),
8793                    });
8794                entry.total_volume += amount;
8795                entry.transaction_count += 1;
8796                if so_date < entry.first_transaction_date {
8797                    entry.first_transaction_date = so_date;
8798                }
8799                if so_date > entry.last_transaction_date {
8800                    entry.last_transaction_date = so_date;
8801                }
8802                entry.related_entities.insert(cc);
8803            }
8804
8805            let as_of_date = journal_entries
8806                .last()
8807                .map(|je| je.header.posting_date)
8808                .unwrap_or(start_date);
8809
8810            let graph = gen.generate_entity_graph(
8811                company_code,
8812                as_of_date,
8813                &vendor_summaries,
8814                &customer_summaries,
8815                &txn_summaries,
8816            );
8817
8818            info!(
8819                "Entity relationship graph: {} nodes, {} edges",
8820                graph.nodes.len(),
8821                graph.edges.len()
8822            );
8823            stats.entity_relationship_node_count = graph.nodes.len();
8824            stats.entity_relationship_edge_count = graph.edges.len();
8825            Some(graph)
8826        } else {
8827            None
8828        };
8829
8830        // --- Part 2: Cross-Process Links ---
8831        let cross_process_links = if cpl_enabled {
8832            // Build GoodsReceiptRef from P2P chains
8833            let gr_refs: Vec<GoodsReceiptRef> = document_flows
8834                .p2p_chains
8835                .iter()
8836                .flat_map(|chain| {
8837                    let vendor_id = chain.purchase_order.vendor_id.clone();
8838                    let cc = chain.purchase_order.header.company_code.clone();
8839                    chain.goods_receipts.iter().flat_map(move |gr| {
8840                        gr.items.iter().filter_map({
8841                            let doc_id = gr.header.document_id.clone();
8842                            let v_id = vendor_id.clone();
8843                            let company = cc.clone();
8844                            let receipt_date = gr.header.document_date;
8845                            move |item| {
8846                                item.base
8847                                    .material_id
8848                                    .as_ref()
8849                                    .map(|mat_id| GoodsReceiptRef {
8850                                        document_id: doc_id.clone(),
8851                                        material_id: mat_id.clone(),
8852                                        quantity: item.base.quantity,
8853                                        receipt_date,
8854                                        vendor_id: v_id.clone(),
8855                                        company_code: company.clone(),
8856                                    })
8857                            }
8858                        })
8859                    })
8860                })
8861                .collect();
8862
8863            // Build DeliveryRef from O2C chains
8864            let del_refs: Vec<DeliveryRef> = document_flows
8865                .o2c_chains
8866                .iter()
8867                .flat_map(|chain| {
8868                    let customer_id = chain.sales_order.customer_id.clone();
8869                    let cc = chain.sales_order.header.company_code.clone();
8870                    chain.deliveries.iter().flat_map(move |del| {
8871                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
8872                        del.items.iter().filter_map({
8873                            let doc_id = del.header.document_id.clone();
8874                            let c_id = customer_id.clone();
8875                            let company = cc.clone();
8876                            move |item| {
8877                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
8878                                    document_id: doc_id.clone(),
8879                                    material_id: mat_id.clone(),
8880                                    quantity: item.base.quantity,
8881                                    delivery_date,
8882                                    customer_id: c_id.clone(),
8883                                    company_code: company.clone(),
8884                                })
8885                            }
8886                        })
8887                    })
8888                })
8889                .collect();
8890
8891            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
8892            info!("Cross-process links generated: {} links", links.len());
8893            stats.cross_process_link_count = links.len();
8894            links
8895        } else {
8896            Vec::new()
8897        };
8898
8899        self.check_resources_with_log("post-entity-relationships")?;
8900        Ok((entity_graph, cross_process_links))
8901    }
8902
8903    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
8904    fn phase_industry_data(
8905        &self,
8906        stats: &mut EnhancedGenerationStatistics,
8907    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
8908        if !self.config.industry_specific.enabled {
8909            return None;
8910        }
8911        info!("Phase 29: Generating industry-specific data");
8912        let output = datasynth_generators::industry::factory::generate_industry_output(
8913            self.config.global.industry,
8914        );
8915        stats.industry_gl_account_count = output.gl_accounts.len();
8916        info!(
8917            "Industry data generated: {} GL accounts for {:?}",
8918            output.gl_accounts.len(),
8919            self.config.global.industry
8920        );
8921        Some(output)
8922    }
8923
8924    /// Phase 3b: Generate opening balances for each company.
8925    fn phase_opening_balances(
8926        &mut self,
8927        coa: &Arc<ChartOfAccounts>,
8928        stats: &mut EnhancedGenerationStatistics,
8929    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
8930        if !self.config.balance.generate_opening_balances {
8931            debug!("Phase 3b: Skipped (opening balance generation disabled)");
8932            return Ok(Vec::new());
8933        }
8934        info!("Phase 3b: Generating Opening Balances");
8935
8936        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8937            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8938        let fiscal_year = start_date.year();
8939
8940        let industry = match self.config.global.industry {
8941            IndustrySector::Manufacturing => IndustryType::Manufacturing,
8942            IndustrySector::Retail => IndustryType::Retail,
8943            IndustrySector::FinancialServices => IndustryType::Financial,
8944            IndustrySector::Healthcare => IndustryType::Healthcare,
8945            IndustrySector::Technology => IndustryType::Technology,
8946            _ => IndustryType::Manufacturing,
8947        };
8948
8949        let config = datasynth_generators::OpeningBalanceConfig {
8950            industry,
8951            ..Default::default()
8952        };
8953        let mut gen =
8954            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
8955
8956        let mut results = Vec::new();
8957        for company in &self.config.companies {
8958            let spec = OpeningBalanceSpec::new(
8959                company.code.clone(),
8960                start_date,
8961                fiscal_year,
8962                company.currency.clone(),
8963                rust_decimal::Decimal::new(10_000_000, 0),
8964                industry,
8965            );
8966            let ob = gen.generate(&spec, coa, start_date, &company.code);
8967            results.push(ob);
8968        }
8969
8970        stats.opening_balance_count = results.len();
8971        info!("Opening balances generated: {} companies", results.len());
8972        self.check_resources_with_log("post-opening-balances")?;
8973
8974        Ok(results)
8975    }
8976
8977    /// Phase 9b: Reconcile GL control accounts to subledger balances.
8978    fn phase_subledger_reconciliation(
8979        &mut self,
8980        subledger: &SubledgerSnapshot,
8981        entries: &[JournalEntry],
8982        stats: &mut EnhancedGenerationStatistics,
8983    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
8984        if !self.config.balance.reconcile_subledgers {
8985            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
8986            return Ok(Vec::new());
8987        }
8988        info!("Phase 9b: Reconciling GL to subledger balances");
8989
8990        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8991            .map(|d| d + chrono::Months::new(self.config.global.period_months))
8992            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8993
8994        // Build GL balance map from journal entries using a balance tracker
8995        let tracker_config = BalanceTrackerConfig {
8996            validate_on_each_entry: false,
8997            track_history: false,
8998            fail_on_validation_error: false,
8999            ..Default::default()
9000        };
9001        let recon_currency = self
9002            .config
9003            .companies
9004            .first()
9005            .map(|c| c.currency.clone())
9006            .unwrap_or_else(|| "USD".to_string());
9007        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
9008        let validation_errors = tracker.apply_entries(entries);
9009        if !validation_errors.is_empty() {
9010            warn!(
9011                error_count = validation_errors.len(),
9012                "Balance tracker encountered validation errors during subledger reconciliation"
9013            );
9014            for err in &validation_errors {
9015                debug!("Balance validation error: {:?}", err);
9016            }
9017        }
9018
9019        let mut engine = datasynth_generators::ReconciliationEngine::new(
9020            datasynth_generators::ReconciliationConfig::default(),
9021        );
9022
9023        let mut results = Vec::new();
9024        let company_code = self
9025            .config
9026            .companies
9027            .first()
9028            .map(|c| c.code.as_str())
9029            .unwrap_or("1000");
9030
9031        // Reconcile AR
9032        if !subledger.ar_invoices.is_empty() {
9033            let gl_balance = tracker
9034                .get_account_balance(
9035                    company_code,
9036                    datasynth_core::accounts::control_accounts::AR_CONTROL,
9037                )
9038                .map(|b| b.closing_balance)
9039                .unwrap_or_default();
9040            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
9041            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
9042        }
9043
9044        // Reconcile AP
9045        if !subledger.ap_invoices.is_empty() {
9046            let gl_balance = tracker
9047                .get_account_balance(
9048                    company_code,
9049                    datasynth_core::accounts::control_accounts::AP_CONTROL,
9050                )
9051                .map(|b| b.closing_balance)
9052                .unwrap_or_default();
9053            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
9054            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
9055        }
9056
9057        // Reconcile FA
9058        if !subledger.fa_records.is_empty() {
9059            let gl_asset_balance = tracker
9060                .get_account_balance(
9061                    company_code,
9062                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
9063                )
9064                .map(|b| b.closing_balance)
9065                .unwrap_or_default();
9066            let gl_accum_depr_balance = tracker
9067                .get_account_balance(
9068                    company_code,
9069                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
9070                )
9071                .map(|b| b.closing_balance)
9072                .unwrap_or_default();
9073            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
9074                subledger.fa_records.iter().collect();
9075            let (asset_recon, depr_recon) = engine.reconcile_fa(
9076                company_code,
9077                end_date,
9078                gl_asset_balance,
9079                gl_accum_depr_balance,
9080                &fa_refs,
9081            );
9082            results.push(asset_recon);
9083            results.push(depr_recon);
9084        }
9085
9086        // Reconcile Inventory
9087        if !subledger.inventory_positions.is_empty() {
9088            let gl_balance = tracker
9089                .get_account_balance(
9090                    company_code,
9091                    datasynth_core::accounts::control_accounts::INVENTORY,
9092                )
9093                .map(|b| b.closing_balance)
9094                .unwrap_or_default();
9095            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
9096                subledger.inventory_positions.iter().collect();
9097            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
9098        }
9099
9100        stats.subledger_reconciliation_count = results.len();
9101        let passed = results.iter().filter(|r| r.is_balanced()).count();
9102        let failed = results.len() - passed;
9103        info!(
9104            "Subledger reconciliation: {} checks, {} passed, {} failed",
9105            results.len(),
9106            passed,
9107            failed
9108        );
9109        self.check_resources_with_log("post-subledger-reconciliation")?;
9110
9111        Ok(results)
9112    }
9113
9114    /// Generate the chart of accounts.
9115    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
9116        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
9117
9118        let coa_framework = self.resolve_coa_framework();
9119
9120        let mut gen = ChartOfAccountsGenerator::new(
9121            self.config.chart_of_accounts.complexity,
9122            self.config.global.industry,
9123            self.seed,
9124        )
9125        .with_coa_framework(coa_framework);
9126
9127        let coa = Arc::new(gen.generate());
9128        self.coa = Some(Arc::clone(&coa));
9129
9130        if let Some(pb) = pb {
9131            pb.finish_with_message("Chart of Accounts complete");
9132        }
9133
9134        Ok(coa)
9135    }
9136
9137    /// Generate master data entities.
9138    fn generate_master_data(&mut self) -> SynthResult<()> {
9139        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9140            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9141        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9142
9143        let total = self.config.companies.len() as u64 * 5; // 5 entity types
9144        let pb = self.create_progress_bar(total, "Generating Master Data");
9145
9146        // Resolve country pack once for all companies (uses primary company's country)
9147        let pack = self.primary_pack().clone();
9148
9149        // Capture config values needed inside the parallel closure
9150        let vendors_per_company = self.phase_config.vendors_per_company;
9151        let customers_per_company = self.phase_config.customers_per_company;
9152        let materials_per_company = self.phase_config.materials_per_company;
9153        let assets_per_company = self.phase_config.assets_per_company;
9154        let coa_framework = self.resolve_coa_framework();
9155
9156        // Generate all master data in parallel across companies.
9157        // Each company's data is independent, making this embarrassingly parallel.
9158        let per_company_results: Vec<_> = self
9159            .config
9160            .companies
9161            .par_iter()
9162            .enumerate()
9163            .map(|(i, company)| {
9164                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
9165                let pack = pack.clone();
9166
9167                // Generate vendors (offset counter so IDs are globally unique across companies)
9168                let mut vendor_gen = VendorGenerator::new(company_seed);
9169                vendor_gen.set_country_pack(pack.clone());
9170                vendor_gen.set_coa_framework(coa_framework);
9171                vendor_gen.set_counter_offset(i * vendors_per_company);
9172                // Wire vendor network config when enabled
9173                if self.config.vendor_network.enabled {
9174                    let vn = &self.config.vendor_network;
9175                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
9176                        enabled: true,
9177                        depth: vn.depth,
9178                        tier1_count: datasynth_generators::TierCountConfig::new(
9179                            vn.tier1.min,
9180                            vn.tier1.max,
9181                        ),
9182                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
9183                            vn.tier2_per_parent.min,
9184                            vn.tier2_per_parent.max,
9185                        ),
9186                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
9187                            vn.tier3_per_parent.min,
9188                            vn.tier3_per_parent.max,
9189                        ),
9190                        cluster_distribution: datasynth_generators::ClusterDistribution {
9191                            reliable_strategic: vn.clusters.reliable_strategic,
9192                            standard_operational: vn.clusters.standard_operational,
9193                            transactional: vn.clusters.transactional,
9194                            problematic: vn.clusters.problematic,
9195                        },
9196                        concentration_limits: datasynth_generators::ConcentrationLimits {
9197                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
9198                            max_top5: vn.dependencies.top_5_concentration,
9199                        },
9200                        ..datasynth_generators::VendorNetworkConfig::default()
9201                    });
9202                }
9203                let vendor_pool =
9204                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
9205
9206                // Generate customers (offset counter so IDs are globally unique across companies)
9207                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
9208                customer_gen.set_country_pack(pack.clone());
9209                customer_gen.set_coa_framework(coa_framework);
9210                customer_gen.set_counter_offset(i * customers_per_company);
9211                // Wire customer segmentation config when enabled
9212                if self.config.customer_segmentation.enabled {
9213                    let cs = &self.config.customer_segmentation;
9214                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
9215                        enabled: true,
9216                        segment_distribution: datasynth_generators::SegmentDistribution {
9217                            enterprise: cs.value_segments.enterprise.customer_share,
9218                            mid_market: cs.value_segments.mid_market.customer_share,
9219                            smb: cs.value_segments.smb.customer_share,
9220                            consumer: cs.value_segments.consumer.customer_share,
9221                        },
9222                        referral_config: datasynth_generators::ReferralConfig {
9223                            enabled: cs.networks.referrals.enabled,
9224                            referral_rate: cs.networks.referrals.referral_rate,
9225                            ..Default::default()
9226                        },
9227                        hierarchy_config: datasynth_generators::HierarchyConfig {
9228                            enabled: cs.networks.corporate_hierarchies.enabled,
9229                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
9230                            ..Default::default()
9231                        },
9232                        ..Default::default()
9233                    };
9234                    customer_gen.set_segmentation_config(seg_cfg);
9235                }
9236                let customer_pool = customer_gen.generate_customer_pool(
9237                    customers_per_company,
9238                    &company.code,
9239                    start_date,
9240                );
9241
9242                // Generate materials (offset counter so IDs are globally unique across companies)
9243                let mut material_gen = MaterialGenerator::new(company_seed + 200);
9244                material_gen.set_country_pack(pack.clone());
9245                material_gen.set_counter_offset(i * materials_per_company);
9246                let material_pool = material_gen.generate_material_pool(
9247                    materials_per_company,
9248                    &company.code,
9249                    start_date,
9250                );
9251
9252                // Generate fixed assets
9253                let mut asset_gen = AssetGenerator::new(company_seed + 300);
9254                let asset_pool = asset_gen.generate_asset_pool(
9255                    assets_per_company,
9256                    &company.code,
9257                    (start_date, end_date),
9258                );
9259
9260                // Generate employees
9261                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
9262                employee_gen.set_country_pack(pack);
9263                let employee_pool =
9264                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
9265
9266                // Generate employee change history (2-5 events per employee)
9267                let employee_change_history =
9268                    employee_gen.generate_all_change_history(&employee_pool, end_date);
9269
9270                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
9271                let employee_ids: Vec<String> = employee_pool
9272                    .employees
9273                    .iter()
9274                    .map(|e| e.employee_id.clone())
9275                    .collect();
9276                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
9277                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
9278
9279                (
9280                    vendor_pool.vendors,
9281                    customer_pool.customers,
9282                    material_pool.materials,
9283                    asset_pool.assets,
9284                    employee_pool.employees,
9285                    employee_change_history,
9286                    cost_centers,
9287                )
9288            })
9289            .collect();
9290
9291        // Aggregate results from all companies
9292        for (vendors, customers, materials, assets, employees, change_history, cost_centers) in
9293            per_company_results
9294        {
9295            self.master_data.vendors.extend(vendors);
9296            self.master_data.customers.extend(customers);
9297            self.master_data.materials.extend(materials);
9298            self.master_data.assets.extend(assets);
9299            self.master_data.employees.extend(employees);
9300            self.master_data.cost_centers.extend(cost_centers);
9301            self.master_data
9302                .employee_change_history
9303                .extend(change_history);
9304        }
9305
9306        if let Some(pb) = &pb {
9307            pb.inc(total);
9308        }
9309        if let Some(pb) = pb {
9310            pb.finish_with_message("Master data generation complete");
9311        }
9312
9313        Ok(())
9314    }
9315
9316    /// Generate document flows (P2P and O2C).
9317    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
9318        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9319            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9320
9321        // Generate P2P chains
9322        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
9323        let months = (self.config.global.period_months as usize).max(1);
9324        let p2p_count = self
9325            .phase_config
9326            .p2p_chains
9327            .min(self.master_data.vendors.len() * 2 * months);
9328        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
9329
9330        // Convert P2P config from schema to generator config
9331        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
9332        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
9333        p2p_gen.set_country_pack(self.primary_pack().clone());
9334
9335        for i in 0..p2p_count {
9336            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
9337            let materials: Vec<&Material> = self
9338                .master_data
9339                .materials
9340                .iter()
9341                .skip(i % self.master_data.materials.len().max(1))
9342                .take(2.min(self.master_data.materials.len()))
9343                .collect();
9344
9345            if materials.is_empty() {
9346                continue;
9347            }
9348
9349            let company = &self.config.companies[i % self.config.companies.len()];
9350            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
9351            let fiscal_period = po_date.month() as u8;
9352            let created_by = if self.master_data.employees.is_empty() {
9353                "SYSTEM"
9354            } else {
9355                self.master_data.employees[i % self.master_data.employees.len()]
9356                    .user_id
9357                    .as_str()
9358            };
9359
9360            let chain = p2p_gen.generate_chain(
9361                &company.code,
9362                vendor,
9363                &materials,
9364                po_date,
9365                start_date.year() as u16,
9366                fiscal_period,
9367                created_by,
9368            );
9369
9370            // Flatten documents
9371            flows.purchase_orders.push(chain.purchase_order.clone());
9372            flows.goods_receipts.extend(chain.goods_receipts.clone());
9373            if let Some(vi) = &chain.vendor_invoice {
9374                flows.vendor_invoices.push(vi.clone());
9375            }
9376            if let Some(payment) = &chain.payment {
9377                flows.payments.push(payment.clone());
9378            }
9379            for remainder in &chain.remainder_payments {
9380                flows.payments.push(remainder.clone());
9381            }
9382            flows.p2p_chains.push(chain);
9383
9384            if let Some(pb) = &pb {
9385                pb.inc(1);
9386            }
9387        }
9388
9389        if let Some(pb) = pb {
9390            pb.finish_with_message("P2P document flows complete");
9391        }
9392
9393        // Generate O2C chains
9394        // Cap at ~2 SOs per customer per month to keep order volume realistic
9395        let o2c_count = self
9396            .phase_config
9397            .o2c_chains
9398            .min(self.master_data.customers.len() * 2 * months);
9399        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
9400
9401        // Convert O2C config from schema to generator config
9402        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
9403        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
9404        o2c_gen.set_country_pack(self.primary_pack().clone());
9405
9406        for i in 0..o2c_count {
9407            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
9408            let materials: Vec<&Material> = self
9409                .master_data
9410                .materials
9411                .iter()
9412                .skip(i % self.master_data.materials.len().max(1))
9413                .take(2.min(self.master_data.materials.len()))
9414                .collect();
9415
9416            if materials.is_empty() {
9417                continue;
9418            }
9419
9420            let company = &self.config.companies[i % self.config.companies.len()];
9421            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
9422            let fiscal_period = so_date.month() as u8;
9423            let created_by = if self.master_data.employees.is_empty() {
9424                "SYSTEM"
9425            } else {
9426                self.master_data.employees[i % self.master_data.employees.len()]
9427                    .user_id
9428                    .as_str()
9429            };
9430
9431            let chain = o2c_gen.generate_chain(
9432                &company.code,
9433                customer,
9434                &materials,
9435                so_date,
9436                start_date.year() as u16,
9437                fiscal_period,
9438                created_by,
9439            );
9440
9441            // Flatten documents
9442            flows.sales_orders.push(chain.sales_order.clone());
9443            flows.deliveries.extend(chain.deliveries.clone());
9444            if let Some(ci) = &chain.customer_invoice {
9445                flows.customer_invoices.push(ci.clone());
9446            }
9447            if let Some(receipt) = &chain.customer_receipt {
9448                flows.payments.push(receipt.clone());
9449            }
9450            // Extract remainder receipts (follow-up to partial payments)
9451            for receipt in &chain.remainder_receipts {
9452                flows.payments.push(receipt.clone());
9453            }
9454            flows.o2c_chains.push(chain);
9455
9456            if let Some(pb) = &pb {
9457                pb.inc(1);
9458            }
9459        }
9460
9461        if let Some(pb) = pb {
9462            pb.finish_with_message("O2C document flows complete");
9463        }
9464
9465        // Collect all document cross-references from document headers.
9466        // Each document embeds references to its predecessor(s) via add_reference(); here we
9467        // denormalise them into a flat list for the document_references.json output file.
9468        {
9469            let mut refs = Vec::new();
9470            for doc in &flows.purchase_orders {
9471                refs.extend(doc.header.document_references.iter().cloned());
9472            }
9473            for doc in &flows.goods_receipts {
9474                refs.extend(doc.header.document_references.iter().cloned());
9475            }
9476            for doc in &flows.vendor_invoices {
9477                refs.extend(doc.header.document_references.iter().cloned());
9478            }
9479            for doc in &flows.sales_orders {
9480                refs.extend(doc.header.document_references.iter().cloned());
9481            }
9482            for doc in &flows.deliveries {
9483                refs.extend(doc.header.document_references.iter().cloned());
9484            }
9485            for doc in &flows.customer_invoices {
9486                refs.extend(doc.header.document_references.iter().cloned());
9487            }
9488            for doc in &flows.payments {
9489                refs.extend(doc.header.document_references.iter().cloned());
9490            }
9491            debug!(
9492                "Collected {} document cross-references from document headers",
9493                refs.len()
9494            );
9495            flows.document_references = refs;
9496        }
9497
9498        Ok(())
9499    }
9500
9501    /// Generate journal entries using parallel generation across multiple cores.
9502    fn generate_journal_entries(
9503        &mut self,
9504        coa: &Arc<ChartOfAccounts>,
9505    ) -> SynthResult<Vec<JournalEntry>> {
9506        use datasynth_core::traits::ParallelGenerator;
9507
9508        let total = self.calculate_total_transactions();
9509        let pb = self.create_progress_bar(total, "Generating Journal Entries");
9510
9511        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9512            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9513        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9514
9515        let company_codes: Vec<String> = self
9516            .config
9517            .companies
9518            .iter()
9519            .map(|c| c.code.clone())
9520            .collect();
9521
9522        let generator = JournalEntryGenerator::new_with_params(
9523            self.config.transactions.clone(),
9524            Arc::clone(coa),
9525            company_codes,
9526            start_date,
9527            end_date,
9528            self.seed,
9529        );
9530
9531        // Connect generated master data to ensure JEs reference real entities
9532        // Enable persona-based error injection for realistic human behavior
9533        // Pass fraud configuration for fraud injection
9534        let je_pack = self.primary_pack();
9535
9536        let mut generator = generator
9537            .with_master_data(
9538                &self.master_data.vendors,
9539                &self.master_data.customers,
9540                &self.master_data.materials,
9541            )
9542            .with_country_pack_names(je_pack)
9543            .with_country_pack_temporal(
9544                self.config.temporal_patterns.clone(),
9545                self.seed + 200,
9546                je_pack,
9547            )
9548            .with_persona_errors(true)
9549            .with_fraud_config(self.config.fraud.clone());
9550
9551        // Apply temporal drift if configured
9552        if self.config.temporal.enabled {
9553            let drift_config = self.config.temporal.to_core_config();
9554            generator = generator.with_drift_config(drift_config, self.seed + 100);
9555        }
9556
9557        // Check memory limit at start
9558        self.check_memory_limit()?;
9559
9560        // Determine parallelism: use available cores, but cap at total entries
9561        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
9562
9563        // Use parallel generation for datasets with 10K+ entries.
9564        // Below this threshold, the statistical properties of a single-seeded
9565        // generator (e.g. Benford compliance) are better preserved.
9566        let entries = if total >= 10_000 && num_threads > 1 {
9567            // Parallel path: split the generator across cores and generate in parallel.
9568            // Each sub-generator gets a unique seed for deterministic, independent generation.
9569            let sub_generators = generator.split(num_threads);
9570            let entries_per_thread = total as usize / num_threads;
9571            let remainder = total as usize % num_threads;
9572
9573            let batches: Vec<Vec<JournalEntry>> = sub_generators
9574                .into_par_iter()
9575                .enumerate()
9576                .map(|(i, mut gen)| {
9577                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
9578                    gen.generate_batch(count)
9579                })
9580                .collect();
9581
9582            // Merge all batches into a single Vec
9583            let entries = JournalEntryGenerator::merge_results(batches);
9584
9585            if let Some(pb) = &pb {
9586                pb.inc(total);
9587            }
9588            entries
9589        } else {
9590            // Sequential path for small datasets (< 1000 entries)
9591            let mut entries = Vec::with_capacity(total as usize);
9592            for _ in 0..total {
9593                let entry = generator.generate();
9594                entries.push(entry);
9595                if let Some(pb) = &pb {
9596                    pb.inc(1);
9597                }
9598            }
9599            entries
9600        };
9601
9602        if let Some(pb) = pb {
9603            pb.finish_with_message("Journal entries complete");
9604        }
9605
9606        Ok(entries)
9607    }
9608
9609    /// Generate journal entries from document flows.
9610    ///
9611    /// This creates proper GL entries for each document in the P2P and O2C flows,
9612    /// ensuring that document activity is reflected in the general ledger.
9613    fn generate_jes_from_document_flows(
9614        &mut self,
9615        flows: &DocumentFlowSnapshot,
9616    ) -> SynthResult<Vec<JournalEntry>> {
9617        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
9618        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
9619
9620        let je_config = match self.resolve_coa_framework() {
9621            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
9622            CoAFramework::GermanSkr04 => {
9623                let fa = datasynth_core::FrameworkAccounts::german_gaap();
9624                DocumentFlowJeConfig::from(&fa)
9625            }
9626            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
9627        };
9628
9629        let populate_fec = je_config.populate_fec_fields;
9630        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
9631
9632        // Build auxiliary account lookup from vendor/customer master data so that
9633        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
9634        // PCG "4010001") instead of raw partner IDs.
9635        if populate_fec {
9636            let mut aux_lookup = std::collections::HashMap::new();
9637            for vendor in &self.master_data.vendors {
9638                if let Some(ref aux) = vendor.auxiliary_gl_account {
9639                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
9640                }
9641            }
9642            for customer in &self.master_data.customers {
9643                if let Some(ref aux) = customer.auxiliary_gl_account {
9644                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
9645                }
9646            }
9647            if !aux_lookup.is_empty() {
9648                generator.set_auxiliary_account_lookup(aux_lookup);
9649            }
9650        }
9651
9652        let mut entries = Vec::new();
9653
9654        // Generate JEs from P2P chains
9655        for chain in &flows.p2p_chains {
9656            let chain_entries = generator.generate_from_p2p_chain(chain);
9657            entries.extend(chain_entries);
9658            if let Some(pb) = &pb {
9659                pb.inc(1);
9660            }
9661        }
9662
9663        // Generate JEs from O2C chains
9664        for chain in &flows.o2c_chains {
9665            let chain_entries = generator.generate_from_o2c_chain(chain);
9666            entries.extend(chain_entries);
9667            if let Some(pb) = &pb {
9668                pb.inc(1);
9669            }
9670        }
9671
9672        if let Some(pb) = pb {
9673            pb.finish_with_message(format!(
9674                "Generated {} JEs from document flows",
9675                entries.len()
9676            ));
9677        }
9678
9679        Ok(entries)
9680    }
9681
9682    /// Generate journal entries from payroll runs.
9683    ///
9684    /// Creates one JE per payroll run:
9685    /// - DR Salaries & Wages (6100) for gross pay
9686    /// - CR Payroll Clearing (9100) for gross pay
9687    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
9688        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
9689
9690        let mut jes = Vec::with_capacity(payroll_runs.len());
9691
9692        for run in payroll_runs {
9693            let mut je = JournalEntry::new_simple(
9694                format!("JE-PAYROLL-{}", run.payroll_id),
9695                run.company_code.clone(),
9696                run.run_date,
9697                format!("Payroll {}", run.payroll_id),
9698            );
9699
9700            // Debit Salaries & Wages for gross pay
9701            je.add_line(JournalEntryLine {
9702                line_number: 1,
9703                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
9704                debit_amount: run.total_gross,
9705                reference: Some(run.payroll_id.clone()),
9706                text: Some(format!(
9707                    "Payroll {} ({} employees)",
9708                    run.payroll_id, run.employee_count
9709                )),
9710                ..Default::default()
9711            });
9712
9713            // Credit Payroll Clearing for gross pay
9714            je.add_line(JournalEntryLine {
9715                line_number: 2,
9716                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
9717                credit_amount: run.total_gross,
9718                reference: Some(run.payroll_id.clone()),
9719                ..Default::default()
9720            });
9721
9722            jes.push(je);
9723        }
9724
9725        jes
9726    }
9727
9728    /// Link document flows to subledger records.
9729    ///
9730    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
9731    /// ensuring subledger data is coherent with document flow data.
9732    fn link_document_flows_to_subledgers(
9733        &mut self,
9734        flows: &DocumentFlowSnapshot,
9735    ) -> SynthResult<SubledgerSnapshot> {
9736        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
9737        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
9738
9739        // Build vendor/customer name maps from master data for realistic subledger names
9740        let vendor_names: std::collections::HashMap<String, String> = self
9741            .master_data
9742            .vendors
9743            .iter()
9744            .map(|v| (v.vendor_id.clone(), v.name.clone()))
9745            .collect();
9746        let customer_names: std::collections::HashMap<String, String> = self
9747            .master_data
9748            .customers
9749            .iter()
9750            .map(|c| (c.customer_id.clone(), c.name.clone()))
9751            .collect();
9752
9753        let mut linker = DocumentFlowLinker::new()
9754            .with_vendor_names(vendor_names)
9755            .with_customer_names(customer_names);
9756
9757        // Convert vendor invoices to AP invoices
9758        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
9759        if let Some(pb) = &pb {
9760            pb.inc(flows.vendor_invoices.len() as u64);
9761        }
9762
9763        // Convert customer invoices to AR invoices
9764        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
9765        if let Some(pb) = &pb {
9766            pb.inc(flows.customer_invoices.len() as u64);
9767        }
9768
9769        if let Some(pb) = pb {
9770            pb.finish_with_message(format!(
9771                "Linked {} AP and {} AR invoices",
9772                ap_invoices.len(),
9773                ar_invoices.len()
9774            ));
9775        }
9776
9777        Ok(SubledgerSnapshot {
9778            ap_invoices,
9779            ar_invoices,
9780            fa_records: Vec::new(),
9781            inventory_positions: Vec::new(),
9782            inventory_movements: Vec::new(),
9783            // Aging reports are computed after payment settlement in phase_document_flows.
9784            ar_aging_reports: Vec::new(),
9785            ap_aging_reports: Vec::new(),
9786            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
9787            depreciation_runs: Vec::new(),
9788            inventory_valuations: Vec::new(),
9789            // Dunning runs and letters are populated in phase_document_flows after AR aging.
9790            dunning_runs: Vec::new(),
9791            dunning_letters: Vec::new(),
9792        })
9793    }
9794
9795    /// Generate OCPM events from document flows.
9796    ///
9797    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
9798    /// capturing the object-centric process perspective.
9799    #[allow(clippy::too_many_arguments)]
9800    fn generate_ocpm_events(
9801        &mut self,
9802        flows: &DocumentFlowSnapshot,
9803        sourcing: &SourcingSnapshot,
9804        hr: &HrSnapshot,
9805        manufacturing: &ManufacturingSnapshot,
9806        banking: &BankingSnapshot,
9807        audit: &AuditSnapshot,
9808        financial_reporting: &FinancialReportingSnapshot,
9809    ) -> SynthResult<OcpmSnapshot> {
9810        let total_chains = flows.p2p_chains.len()
9811            + flows.o2c_chains.len()
9812            + sourcing.sourcing_projects.len()
9813            + hr.payroll_runs.len()
9814            + manufacturing.production_orders.len()
9815            + banking.customers.len()
9816            + audit.engagements.len()
9817            + financial_reporting.bank_reconciliations.len();
9818        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
9819
9820        // Create OCPM event log with standard types
9821        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
9822        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
9823
9824        // Configure the OCPM generator
9825        let ocpm_config = OcpmGeneratorConfig {
9826            generate_p2p: true,
9827            generate_o2c: true,
9828            generate_s2c: !sourcing.sourcing_projects.is_empty(),
9829            generate_h2r: !hr.payroll_runs.is_empty(),
9830            generate_mfg: !manufacturing.production_orders.is_empty(),
9831            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
9832            generate_bank: !banking.customers.is_empty(),
9833            generate_audit: !audit.engagements.is_empty(),
9834            happy_path_rate: 0.75,
9835            exception_path_rate: 0.20,
9836            error_path_rate: 0.05,
9837            add_duration_variability: true,
9838            duration_std_dev_factor: 0.3,
9839        };
9840        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
9841        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
9842
9843        // Get available users for resource assignment
9844        let available_users: Vec<String> = self
9845            .master_data
9846            .employees
9847            .iter()
9848            .take(20)
9849            .map(|e| e.user_id.clone())
9850            .collect();
9851
9852        // Deterministic base date from config (avoids Utc::now() non-determinism)
9853        let fallback_date =
9854            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
9855        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9856            .unwrap_or(fallback_date);
9857        let base_midnight = base_date
9858            .and_hms_opt(0, 0, 0)
9859            .expect("midnight is always valid");
9860        let base_datetime =
9861            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
9862
9863        // Helper closure to add case results to event log
9864        let add_result = |event_log: &mut OcpmEventLog,
9865                          result: datasynth_ocpm::CaseGenerationResult| {
9866            for event in result.events {
9867                event_log.add_event(event);
9868            }
9869            for object in result.objects {
9870                event_log.add_object(object);
9871            }
9872            for relationship in result.relationships {
9873                event_log.add_relationship(relationship);
9874            }
9875            for corr in result.correlation_events {
9876                event_log.add_correlation_event(corr);
9877            }
9878            event_log.add_case(result.case_trace);
9879        };
9880
9881        // Generate events from P2P chains
9882        for chain in &flows.p2p_chains {
9883            let po = &chain.purchase_order;
9884            let documents = P2pDocuments::new(
9885                &po.header.document_id,
9886                &po.vendor_id,
9887                &po.header.company_code,
9888                po.total_net_amount,
9889                &po.header.currency,
9890                &ocpm_uuid_factory,
9891            )
9892            .with_goods_receipt(
9893                chain
9894                    .goods_receipts
9895                    .first()
9896                    .map(|gr| gr.header.document_id.as_str())
9897                    .unwrap_or(""),
9898                &ocpm_uuid_factory,
9899            )
9900            .with_invoice(
9901                chain
9902                    .vendor_invoice
9903                    .as_ref()
9904                    .map(|vi| vi.header.document_id.as_str())
9905                    .unwrap_or(""),
9906                &ocpm_uuid_factory,
9907            )
9908            .with_payment(
9909                chain
9910                    .payment
9911                    .as_ref()
9912                    .map(|p| p.header.document_id.as_str())
9913                    .unwrap_or(""),
9914                &ocpm_uuid_factory,
9915            );
9916
9917            let start_time =
9918                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
9919            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
9920            add_result(&mut event_log, result);
9921
9922            if let Some(pb) = &pb {
9923                pb.inc(1);
9924            }
9925        }
9926
9927        // Generate events from O2C chains
9928        for chain in &flows.o2c_chains {
9929            let so = &chain.sales_order;
9930            let documents = O2cDocuments::new(
9931                &so.header.document_id,
9932                &so.customer_id,
9933                &so.header.company_code,
9934                so.total_net_amount,
9935                &so.header.currency,
9936                &ocpm_uuid_factory,
9937            )
9938            .with_delivery(
9939                chain
9940                    .deliveries
9941                    .first()
9942                    .map(|d| d.header.document_id.as_str())
9943                    .unwrap_or(""),
9944                &ocpm_uuid_factory,
9945            )
9946            .with_invoice(
9947                chain
9948                    .customer_invoice
9949                    .as_ref()
9950                    .map(|ci| ci.header.document_id.as_str())
9951                    .unwrap_or(""),
9952                &ocpm_uuid_factory,
9953            )
9954            .with_receipt(
9955                chain
9956                    .customer_receipt
9957                    .as_ref()
9958                    .map(|r| r.header.document_id.as_str())
9959                    .unwrap_or(""),
9960                &ocpm_uuid_factory,
9961            );
9962
9963            let start_time =
9964                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
9965            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
9966            add_result(&mut event_log, result);
9967
9968            if let Some(pb) = &pb {
9969                pb.inc(1);
9970            }
9971        }
9972
9973        // Generate events from S2C sourcing projects
9974        for project in &sourcing.sourcing_projects {
9975            // Find vendor from contracts or qualifications
9976            let vendor_id = sourcing
9977                .contracts
9978                .iter()
9979                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
9980                .map(|c| c.vendor_id.clone())
9981                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
9982                .or_else(|| {
9983                    self.master_data
9984                        .vendors
9985                        .first()
9986                        .map(|v| v.vendor_id.clone())
9987                })
9988                .unwrap_or_else(|| "V000".to_string());
9989            let mut docs = S2cDocuments::new(
9990                &project.project_id,
9991                &vendor_id,
9992                &project.company_code,
9993                project.estimated_annual_spend,
9994                &ocpm_uuid_factory,
9995            );
9996            // Link RFx if available
9997            if let Some(rfx) = sourcing
9998                .rfx_events
9999                .iter()
10000                .find(|r| r.sourcing_project_id == project.project_id)
10001            {
10002                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
10003                // Link winning bid (status == Accepted)
10004                if let Some(bid) = sourcing.bids.iter().find(|b| {
10005                    b.rfx_id == rfx.rfx_id
10006                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
10007                }) {
10008                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
10009                }
10010            }
10011            // Link contract
10012            if let Some(contract) = sourcing
10013                .contracts
10014                .iter()
10015                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
10016            {
10017                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
10018            }
10019            let start_time = base_datetime - chrono::Duration::days(90);
10020            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
10021            add_result(&mut event_log, result);
10022
10023            if let Some(pb) = &pb {
10024                pb.inc(1);
10025            }
10026        }
10027
10028        // Generate events from H2R payroll runs
10029        for run in &hr.payroll_runs {
10030            // Use first matching payroll line item's employee, or fallback
10031            let employee_id = hr
10032                .payroll_line_items
10033                .iter()
10034                .find(|li| li.payroll_id == run.payroll_id)
10035                .map(|li| li.employee_id.as_str())
10036                .unwrap_or("EMP000");
10037            let docs = H2rDocuments::new(
10038                &run.payroll_id,
10039                employee_id,
10040                &run.company_code,
10041                run.total_gross,
10042                &ocpm_uuid_factory,
10043            )
10044            .with_time_entries(
10045                hr.time_entries
10046                    .iter()
10047                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
10048                    .take(5)
10049                    .map(|t| t.entry_id.as_str())
10050                    .collect(),
10051            );
10052            let start_time = base_datetime - chrono::Duration::days(30);
10053            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
10054            add_result(&mut event_log, result);
10055
10056            if let Some(pb) = &pb {
10057                pb.inc(1);
10058            }
10059        }
10060
10061        // Generate events from MFG production orders
10062        for order in &manufacturing.production_orders {
10063            let mut docs = MfgDocuments::new(
10064                &order.order_id,
10065                &order.material_id,
10066                &order.company_code,
10067                order.planned_quantity,
10068                &ocpm_uuid_factory,
10069            )
10070            .with_operations(
10071                order
10072                    .operations
10073                    .iter()
10074                    .map(|o| format!("OP-{:04}", o.operation_number))
10075                    .collect::<Vec<_>>()
10076                    .iter()
10077                    .map(std::string::String::as_str)
10078                    .collect(),
10079            );
10080            // Link quality inspection if available (via reference_id matching order_id)
10081            if let Some(insp) = manufacturing
10082                .quality_inspections
10083                .iter()
10084                .find(|i| i.reference_id == order.order_id)
10085            {
10086                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
10087            }
10088            // Link cycle count if available (match by material_id in items)
10089            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
10090                cc.items
10091                    .iter()
10092                    .any(|item| item.material_id == order.material_id)
10093            }) {
10094                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
10095            }
10096            let start_time = base_datetime - chrono::Duration::days(60);
10097            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
10098            add_result(&mut event_log, result);
10099
10100            if let Some(pb) = &pb {
10101                pb.inc(1);
10102            }
10103        }
10104
10105        // Generate events from Banking customers
10106        for customer in &banking.customers {
10107            let customer_id_str = customer.customer_id.to_string();
10108            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
10109            // Link accounts (primary_owner_id matches customer_id)
10110            if let Some(account) = banking
10111                .accounts
10112                .iter()
10113                .find(|a| a.primary_owner_id == customer.customer_id)
10114            {
10115                let account_id_str = account.account_id.to_string();
10116                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
10117                // Link transactions for this account
10118                let txn_strs: Vec<String> = banking
10119                    .transactions
10120                    .iter()
10121                    .filter(|t| t.account_id == account.account_id)
10122                    .take(10)
10123                    .map(|t| t.transaction_id.to_string())
10124                    .collect();
10125                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
10126                let txn_amounts: Vec<rust_decimal::Decimal> = banking
10127                    .transactions
10128                    .iter()
10129                    .filter(|t| t.account_id == account.account_id)
10130                    .take(10)
10131                    .map(|t| t.amount)
10132                    .collect();
10133                if !txn_ids.is_empty() {
10134                    docs = docs.with_transactions(txn_ids, txn_amounts);
10135                }
10136            }
10137            let start_time = base_datetime - chrono::Duration::days(180);
10138            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
10139            add_result(&mut event_log, result);
10140
10141            if let Some(pb) = &pb {
10142                pb.inc(1);
10143            }
10144        }
10145
10146        // Generate events from Audit engagements
10147        for engagement in &audit.engagements {
10148            let engagement_id_str = engagement.engagement_id.to_string();
10149            let docs = AuditDocuments::new(
10150                &engagement_id_str,
10151                &engagement.client_entity_id,
10152                &ocpm_uuid_factory,
10153            )
10154            .with_workpapers(
10155                audit
10156                    .workpapers
10157                    .iter()
10158                    .filter(|w| w.engagement_id == engagement.engagement_id)
10159                    .take(10)
10160                    .map(|w| w.workpaper_id.to_string())
10161                    .collect::<Vec<_>>()
10162                    .iter()
10163                    .map(std::string::String::as_str)
10164                    .collect(),
10165            )
10166            .with_evidence(
10167                audit
10168                    .evidence
10169                    .iter()
10170                    .filter(|e| e.engagement_id == engagement.engagement_id)
10171                    .take(10)
10172                    .map(|e| e.evidence_id.to_string())
10173                    .collect::<Vec<_>>()
10174                    .iter()
10175                    .map(std::string::String::as_str)
10176                    .collect(),
10177            )
10178            .with_risks(
10179                audit
10180                    .risk_assessments
10181                    .iter()
10182                    .filter(|r| r.engagement_id == engagement.engagement_id)
10183                    .take(5)
10184                    .map(|r| r.risk_id.to_string())
10185                    .collect::<Vec<_>>()
10186                    .iter()
10187                    .map(std::string::String::as_str)
10188                    .collect(),
10189            )
10190            .with_findings(
10191                audit
10192                    .findings
10193                    .iter()
10194                    .filter(|f| f.engagement_id == engagement.engagement_id)
10195                    .take(5)
10196                    .map(|f| f.finding_id.to_string())
10197                    .collect::<Vec<_>>()
10198                    .iter()
10199                    .map(std::string::String::as_str)
10200                    .collect(),
10201            )
10202            .with_judgments(
10203                audit
10204                    .judgments
10205                    .iter()
10206                    .filter(|j| j.engagement_id == engagement.engagement_id)
10207                    .take(5)
10208                    .map(|j| j.judgment_id.to_string())
10209                    .collect::<Vec<_>>()
10210                    .iter()
10211                    .map(std::string::String::as_str)
10212                    .collect(),
10213            );
10214            let start_time = base_datetime - chrono::Duration::days(120);
10215            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
10216            add_result(&mut event_log, result);
10217
10218            if let Some(pb) = &pb {
10219                pb.inc(1);
10220            }
10221        }
10222
10223        // Generate events from Bank Reconciliations
10224        for recon in &financial_reporting.bank_reconciliations {
10225            let docs = BankReconDocuments::new(
10226                &recon.reconciliation_id,
10227                &recon.bank_account_id,
10228                &recon.company_code,
10229                recon.bank_ending_balance,
10230                &ocpm_uuid_factory,
10231            )
10232            .with_statement_lines(
10233                recon
10234                    .statement_lines
10235                    .iter()
10236                    .take(20)
10237                    .map(|l| l.line_id.as_str())
10238                    .collect(),
10239            )
10240            .with_reconciling_items(
10241                recon
10242                    .reconciling_items
10243                    .iter()
10244                    .take(10)
10245                    .map(|i| i.item_id.as_str())
10246                    .collect(),
10247            );
10248            let start_time = base_datetime - chrono::Duration::days(30);
10249            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
10250            add_result(&mut event_log, result);
10251
10252            if let Some(pb) = &pb {
10253                pb.inc(1);
10254            }
10255        }
10256
10257        // Compute process variants
10258        event_log.compute_variants();
10259
10260        let summary = event_log.summary();
10261
10262        if let Some(pb) = pb {
10263            pb.finish_with_message(format!(
10264                "Generated {} OCPM events, {} objects",
10265                summary.event_count, summary.object_count
10266            ));
10267        }
10268
10269        Ok(OcpmSnapshot {
10270            event_count: summary.event_count,
10271            object_count: summary.object_count,
10272            case_count: summary.case_count,
10273            event_log: Some(event_log),
10274        })
10275    }
10276
10277    /// Inject anomalies into journal entries.
10278    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
10279        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
10280
10281        // Read anomaly rates from config instead of using hardcoded values.
10282        // Priority: anomaly_injection config > fraud config > default 0.02
10283        let total_rate = if self.config.anomaly_injection.enabled {
10284            self.config.anomaly_injection.rates.total_rate
10285        } else if self.config.fraud.enabled {
10286            self.config.fraud.fraud_rate
10287        } else {
10288            0.02
10289        };
10290
10291        let fraud_rate = if self.config.anomaly_injection.enabled {
10292            self.config.anomaly_injection.rates.fraud_rate
10293        } else {
10294            AnomalyRateConfig::default().fraud_rate
10295        };
10296
10297        let error_rate = if self.config.anomaly_injection.enabled {
10298            self.config.anomaly_injection.rates.error_rate
10299        } else {
10300            AnomalyRateConfig::default().error_rate
10301        };
10302
10303        let process_issue_rate = if self.config.anomaly_injection.enabled {
10304            self.config.anomaly_injection.rates.process_rate
10305        } else {
10306            AnomalyRateConfig::default().process_issue_rate
10307        };
10308
10309        let anomaly_config = AnomalyInjectorConfig {
10310            rates: AnomalyRateConfig {
10311                total_rate,
10312                fraud_rate,
10313                error_rate,
10314                process_issue_rate,
10315                ..Default::default()
10316            },
10317            seed: self.seed + 5000,
10318            ..Default::default()
10319        };
10320
10321        let mut injector = AnomalyInjector::new(anomaly_config);
10322        let result = injector.process_entries(entries);
10323
10324        if let Some(pb) = &pb {
10325            pb.inc(entries.len() as u64);
10326            pb.finish_with_message("Anomaly injection complete");
10327        }
10328
10329        let mut by_type = HashMap::new();
10330        for label in &result.labels {
10331            *by_type
10332                .entry(format!("{:?}", label.anomaly_type))
10333                .or_insert(0) += 1;
10334        }
10335
10336        Ok(AnomalyLabels {
10337            labels: result.labels,
10338            summary: Some(result.summary),
10339            by_type,
10340        })
10341    }
10342
10343    /// Validate journal entries using running balance tracker.
10344    ///
10345    /// Applies all entries to the balance tracker and validates:
10346    /// - Each entry is internally balanced (debits = credits)
10347    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
10348    ///
10349    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
10350    /// excluded from balance validation as they may be intentionally unbalanced.
10351    fn validate_journal_entries(
10352        &mut self,
10353        entries: &[JournalEntry],
10354    ) -> SynthResult<BalanceValidationResult> {
10355        // Filter out entries with human errors as they may be intentionally unbalanced
10356        let clean_entries: Vec<&JournalEntry> = entries
10357            .iter()
10358            .filter(|e| {
10359                e.header
10360                    .header_text
10361                    .as_ref()
10362                    .map(|t| !t.contains("[HUMAN_ERROR:"))
10363                    .unwrap_or(true)
10364            })
10365            .collect();
10366
10367        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
10368
10369        // Configure tracker to not fail on errors (collect them instead)
10370        let config = BalanceTrackerConfig {
10371            validate_on_each_entry: false,   // We'll validate at the end
10372            track_history: false,            // Skip history for performance
10373            fail_on_validation_error: false, // Collect errors, don't fail
10374            ..Default::default()
10375        };
10376        let validation_currency = self
10377            .config
10378            .companies
10379            .first()
10380            .map(|c| c.currency.clone())
10381            .unwrap_or_else(|| "USD".to_string());
10382
10383        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
10384
10385        // Apply clean entries (without human errors)
10386        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
10387        let errors = tracker.apply_entries(&clean_refs);
10388
10389        if let Some(pb) = &pb {
10390            pb.inc(entries.len() as u64);
10391        }
10392
10393        // Check if any entries were unbalanced
10394        // Note: When fail_on_validation_error is false, errors are stored in tracker
10395        let has_unbalanced = tracker
10396            .get_validation_errors()
10397            .iter()
10398            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
10399
10400        // Validate balance sheet for each company
10401        // Include both returned errors and collected validation errors
10402        let mut all_errors = errors;
10403        all_errors.extend(tracker.get_validation_errors().iter().cloned());
10404        let company_codes: Vec<String> = self
10405            .config
10406            .companies
10407            .iter()
10408            .map(|c| c.code.clone())
10409            .collect();
10410
10411        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10412            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10413            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10414
10415        for company_code in &company_codes {
10416            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
10417                all_errors.push(e);
10418            }
10419        }
10420
10421        // Get statistics after all mutable operations are done
10422        let stats = tracker.get_statistics();
10423
10424        // Determine if balanced overall
10425        let is_balanced = all_errors.is_empty();
10426
10427        if let Some(pb) = pb {
10428            let msg = if is_balanced {
10429                "Balance validation passed"
10430            } else {
10431                "Balance validation completed with errors"
10432            };
10433            pb.finish_with_message(msg);
10434        }
10435
10436        Ok(BalanceValidationResult {
10437            validated: true,
10438            is_balanced,
10439            entries_processed: stats.entries_processed,
10440            total_debits: stats.total_debits,
10441            total_credits: stats.total_credits,
10442            accounts_tracked: stats.accounts_tracked,
10443            companies_tracked: stats.companies_tracked,
10444            validation_errors: all_errors,
10445            has_unbalanced_entries: has_unbalanced,
10446        })
10447    }
10448
10449    /// Inject data quality variations into journal entries.
10450    ///
10451    /// Applies typos, missing values, and format variations to make
10452    /// the synthetic data more realistic for testing data cleaning pipelines.
10453    fn inject_data_quality(
10454        &mut self,
10455        entries: &mut [JournalEntry],
10456    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
10457        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
10458
10459        // Build config from user-specified schema settings when data_quality is enabled;
10460        // otherwise fall back to the low-rate minimal() preset.
10461        let config = if self.config.data_quality.enabled {
10462            let dq = &self.config.data_quality;
10463            DataQualityConfig {
10464                enable_missing_values: dq.missing_values.enabled,
10465                missing_values: datasynth_generators::MissingValueConfig {
10466                    global_rate: dq.effective_missing_rate(),
10467                    ..Default::default()
10468                },
10469                enable_format_variations: dq.format_variations.enabled,
10470                format_variations: datasynth_generators::FormatVariationConfig {
10471                    date_variation_rate: dq.format_variations.dates.rate,
10472                    amount_variation_rate: dq.format_variations.amounts.rate,
10473                    identifier_variation_rate: dq.format_variations.identifiers.rate,
10474                    ..Default::default()
10475                },
10476                enable_duplicates: dq.duplicates.enabled,
10477                duplicates: datasynth_generators::DuplicateConfig {
10478                    duplicate_rate: dq.effective_duplicate_rate(),
10479                    ..Default::default()
10480                },
10481                enable_typos: dq.typos.enabled,
10482                typos: datasynth_generators::TypoConfig {
10483                    char_error_rate: dq.effective_typo_rate(),
10484                    ..Default::default()
10485                },
10486                enable_encoding_issues: dq.encoding_issues.enabled,
10487                encoding_issue_rate: dq.encoding_issues.rate,
10488                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
10489                track_statistics: true,
10490            }
10491        } else {
10492            DataQualityConfig::minimal()
10493        };
10494        let mut injector = DataQualityInjector::new(config);
10495
10496        // Wire country pack for locale-aware format baselines
10497        injector.set_country_pack(self.primary_pack().clone());
10498
10499        // Build context for missing value decisions
10500        let context = HashMap::new();
10501
10502        for entry in entries.iter_mut() {
10503            // Process header_text field (common target for typos)
10504            if let Some(text) = &entry.header.header_text {
10505                let processed = injector.process_text_field(
10506                    "header_text",
10507                    text,
10508                    &entry.header.document_id.to_string(),
10509                    &context,
10510                );
10511                match processed {
10512                    Some(new_text) if new_text != *text => {
10513                        entry.header.header_text = Some(new_text);
10514                    }
10515                    None => {
10516                        entry.header.header_text = None; // Missing value
10517                    }
10518                    _ => {}
10519                }
10520            }
10521
10522            // Process reference field
10523            if let Some(ref_text) = &entry.header.reference {
10524                let processed = injector.process_text_field(
10525                    "reference",
10526                    ref_text,
10527                    &entry.header.document_id.to_string(),
10528                    &context,
10529                );
10530                match processed {
10531                    Some(new_text) if new_text != *ref_text => {
10532                        entry.header.reference = Some(new_text);
10533                    }
10534                    None => {
10535                        entry.header.reference = None;
10536                    }
10537                    _ => {}
10538                }
10539            }
10540
10541            // Process user_persona field (potential for typos in user IDs)
10542            let user_persona = entry.header.user_persona.clone();
10543            if let Some(processed) = injector.process_text_field(
10544                "user_persona",
10545                &user_persona,
10546                &entry.header.document_id.to_string(),
10547                &context,
10548            ) {
10549                if processed != user_persona {
10550                    entry.header.user_persona = processed;
10551                }
10552            }
10553
10554            // Process line items
10555            for line in &mut entry.lines {
10556                // Process line description if present
10557                if let Some(ref text) = line.line_text {
10558                    let processed = injector.process_text_field(
10559                        "line_text",
10560                        text,
10561                        &entry.header.document_id.to_string(),
10562                        &context,
10563                    );
10564                    match processed {
10565                        Some(new_text) if new_text != *text => {
10566                            line.line_text = Some(new_text);
10567                        }
10568                        None => {
10569                            line.line_text = None;
10570                        }
10571                        _ => {}
10572                    }
10573                }
10574
10575                // Process cost_center if present
10576                if let Some(cc) = &line.cost_center {
10577                    let processed = injector.process_text_field(
10578                        "cost_center",
10579                        cc,
10580                        &entry.header.document_id.to_string(),
10581                        &context,
10582                    );
10583                    match processed {
10584                        Some(new_cc) if new_cc != *cc => {
10585                            line.cost_center = Some(new_cc);
10586                        }
10587                        None => {
10588                            line.cost_center = None;
10589                        }
10590                        _ => {}
10591                    }
10592                }
10593            }
10594
10595            if let Some(pb) = &pb {
10596                pb.inc(1);
10597            }
10598        }
10599
10600        if let Some(pb) = pb {
10601            pb.finish_with_message("Data quality injection complete");
10602        }
10603
10604        let quality_issues = injector.issues().to_vec();
10605        Ok((injector.stats().clone(), quality_issues))
10606    }
10607
10608    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
10609    ///
10610    /// Creates complete audit documentation for each company in the configuration,
10611    /// following ISA standards:
10612    /// - ISA 210/220: Engagement acceptance and terms
10613    /// - ISA 230: Audit documentation (workpapers)
10614    /// - ISA 265: Control deficiencies (findings)
10615    /// - ISA 315/330: Risk assessment and response
10616    /// - ISA 500: Audit evidence
10617    /// - ISA 200: Professional judgment
10618    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
10619        // Check if FSM-driven audit generation is enabled
10620        let use_fsm = self
10621            .config
10622            .audit
10623            .fsm
10624            .as_ref()
10625            .map(|f| f.enabled)
10626            .unwrap_or(false);
10627
10628        if use_fsm {
10629            return self.generate_audit_data_with_fsm(entries);
10630        }
10631
10632        // --- Legacy (non-FSM) audit generation follows ---
10633        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10634            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10635        let fiscal_year = start_date.year() as u16;
10636        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
10637
10638        // Calculate rough total revenue from entries for materiality
10639        let total_revenue: rust_decimal::Decimal = entries
10640            .iter()
10641            .flat_map(|e| e.lines.iter())
10642            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
10643            .map(|l| l.credit_amount)
10644            .sum();
10645
10646        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
10647        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
10648
10649        let mut snapshot = AuditSnapshot::default();
10650
10651        // Initialize generators
10652        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
10653        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
10654        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
10655        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
10656        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
10657        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
10658        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
10659        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
10660        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
10661        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
10662        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
10663        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
10664
10665        // Get list of accounts from CoA for risk assessment
10666        let accounts: Vec<String> = self
10667            .coa
10668            .as_ref()
10669            .map(|coa| {
10670                coa.get_postable_accounts()
10671                    .iter()
10672                    .map(|acc| acc.account_code().to_string())
10673                    .collect()
10674            })
10675            .unwrap_or_default();
10676
10677        // Generate engagements for each company
10678        for (i, company) in self.config.companies.iter().enumerate() {
10679            // Calculate company-specific revenue (proportional to volume weight)
10680            let company_revenue = total_revenue
10681                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
10682
10683            // Generate engagements for this company
10684            let engagements_for_company =
10685                self.phase_config.audit_engagements / self.config.companies.len().max(1);
10686            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
10687                1
10688            } else {
10689                0
10690            };
10691
10692            for _eng_idx in 0..(engagements_for_company + extra) {
10693                // Generate the engagement
10694                let mut engagement = engagement_gen.generate_engagement(
10695                    &company.code,
10696                    &company.name,
10697                    fiscal_year,
10698                    period_end,
10699                    company_revenue,
10700                    None, // Use default engagement type
10701                );
10702
10703                // Replace synthetic team IDs with real employee IDs from master data
10704                if !self.master_data.employees.is_empty() {
10705                    let emp_count = self.master_data.employees.len();
10706                    // Use employee IDs deterministically based on engagement index
10707                    let base = (i * 10 + _eng_idx) % emp_count;
10708                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
10709                        .employee_id
10710                        .clone();
10711                    engagement.engagement_manager_id = self.master_data.employees
10712                        [(base + 1) % emp_count]
10713                        .employee_id
10714                        .clone();
10715                    let real_team: Vec<String> = engagement
10716                        .team_member_ids
10717                        .iter()
10718                        .enumerate()
10719                        .map(|(j, _)| {
10720                            self.master_data.employees[(base + 2 + j) % emp_count]
10721                                .employee_id
10722                                .clone()
10723                        })
10724                        .collect();
10725                    engagement.team_member_ids = real_team;
10726                }
10727
10728                if let Some(pb) = &pb {
10729                    pb.inc(1);
10730                }
10731
10732                // Get team members from the engagement
10733                let team_members: Vec<String> = engagement.team_member_ids.clone();
10734
10735                // Generate workpapers for the engagement
10736                let workpapers =
10737                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members);
10738
10739                for wp in &workpapers {
10740                    if let Some(pb) = &pb {
10741                        pb.inc(1);
10742                    }
10743
10744                    // Generate evidence for each workpaper
10745                    let evidence = evidence_gen.generate_evidence_for_workpaper(
10746                        wp,
10747                        &team_members,
10748                        wp.preparer_date,
10749                    );
10750
10751                    for _ in &evidence {
10752                        if let Some(pb) = &pb {
10753                            pb.inc(1);
10754                        }
10755                    }
10756
10757                    snapshot.evidence.extend(evidence);
10758                }
10759
10760                // Generate risk assessments for the engagement
10761                let risks =
10762                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
10763
10764                for _ in &risks {
10765                    if let Some(pb) = &pb {
10766                        pb.inc(1);
10767                    }
10768                }
10769                snapshot.risk_assessments.extend(risks);
10770
10771                // Generate findings for the engagement
10772                let findings = finding_gen.generate_findings_for_engagement(
10773                    &engagement,
10774                    &workpapers,
10775                    &team_members,
10776                );
10777
10778                for _ in &findings {
10779                    if let Some(pb) = &pb {
10780                        pb.inc(1);
10781                    }
10782                }
10783                snapshot.findings.extend(findings);
10784
10785                // Generate professional judgments for the engagement
10786                let judgments =
10787                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
10788
10789                for _ in &judgments {
10790                    if let Some(pb) = &pb {
10791                        pb.inc(1);
10792                    }
10793                }
10794                snapshot.judgments.extend(judgments);
10795
10796                // ISA 505: External confirmations and responses
10797                let (confs, resps) =
10798                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
10799                snapshot.confirmations.extend(confs);
10800                snapshot.confirmation_responses.extend(resps);
10801
10802                // ISA 330: Procedure steps per workpaper
10803                let team_pairs: Vec<(String, String)> = team_members
10804                    .iter()
10805                    .map(|id| {
10806                        let name = self
10807                            .master_data
10808                            .employees
10809                            .iter()
10810                            .find(|e| e.employee_id == *id)
10811                            .map(|e| e.display_name.clone())
10812                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
10813                        (id.clone(), name)
10814                    })
10815                    .collect();
10816                for wp in &workpapers {
10817                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
10818                    snapshot.procedure_steps.extend(steps);
10819                }
10820
10821                // ISA 530: Samples per workpaper
10822                for wp in &workpapers {
10823                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
10824                        snapshot.samples.push(sample);
10825                    }
10826                }
10827
10828                // ISA 520: Analytical procedures
10829                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
10830                snapshot.analytical_results.extend(analytical);
10831
10832                // ISA 610: Internal audit function and reports
10833                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
10834                snapshot.ia_functions.push(ia_func);
10835                snapshot.ia_reports.extend(ia_reports);
10836
10837                // ISA 550: Related parties and transactions
10838                let vendor_names: Vec<String> = self
10839                    .master_data
10840                    .vendors
10841                    .iter()
10842                    .map(|v| v.name.clone())
10843                    .collect();
10844                let customer_names: Vec<String> = self
10845                    .master_data
10846                    .customers
10847                    .iter()
10848                    .map(|c| c.name.clone())
10849                    .collect();
10850                let (parties, rp_txns) =
10851                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
10852                snapshot.related_parties.extend(parties);
10853                snapshot.related_party_transactions.extend(rp_txns);
10854
10855                // Add workpapers after findings since findings need them
10856                snapshot.workpapers.extend(workpapers);
10857
10858                // Generate audit scope record for this engagement (one per engagement)
10859                {
10860                    let scope_id = format!(
10861                        "SCOPE-{}-{}",
10862                        engagement.engagement_id.simple(),
10863                        &engagement.client_entity_id
10864                    );
10865                    let scope = datasynth_core::models::audit::AuditScope::new(
10866                        scope_id.clone(),
10867                        engagement.engagement_id.to_string(),
10868                        engagement.client_entity_id.clone(),
10869                        engagement.materiality,
10870                    );
10871                    // Wire scope_id back to engagement
10872                    let mut eng = engagement;
10873                    eng.scope_id = Some(scope_id);
10874                    snapshot.audit_scopes.push(scope);
10875                    snapshot.engagements.push(eng);
10876                }
10877            }
10878        }
10879
10880        // ----------------------------------------------------------------
10881        // ISA 600: Group audit — component auditors, plan, instructions, reports
10882        // ----------------------------------------------------------------
10883        if self.config.companies.len() > 1 {
10884            // Use materiality from the first engagement if available, otherwise
10885            // derive a reasonable figure from total revenue.
10886            let group_materiality = snapshot
10887                .engagements
10888                .first()
10889                .map(|e| e.materiality)
10890                .unwrap_or_else(|| {
10891                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
10892                    total_revenue * pct
10893                });
10894
10895            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
10896            let group_engagement_id = snapshot
10897                .engagements
10898                .first()
10899                .map(|e| e.engagement_id.to_string())
10900                .unwrap_or_else(|| "GROUP-ENG".to_string());
10901
10902            let component_snapshot = component_gen.generate(
10903                &self.config.companies,
10904                group_materiality,
10905                &group_engagement_id,
10906                period_end,
10907            );
10908
10909            snapshot.component_auditors = component_snapshot.component_auditors;
10910            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
10911            snapshot.component_instructions = component_snapshot.component_instructions;
10912            snapshot.component_reports = component_snapshot.component_reports;
10913
10914            info!(
10915                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
10916                snapshot.component_auditors.len(),
10917                snapshot.component_instructions.len(),
10918                snapshot.component_reports.len(),
10919            );
10920        }
10921
10922        // ----------------------------------------------------------------
10923        // ISA 210: Engagement letters — one per engagement
10924        // ----------------------------------------------------------------
10925        {
10926            let applicable_framework = self
10927                .config
10928                .accounting_standards
10929                .framework
10930                .as_ref()
10931                .map(|f| format!("{f:?}"))
10932                .unwrap_or_else(|| "IFRS".to_string());
10933
10934            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
10935            let entity_count = self.config.companies.len();
10936
10937            for engagement in &snapshot.engagements {
10938                let company = self
10939                    .config
10940                    .companies
10941                    .iter()
10942                    .find(|c| c.code == engagement.client_entity_id);
10943                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
10944                let letter_date = engagement.planning_start;
10945                let letter = letter_gen.generate(
10946                    &engagement.engagement_id.to_string(),
10947                    &engagement.client_name,
10948                    entity_count,
10949                    engagement.period_end_date,
10950                    currency,
10951                    &applicable_framework,
10952                    letter_date,
10953                );
10954                snapshot.engagement_letters.push(letter);
10955            }
10956
10957            info!(
10958                "ISA 210 engagement letters: {} generated",
10959                snapshot.engagement_letters.len()
10960            );
10961        }
10962
10963        // ----------------------------------------------------------------
10964        // ISA 560 / IAS 10: Subsequent events
10965        // ----------------------------------------------------------------
10966        {
10967            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
10968            let entity_codes: Vec<String> = self
10969                .config
10970                .companies
10971                .iter()
10972                .map(|c| c.code.clone())
10973                .collect();
10974            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
10975            info!(
10976                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
10977                subsequent.len(),
10978                subsequent
10979                    .iter()
10980                    .filter(|e| matches!(
10981                        e.classification,
10982                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
10983                    ))
10984                    .count(),
10985                subsequent
10986                    .iter()
10987                    .filter(|e| matches!(
10988                        e.classification,
10989                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
10990                    ))
10991                    .count(),
10992            );
10993            snapshot.subsequent_events = subsequent;
10994        }
10995
10996        // ----------------------------------------------------------------
10997        // ISA 402: Service organization controls
10998        // ----------------------------------------------------------------
10999        {
11000            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
11001            let entity_codes: Vec<String> = self
11002                .config
11003                .companies
11004                .iter()
11005                .map(|c| c.code.clone())
11006                .collect();
11007            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
11008            info!(
11009                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
11010                soc_snapshot.service_organizations.len(),
11011                soc_snapshot.soc_reports.len(),
11012                soc_snapshot.user_entity_controls.len(),
11013            );
11014            snapshot.service_organizations = soc_snapshot.service_organizations;
11015            snapshot.soc_reports = soc_snapshot.soc_reports;
11016            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
11017        }
11018
11019        // ----------------------------------------------------------------
11020        // ISA 570: Going concern assessments
11021        // ----------------------------------------------------------------
11022        {
11023            use datasynth_generators::audit::going_concern_generator::{
11024                GoingConcernGenerator, GoingConcernInput,
11025            };
11026            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
11027            let entity_codes: Vec<String> = self
11028                .config
11029                .companies
11030                .iter()
11031                .map(|c| c.code.clone())
11032                .collect();
11033            // Assessment date = period end + 75 days (typical sign-off window).
11034            let assessment_date = period_end + chrono::Duration::days(75);
11035            let period_label = format!("FY{}", period_end.year());
11036
11037            // Build financial inputs from actual journal entries.
11038            //
11039            // We derive approximate P&L, working capital, and operating cash flow
11040            // by aggregating GL account balances from the journal entry population.
11041            // Account ranges used (standard chart):
11042            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
11043            //   Expenses:        6xxx (debit-normal)
11044            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
11045            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
11046            //   Operating CF:    net income adjusted for D&A (rough proxy)
11047            let gc_inputs: Vec<GoingConcernInput> = self
11048                .config
11049                .companies
11050                .iter()
11051                .map(|company| {
11052                    let code = &company.code;
11053                    let mut revenue = rust_decimal::Decimal::ZERO;
11054                    let mut expenses = rust_decimal::Decimal::ZERO;
11055                    let mut current_assets = rust_decimal::Decimal::ZERO;
11056                    let mut current_liabs = rust_decimal::Decimal::ZERO;
11057                    let mut total_debt = rust_decimal::Decimal::ZERO;
11058
11059                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
11060                        for line in &je.lines {
11061                            let acct = line.gl_account.as_str();
11062                            let net = line.debit_amount - line.credit_amount;
11063                            if acct.starts_with('4') {
11064                                // Revenue accounts: credit-normal, so negative net = revenue earned
11065                                revenue -= net;
11066                            } else if acct.starts_with('6') {
11067                                // Expense accounts: debit-normal
11068                                expenses += net;
11069                            }
11070                            // Balance sheet accounts for working capital
11071                            if acct.starts_with('1') {
11072                                // Current asset accounts (1000–1499)
11073                                if let Ok(n) = acct.parse::<u32>() {
11074                                    if (1000..=1499).contains(&n) {
11075                                        current_assets += net;
11076                                    }
11077                                }
11078                            } else if acct.starts_with('2') {
11079                                if let Ok(n) = acct.parse::<u32>() {
11080                                    if (2000..=2499).contains(&n) {
11081                                        // Current liabilities
11082                                        current_liabs -= net; // credit-normal
11083                                    } else if (2500..=2999).contains(&n) {
11084                                        // Long-term debt
11085                                        total_debt -= net;
11086                                    }
11087                                }
11088                            }
11089                        }
11090                    }
11091
11092                    let net_income = revenue - expenses;
11093                    let working_capital = current_assets - current_liabs;
11094                    // Rough operating CF proxy: net income (full accrual CF calculation
11095                    // is done separately in the cash flow statement generator)
11096                    let operating_cash_flow = net_income;
11097
11098                    GoingConcernInput {
11099                        entity_code: code.clone(),
11100                        net_income,
11101                        working_capital,
11102                        operating_cash_flow,
11103                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
11104                        assessment_date,
11105                    }
11106                })
11107                .collect();
11108
11109            let assessments = if gc_inputs.is_empty() {
11110                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
11111            } else {
11112                gc_gen.generate_for_entities_with_inputs(
11113                    &entity_codes,
11114                    &gc_inputs,
11115                    assessment_date,
11116                    &period_label,
11117                )
11118            };
11119            info!(
11120                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
11121                assessments.len(),
11122                assessments.iter().filter(|a| matches!(
11123                    a.auditor_conclusion,
11124                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
11125                )).count(),
11126                assessments.iter().filter(|a| matches!(
11127                    a.auditor_conclusion,
11128                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
11129                )).count(),
11130                assessments.iter().filter(|a| matches!(
11131                    a.auditor_conclusion,
11132                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
11133                )).count(),
11134            );
11135            snapshot.going_concern_assessments = assessments;
11136        }
11137
11138        // ----------------------------------------------------------------
11139        // ISA 540: Accounting estimates
11140        // ----------------------------------------------------------------
11141        {
11142            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
11143            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
11144            let entity_codes: Vec<String> = self
11145                .config
11146                .companies
11147                .iter()
11148                .map(|c| c.code.clone())
11149                .collect();
11150            let estimates = est_gen.generate_for_entities(&entity_codes);
11151            info!(
11152                "ISA 540 accounting estimates: {} estimates across {} entities \
11153                 ({} with retrospective reviews, {} with auditor point estimates)",
11154                estimates.len(),
11155                entity_codes.len(),
11156                estimates
11157                    .iter()
11158                    .filter(|e| e.retrospective_review.is_some())
11159                    .count(),
11160                estimates
11161                    .iter()
11162                    .filter(|e| e.auditor_point_estimate.is_some())
11163                    .count(),
11164            );
11165            snapshot.accounting_estimates = estimates;
11166        }
11167
11168        // ----------------------------------------------------------------
11169        // ISA 700/701/705/706: Audit opinions (one per engagement)
11170        // ----------------------------------------------------------------
11171        {
11172            use datasynth_generators::audit::audit_opinion_generator::{
11173                AuditOpinionGenerator, AuditOpinionInput,
11174            };
11175
11176            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
11177
11178            // Build inputs — one per engagement, linking findings and going concern.
11179            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
11180                .engagements
11181                .iter()
11182                .map(|eng| {
11183                    // Collect findings for this engagement.
11184                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11185                        .findings
11186                        .iter()
11187                        .filter(|f| f.engagement_id == eng.engagement_id)
11188                        .cloned()
11189                        .collect();
11190
11191                    // Going concern for this entity.
11192                    let gc = snapshot
11193                        .going_concern_assessments
11194                        .iter()
11195                        .find(|g| g.entity_code == eng.client_entity_id)
11196                        .cloned();
11197
11198                    // Component reports relevant to this engagement.
11199                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
11200                        snapshot.component_reports.clone();
11201
11202                    let auditor = self
11203                        .master_data
11204                        .employees
11205                        .first()
11206                        .map(|e| e.display_name.clone())
11207                        .unwrap_or_else(|| "Global Audit LLP".into());
11208
11209                    let partner = self
11210                        .master_data
11211                        .employees
11212                        .get(1)
11213                        .map(|e| e.display_name.clone())
11214                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
11215
11216                    AuditOpinionInput {
11217                        entity_code: eng.client_entity_id.clone(),
11218                        entity_name: eng.client_name.clone(),
11219                        engagement_id: eng.engagement_id,
11220                        period_end: eng.period_end_date,
11221                        findings: eng_findings,
11222                        going_concern: gc,
11223                        component_reports: comp_reports,
11224                        // Mark as US-listed when audit standards include PCAOB.
11225                        is_us_listed: {
11226                            let fw = &self.config.audit_standards.isa_compliance.framework;
11227                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
11228                        },
11229                        auditor_name: auditor,
11230                        engagement_partner: partner,
11231                    }
11232                })
11233                .collect();
11234
11235            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
11236
11237            for go in &generated_opinions {
11238                snapshot
11239                    .key_audit_matters
11240                    .extend(go.key_audit_matters.clone());
11241            }
11242            snapshot.audit_opinions = generated_opinions
11243                .into_iter()
11244                .map(|go| go.opinion)
11245                .collect();
11246
11247            info!(
11248                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
11249                snapshot.audit_opinions.len(),
11250                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
11251                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
11252                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
11253                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
11254            );
11255        }
11256
11257        // ----------------------------------------------------------------
11258        // SOX 302 / 404 assessments
11259        // ----------------------------------------------------------------
11260        {
11261            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
11262
11263            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
11264
11265            for (i, company) in self.config.companies.iter().enumerate() {
11266                // Collect findings for this company's engagements.
11267                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
11268                    .engagements
11269                    .iter()
11270                    .filter(|e| e.client_entity_id == company.code)
11271                    .map(|e| e.engagement_id)
11272                    .collect();
11273
11274                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
11275                    .findings
11276                    .iter()
11277                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
11278                    .cloned()
11279                    .collect();
11280
11281                // Derive executive names from employee list.
11282                let emp_count = self.master_data.employees.len();
11283                let ceo_name = if emp_count > 0 {
11284                    self.master_data.employees[i % emp_count]
11285                        .display_name
11286                        .clone()
11287                } else {
11288                    format!("CEO of {}", company.name)
11289                };
11290                let cfo_name = if emp_count > 1 {
11291                    self.master_data.employees[(i + 1) % emp_count]
11292                        .display_name
11293                        .clone()
11294                } else {
11295                    format!("CFO of {}", company.name)
11296                };
11297
11298                // Use engagement materiality if available.
11299                let materiality = snapshot
11300                    .engagements
11301                    .iter()
11302                    .find(|e| e.client_entity_id == company.code)
11303                    .map(|e| e.materiality)
11304                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
11305
11306                let input = SoxGeneratorInput {
11307                    company_code: company.code.clone(),
11308                    company_name: company.name.clone(),
11309                    fiscal_year,
11310                    period_end,
11311                    findings: company_findings,
11312                    ceo_name,
11313                    cfo_name,
11314                    materiality_threshold: materiality,
11315                    revenue_percent: rust_decimal::Decimal::from(100),
11316                    assets_percent: rust_decimal::Decimal::from(100),
11317                    significant_accounts: vec![
11318                        "Revenue".into(),
11319                        "Accounts Receivable".into(),
11320                        "Inventory".into(),
11321                        "Fixed Assets".into(),
11322                        "Accounts Payable".into(),
11323                    ],
11324                };
11325
11326                let (certs, assessment) = sox_gen.generate(&input);
11327                snapshot.sox_302_certifications.extend(certs);
11328                snapshot.sox_404_assessments.push(assessment);
11329            }
11330
11331            info!(
11332                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
11333                snapshot.sox_302_certifications.len(),
11334                snapshot.sox_404_assessments.len(),
11335                snapshot
11336                    .sox_404_assessments
11337                    .iter()
11338                    .filter(|a| a.icfr_effective)
11339                    .count(),
11340                snapshot
11341                    .sox_404_assessments
11342                    .iter()
11343                    .filter(|a| !a.icfr_effective)
11344                    .count(),
11345            );
11346        }
11347
11348        // ----------------------------------------------------------------
11349        // ISA 320: Materiality calculations (one per entity)
11350        // ----------------------------------------------------------------
11351        {
11352            use datasynth_generators::audit::materiality_generator::{
11353                MaterialityGenerator, MaterialityInput,
11354            };
11355
11356            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
11357
11358            // Compute per-company financials from JEs.
11359            // Asset accounts start with '1', revenue with '4',
11360            // expense accounts with '5' or '6'.
11361            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
11362
11363            for company in &self.config.companies {
11364                let company_code = company.code.clone();
11365
11366                // Revenue: credit-side entries on 4xxx accounts
11367                let company_revenue: rust_decimal::Decimal = entries
11368                    .iter()
11369                    .filter(|e| e.company_code() == company_code)
11370                    .flat_map(|e| e.lines.iter())
11371                    .filter(|l| l.account_code.starts_with('4'))
11372                    .map(|l| l.credit_amount)
11373                    .sum();
11374
11375                // Total assets: debit balances on 1xxx accounts
11376                let total_assets: rust_decimal::Decimal = entries
11377                    .iter()
11378                    .filter(|e| e.company_code() == company_code)
11379                    .flat_map(|e| e.lines.iter())
11380                    .filter(|l| l.account_code.starts_with('1'))
11381                    .map(|l| l.debit_amount)
11382                    .sum();
11383
11384                // Expenses: debit-side entries on 5xxx/6xxx accounts
11385                let total_expenses: rust_decimal::Decimal = entries
11386                    .iter()
11387                    .filter(|e| e.company_code() == company_code)
11388                    .flat_map(|e| e.lines.iter())
11389                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11390                    .map(|l| l.debit_amount)
11391                    .sum();
11392
11393                // Equity: credit balances on 3xxx accounts
11394                let equity: rust_decimal::Decimal = entries
11395                    .iter()
11396                    .filter(|e| e.company_code() == company_code)
11397                    .flat_map(|e| e.lines.iter())
11398                    .filter(|l| l.account_code.starts_with('3'))
11399                    .map(|l| l.credit_amount)
11400                    .sum();
11401
11402                let pretax_income = company_revenue - total_expenses;
11403
11404                // If no company-specific data, fall back to proportional share
11405                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
11406                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
11407                        .unwrap_or(rust_decimal::Decimal::ONE);
11408                    (
11409                        total_revenue * w,
11410                        total_revenue * w * rust_decimal::Decimal::from(3),
11411                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
11412                        total_revenue * w * rust_decimal::Decimal::from(2),
11413                    )
11414                } else {
11415                    (company_revenue, total_assets, pretax_income, equity)
11416                };
11417
11418                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
11419
11420                materiality_inputs.push(MaterialityInput {
11421                    entity_code: company_code,
11422                    period: format!("FY{}", fiscal_year),
11423                    revenue: rev,
11424                    pretax_income: pti,
11425                    total_assets: assets,
11426                    equity: eq,
11427                    gross_profit,
11428                });
11429            }
11430
11431            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
11432
11433            info!(
11434                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
11435                 {} total assets, {} equity benchmarks)",
11436                snapshot.materiality_calculations.len(),
11437                snapshot
11438                    .materiality_calculations
11439                    .iter()
11440                    .filter(|m| matches!(
11441                        m.benchmark,
11442                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
11443                    ))
11444                    .count(),
11445                snapshot
11446                    .materiality_calculations
11447                    .iter()
11448                    .filter(|m| matches!(
11449                        m.benchmark,
11450                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
11451                    ))
11452                    .count(),
11453                snapshot
11454                    .materiality_calculations
11455                    .iter()
11456                    .filter(|m| matches!(
11457                        m.benchmark,
11458                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
11459                    ))
11460                    .count(),
11461                snapshot
11462                    .materiality_calculations
11463                    .iter()
11464                    .filter(|m| matches!(
11465                        m.benchmark,
11466                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
11467                    ))
11468                    .count(),
11469            );
11470        }
11471
11472        // ----------------------------------------------------------------
11473        // ISA 315: Combined Risk Assessments (per entity, per account area)
11474        // ----------------------------------------------------------------
11475        {
11476            use datasynth_generators::audit::cra_generator::CraGenerator;
11477
11478            let mut cra_gen = CraGenerator::new(self.seed + 8315);
11479
11480            // Build entity → scope_id map from already-generated scopes
11481            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
11482                .audit_scopes
11483                .iter()
11484                .map(|s| (s.entity_code.clone(), s.id.clone()))
11485                .collect();
11486
11487            for company in &self.config.companies {
11488                let cras = cra_gen.generate_for_entity(&company.code, None);
11489                let scope_id = entity_scope_map.get(&company.code).cloned();
11490                let cras_with_scope: Vec<_> = cras
11491                    .into_iter()
11492                    .map(|mut cra| {
11493                        cra.scope_id = scope_id.clone();
11494                        cra
11495                    })
11496                    .collect();
11497                snapshot.combined_risk_assessments.extend(cras_with_scope);
11498            }
11499
11500            let significant_count = snapshot
11501                .combined_risk_assessments
11502                .iter()
11503                .filter(|c| c.significant_risk)
11504                .count();
11505            let high_cra_count = snapshot
11506                .combined_risk_assessments
11507                .iter()
11508                .filter(|c| {
11509                    matches!(
11510                        c.combined_risk,
11511                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
11512                    )
11513                })
11514                .count();
11515
11516            info!(
11517                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
11518                snapshot.combined_risk_assessments.len(),
11519                significant_count,
11520                high_cra_count,
11521            );
11522        }
11523
11524        // ----------------------------------------------------------------
11525        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
11526        // ----------------------------------------------------------------
11527        {
11528            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
11529
11530            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
11531
11532            // Group CRAs by entity and use per-entity tolerable error from materiality
11533            for company in &self.config.companies {
11534                let entity_code = company.code.clone();
11535
11536                // Find tolerable error for this entity (= performance materiality)
11537                let tolerable_error = snapshot
11538                    .materiality_calculations
11539                    .iter()
11540                    .find(|m| m.entity_code == entity_code)
11541                    .map(|m| m.tolerable_error);
11542
11543                // Collect CRAs for this entity
11544                let entity_cras: Vec<_> = snapshot
11545                    .combined_risk_assessments
11546                    .iter()
11547                    .filter(|c| c.entity_code == entity_code)
11548                    .cloned()
11549                    .collect();
11550
11551                if !entity_cras.is_empty() {
11552                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
11553                    snapshot.sampling_plans.extend(plans);
11554                    snapshot.sampled_items.extend(items);
11555                }
11556            }
11557
11558            let misstatement_count = snapshot
11559                .sampled_items
11560                .iter()
11561                .filter(|i| i.misstatement_found)
11562                .count();
11563
11564            info!(
11565                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
11566                snapshot.sampling_plans.len(),
11567                snapshot.sampled_items.len(),
11568                misstatement_count,
11569            );
11570        }
11571
11572        // ----------------------------------------------------------------
11573        // ISA 315: Significant Classes of Transactions (SCOTS)
11574        // ----------------------------------------------------------------
11575        {
11576            use datasynth_generators::audit::scots_generator::{
11577                ScotsGenerator, ScotsGeneratorConfig,
11578            };
11579
11580            let ic_enabled = self.config.intercompany.enabled;
11581
11582            let config = ScotsGeneratorConfig {
11583                intercompany_enabled: ic_enabled,
11584                ..ScotsGeneratorConfig::default()
11585            };
11586            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
11587
11588            for company in &self.config.companies {
11589                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
11590                snapshot
11591                    .significant_transaction_classes
11592                    .extend(entity_scots);
11593            }
11594
11595            let estimation_count = snapshot
11596                .significant_transaction_classes
11597                .iter()
11598                .filter(|s| {
11599                    matches!(
11600                        s.transaction_type,
11601                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
11602                    )
11603                })
11604                .count();
11605
11606            info!(
11607                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
11608                snapshot.significant_transaction_classes.len(),
11609                estimation_count,
11610            );
11611        }
11612
11613        // ----------------------------------------------------------------
11614        // ISA 520: Unusual Item Markers
11615        // ----------------------------------------------------------------
11616        {
11617            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
11618
11619            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
11620            let entity_codes: Vec<String> = self
11621                .config
11622                .companies
11623                .iter()
11624                .map(|c| c.code.clone())
11625                .collect();
11626            let unusual_flags =
11627                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
11628            info!(
11629                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
11630                unusual_flags.len(),
11631                unusual_flags
11632                    .iter()
11633                    .filter(|f| matches!(
11634                        f.severity,
11635                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
11636                    ))
11637                    .count(),
11638                unusual_flags
11639                    .iter()
11640                    .filter(|f| matches!(
11641                        f.severity,
11642                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
11643                    ))
11644                    .count(),
11645                unusual_flags
11646                    .iter()
11647                    .filter(|f| matches!(
11648                        f.severity,
11649                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
11650                    ))
11651                    .count(),
11652            );
11653            snapshot.unusual_items = unusual_flags;
11654        }
11655
11656        // ----------------------------------------------------------------
11657        // ISA 520: Analytical Relationships
11658        // ----------------------------------------------------------------
11659        {
11660            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
11661
11662            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
11663            let entity_codes: Vec<String> = self
11664                .config
11665                .companies
11666                .iter()
11667                .map(|c| c.code.clone())
11668                .collect();
11669            let current_period_label = format!("FY{fiscal_year}");
11670            let prior_period_label = format!("FY{}", fiscal_year - 1);
11671            let analytical_rels = ar_gen.generate_for_entities(
11672                &entity_codes,
11673                entries,
11674                &current_period_label,
11675                &prior_period_label,
11676            );
11677            let out_of_range = analytical_rels
11678                .iter()
11679                .filter(|r| !r.within_expected_range)
11680                .count();
11681            info!(
11682                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
11683                analytical_rels.len(),
11684                out_of_range,
11685            );
11686            snapshot.analytical_relationships = analytical_rels;
11687        }
11688
11689        if let Some(pb) = pb {
11690            pb.finish_with_message(format!(
11691                "Audit data: {} engagements, {} workpapers, {} evidence, \
11692                 {} confirmations, {} procedure steps, {} samples, \
11693                 {} analytical, {} IA funcs, {} related parties, \
11694                 {} component auditors, {} letters, {} subsequent events, \
11695                 {} service orgs, {} going concern, {} accounting estimates, \
11696                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
11697                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
11698                 {} unusual items, {} analytical relationships",
11699                snapshot.engagements.len(),
11700                snapshot.workpapers.len(),
11701                snapshot.evidence.len(),
11702                snapshot.confirmations.len(),
11703                snapshot.procedure_steps.len(),
11704                snapshot.samples.len(),
11705                snapshot.analytical_results.len(),
11706                snapshot.ia_functions.len(),
11707                snapshot.related_parties.len(),
11708                snapshot.component_auditors.len(),
11709                snapshot.engagement_letters.len(),
11710                snapshot.subsequent_events.len(),
11711                snapshot.service_organizations.len(),
11712                snapshot.going_concern_assessments.len(),
11713                snapshot.accounting_estimates.len(),
11714                snapshot.audit_opinions.len(),
11715                snapshot.key_audit_matters.len(),
11716                snapshot.sox_302_certifications.len(),
11717                snapshot.sox_404_assessments.len(),
11718                snapshot.materiality_calculations.len(),
11719                snapshot.combined_risk_assessments.len(),
11720                snapshot.sampling_plans.len(),
11721                snapshot.significant_transaction_classes.len(),
11722                snapshot.unusual_items.len(),
11723                snapshot.analytical_relationships.len(),
11724            ));
11725        }
11726
11727        // ----------------------------------------------------------------
11728        // PCAOB-ISA cross-reference mappings
11729        // ----------------------------------------------------------------
11730        // Always include the standard PCAOB-ISA mappings when audit generation is
11731        // enabled. These are static reference data (no randomness required) so we
11732        // call standard_mappings() directly.
11733        {
11734            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
11735            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
11736            debug!(
11737                "PCAOB-ISA mappings generated: {} mappings",
11738                snapshot.isa_pcaob_mappings.len()
11739            );
11740        }
11741
11742        // ----------------------------------------------------------------
11743        // ISA standard reference entries
11744        // ----------------------------------------------------------------
11745        // Emit flat ISA standard reference data (number, title, series) so
11746        // consumers get a machine-readable listing of all 34 ISA standards in
11747        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
11748        {
11749            use datasynth_standards::audit::isa_reference::IsaStandard;
11750            snapshot.isa_mappings = IsaStandard::standard_entries();
11751            debug!(
11752                "ISA standard entries generated: {} standards",
11753                snapshot.isa_mappings.len()
11754            );
11755        }
11756
11757        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
11758        // For each RPT, find the chronologically closest JE for the engagement's entity.
11759        {
11760            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
11761                .engagements
11762                .iter()
11763                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
11764                .collect();
11765
11766            for rpt in &mut snapshot.related_party_transactions {
11767                if rpt.journal_entry_id.is_some() {
11768                    continue; // already set
11769                }
11770                let entity = engagement_by_id
11771                    .get(&rpt.engagement_id.to_string())
11772                    .copied()
11773                    .unwrap_or("");
11774
11775                // Find closest JE by date in the entity's company
11776                let best_je = entries
11777                    .iter()
11778                    .filter(|je| je.header.company_code == entity)
11779                    .min_by_key(|je| {
11780                        (je.header.posting_date - rpt.transaction_date)
11781                            .num_days()
11782                            .abs()
11783                    });
11784
11785                if let Some(je) = best_je {
11786                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
11787                }
11788            }
11789
11790            let linked = snapshot
11791                .related_party_transactions
11792                .iter()
11793                .filter(|t| t.journal_entry_id.is_some())
11794                .count();
11795            debug!(
11796                "Linked {}/{} related party transactions to journal entries",
11797                linked,
11798                snapshot.related_party_transactions.len()
11799            );
11800        }
11801
11802        Ok(snapshot)
11803    }
11804
11805    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
11806    ///
11807    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
11808    /// from the current orchestrator state, runs the FSM engine, and maps the
11809    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
11810    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
11811    fn generate_audit_data_with_fsm(
11812        &mut self,
11813        entries: &[JournalEntry],
11814    ) -> SynthResult<AuditSnapshot> {
11815        use datasynth_audit_fsm::{
11816            context::EngagementContext,
11817            engine::AuditFsmEngine,
11818            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
11819        };
11820        use rand::SeedableRng;
11821        use rand_chacha::ChaCha8Rng;
11822
11823        info!("Audit FSM: generating audit data via FSM engine");
11824
11825        let fsm_config = self
11826            .config
11827            .audit
11828            .fsm
11829            .as_ref()
11830            .expect("FSM config must be present when FSM is enabled");
11831
11832        // 1. Load blueprint from config string.
11833        let bwp = match fsm_config.blueprint.as_str() {
11834            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
11835            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
11836            _ => {
11837                warn!(
11838                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
11839                    fsm_config.blueprint
11840                );
11841                BlueprintWithPreconditions::load_builtin_fsa()
11842            }
11843        }
11844        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
11845
11846        // 2. Load overlay from config string.
11847        let overlay = match fsm_config.overlay.as_str() {
11848            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
11849            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
11850            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
11851            _ => {
11852                warn!(
11853                    "Unknown FSM overlay '{}', falling back to builtin:default",
11854                    fsm_config.overlay
11855                );
11856                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
11857            }
11858        }
11859        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
11860
11861        // 3. Build EngagementContext from orchestrator state.
11862        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11863            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11864        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
11865
11866        // Determine the engagement entity early so we can filter JEs.
11867        let company = self.config.companies.first();
11868        let company_code = company
11869            .map(|c| c.code.clone())
11870            .unwrap_or_else(|| "UNKNOWN".to_string());
11871        let company_name = company
11872            .map(|c| c.name.clone())
11873            .unwrap_or_else(|| "Unknown Company".to_string());
11874        let currency = company
11875            .map(|c| c.currency.clone())
11876            .unwrap_or_else(|| "USD".to_string());
11877
11878        // Filter JEs to the engagement entity for single-company coherence.
11879        let entity_entries: Vec<_> = entries
11880            .iter()
11881            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
11882            .cloned()
11883            .collect();
11884        let entries = &entity_entries; // Shadow the parameter for remaining usage
11885
11886        // Financial aggregates from journal entries.
11887        let total_revenue: rust_decimal::Decimal = entries
11888            .iter()
11889            .flat_map(|e| e.lines.iter())
11890            .filter(|l| l.account_code.starts_with('4'))
11891            .map(|l| l.credit_amount - l.debit_amount)
11892            .sum();
11893
11894        let total_assets: rust_decimal::Decimal = entries
11895            .iter()
11896            .flat_map(|e| e.lines.iter())
11897            .filter(|l| l.account_code.starts_with('1'))
11898            .map(|l| l.debit_amount - l.credit_amount)
11899            .sum();
11900
11901        let total_expenses: rust_decimal::Decimal = entries
11902            .iter()
11903            .flat_map(|e| e.lines.iter())
11904            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
11905            .map(|l| l.debit_amount)
11906            .sum();
11907
11908        let equity: rust_decimal::Decimal = entries
11909            .iter()
11910            .flat_map(|e| e.lines.iter())
11911            .filter(|l| l.account_code.starts_with('3'))
11912            .map(|l| l.credit_amount - l.debit_amount)
11913            .sum();
11914
11915        let total_debt: rust_decimal::Decimal = entries
11916            .iter()
11917            .flat_map(|e| e.lines.iter())
11918            .filter(|l| l.account_code.starts_with('2'))
11919            .map(|l| l.credit_amount - l.debit_amount)
11920            .sum();
11921
11922        let pretax_income = total_revenue - total_expenses;
11923
11924        let cogs: rust_decimal::Decimal = entries
11925            .iter()
11926            .flat_map(|e| e.lines.iter())
11927            .filter(|l| l.account_code.starts_with('5'))
11928            .map(|l| l.debit_amount)
11929            .sum();
11930        let gross_profit = total_revenue - cogs;
11931
11932        let current_assets: rust_decimal::Decimal = entries
11933            .iter()
11934            .flat_map(|e| e.lines.iter())
11935            .filter(|l| {
11936                l.account_code.starts_with("10")
11937                    || l.account_code.starts_with("11")
11938                    || l.account_code.starts_with("12")
11939                    || l.account_code.starts_with("13")
11940            })
11941            .map(|l| l.debit_amount - l.credit_amount)
11942            .sum();
11943        let current_liabilities: rust_decimal::Decimal = entries
11944            .iter()
11945            .flat_map(|e| e.lines.iter())
11946            .filter(|l| {
11947                l.account_code.starts_with("20")
11948                    || l.account_code.starts_with("21")
11949                    || l.account_code.starts_with("22")
11950            })
11951            .map(|l| l.credit_amount - l.debit_amount)
11952            .sum();
11953        let working_capital = current_assets - current_liabilities;
11954
11955        let depreciation: rust_decimal::Decimal = entries
11956            .iter()
11957            .flat_map(|e| e.lines.iter())
11958            .filter(|l| l.account_code.starts_with("60"))
11959            .map(|l| l.debit_amount)
11960            .sum();
11961        let operating_cash_flow = pretax_income + depreciation;
11962
11963        // GL accounts for reference data.
11964        let accounts: Vec<String> = self
11965            .coa
11966            .as_ref()
11967            .map(|coa| {
11968                coa.get_postable_accounts()
11969                    .iter()
11970                    .map(|acc| acc.account_code().to_string())
11971                    .collect()
11972            })
11973            .unwrap_or_default();
11974
11975        // Team member IDs and display names from master data.
11976        let team_member_ids: Vec<String> = self
11977            .master_data
11978            .employees
11979            .iter()
11980            .take(8) // Cap team size
11981            .map(|e| e.employee_id.clone())
11982            .collect();
11983        let team_member_pairs: Vec<(String, String)> = self
11984            .master_data
11985            .employees
11986            .iter()
11987            .take(8)
11988            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
11989            .collect();
11990
11991        let vendor_names: Vec<String> = self
11992            .master_data
11993            .vendors
11994            .iter()
11995            .map(|v| v.name.clone())
11996            .collect();
11997        let customer_names: Vec<String> = self
11998            .master_data
11999            .customers
12000            .iter()
12001            .map(|c| c.name.clone())
12002            .collect();
12003
12004        let entity_codes: Vec<String> = self
12005            .config
12006            .companies
12007            .iter()
12008            .map(|c| c.code.clone())
12009            .collect();
12010
12011        // Journal entry IDs for evidence tracing (sample up to 50).
12012        let journal_entry_ids: Vec<String> = entries
12013            .iter()
12014            .take(50)
12015            .map(|e| e.header.document_id.to_string())
12016            .collect();
12017
12018        // Account balances for risk weighting (aggregate debit - credit per account).
12019        let mut account_balances = std::collections::HashMap::<String, f64>::new();
12020        for entry in entries {
12021            for line in &entry.lines {
12022                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
12023                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
12024                *account_balances
12025                    .entry(line.account_code.clone())
12026                    .or_insert(0.0) += debit_f64 - credit_f64;
12027            }
12028        }
12029
12030        // Internal control IDs and anomaly refs are populated by the
12031        // caller when available; here we default to empty because the
12032        // orchestrator state may not have generated controls/anomalies
12033        // yet at this point in the pipeline.
12034        let control_ids: Vec<String> = Vec::new();
12035        let anomaly_refs: Vec<String> = Vec::new();
12036
12037        let mut context = EngagementContext {
12038            company_code,
12039            company_name,
12040            fiscal_year: start_date.year(),
12041            currency,
12042            total_revenue,
12043            total_assets,
12044            engagement_start: start_date,
12045            report_date: period_end,
12046            pretax_income,
12047            equity,
12048            gross_profit,
12049            working_capital,
12050            operating_cash_flow,
12051            total_debt,
12052            team_member_ids,
12053            team_member_pairs,
12054            accounts,
12055            vendor_names,
12056            customer_names,
12057            journal_entry_ids,
12058            account_balances,
12059            control_ids,
12060            anomaly_refs,
12061            journal_entries: entries.to_vec(),
12062            is_us_listed: false,
12063            entity_codes,
12064            auditor_firm_name: "DataSynth Audit LLP".into(),
12065            accounting_framework: self
12066                .config
12067                .accounting_standards
12068                .framework
12069                .map(|f| match f {
12070                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
12071                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
12072                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
12073                        "French GAAP"
12074                    }
12075                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
12076                        "German GAAP"
12077                    }
12078                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
12079                        "Dual Reporting"
12080                    }
12081                })
12082                .unwrap_or("IFRS")
12083                .into(),
12084        };
12085
12086        // 4. Create and run the FSM engine.
12087        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
12088        let rng = ChaCha8Rng::seed_from_u64(seed);
12089        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
12090
12091        let mut result = engine
12092            .run_engagement(&context)
12093            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
12094
12095        info!(
12096            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
12097             {} phases completed, duration {:.1}h",
12098            result.event_log.len(),
12099            result.artifacts.total_artifacts(),
12100            result.anomalies.len(),
12101            result.phases_completed.len(),
12102            result.total_duration_hours,
12103        );
12104
12105        // 4b. Populate financial data in the artifact bag for downstream consumers.
12106        let tb_entity = context.company_code.clone();
12107        let tb_fy = context.fiscal_year;
12108        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
12109        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
12110            entries,
12111            &tb_entity,
12112            tb_fy,
12113            self.coa.as_ref().map(|c| c.as_ref()),
12114        );
12115
12116        // 5. Map ArtifactBag fields to AuditSnapshot.
12117        let bag = result.artifacts;
12118        let mut snapshot = AuditSnapshot {
12119            engagements: bag.engagements,
12120            engagement_letters: bag.engagement_letters,
12121            materiality_calculations: bag.materiality_calculations,
12122            risk_assessments: bag.risk_assessments,
12123            combined_risk_assessments: bag.combined_risk_assessments,
12124            workpapers: bag.workpapers,
12125            evidence: bag.evidence,
12126            findings: bag.findings,
12127            judgments: bag.judgments,
12128            sampling_plans: bag.sampling_plans,
12129            sampled_items: bag.sampled_items,
12130            analytical_results: bag.analytical_results,
12131            going_concern_assessments: bag.going_concern_assessments,
12132            subsequent_events: bag.subsequent_events,
12133            audit_opinions: bag.audit_opinions,
12134            key_audit_matters: bag.key_audit_matters,
12135            procedure_steps: bag.procedure_steps,
12136            samples: bag.samples,
12137            confirmations: bag.confirmations,
12138            confirmation_responses: bag.confirmation_responses,
12139            // Store the event trail for downstream export.
12140            fsm_event_trail: Some(result.event_log),
12141            // Fields not produced by the FSM engine remain at their defaults.
12142            ..Default::default()
12143        };
12144
12145        // 6. Add static reference data (same as legacy path).
12146        {
12147            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
12148            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
12149        }
12150        {
12151            use datasynth_standards::audit::isa_reference::IsaStandard;
12152            snapshot.isa_mappings = IsaStandard::standard_entries();
12153        }
12154
12155        info!(
12156            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
12157             {} risk assessments, {} findings, {} materiality calcs",
12158            snapshot.engagements.len(),
12159            snapshot.workpapers.len(),
12160            snapshot.evidence.len(),
12161            snapshot.risk_assessments.len(),
12162            snapshot.findings.len(),
12163            snapshot.materiality_calculations.len(),
12164        );
12165
12166        Ok(snapshot)
12167    }
12168
12169    /// Export journal entries as graph data for ML training and network reconstruction.
12170    ///
12171    /// Builds a transaction graph where:
12172    /// - Nodes are GL accounts
12173    /// - Edges are money flows from credit to debit accounts
12174    /// - Edge attributes include amount, date, business process, anomaly flags
12175    fn export_graphs(
12176        &mut self,
12177        entries: &[JournalEntry],
12178        _coa: &Arc<ChartOfAccounts>,
12179        stats: &mut EnhancedGenerationStatistics,
12180    ) -> SynthResult<GraphExportSnapshot> {
12181        let pb = self.create_progress_bar(100, "Exporting Graphs");
12182
12183        let mut snapshot = GraphExportSnapshot::default();
12184
12185        // Get output directory
12186        let output_dir = self
12187            .output_path
12188            .clone()
12189            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12190        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12191
12192        // Process each graph type configuration
12193        for graph_type in &self.config.graph_export.graph_types {
12194            if let Some(pb) = &pb {
12195                pb.inc(10);
12196            }
12197
12198            // Build transaction graph
12199            let graph_config = TransactionGraphConfig {
12200                include_vendors: false,
12201                include_customers: false,
12202                create_debit_credit_edges: true,
12203                include_document_nodes: graph_type.include_document_nodes,
12204                min_edge_weight: graph_type.min_edge_weight,
12205                aggregate_parallel_edges: graph_type.aggregate_edges,
12206                framework: None,
12207            };
12208
12209            let mut builder = TransactionGraphBuilder::new(graph_config);
12210            builder.add_journal_entries(entries);
12211            let graph = builder.build();
12212
12213            // Update stats
12214            stats.graph_node_count += graph.node_count();
12215            stats.graph_edge_count += graph.edge_count();
12216
12217            if let Some(pb) = &pb {
12218                pb.inc(40);
12219            }
12220
12221            // Export to each configured format
12222            for format in &self.config.graph_export.formats {
12223                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
12224
12225                // Create output directory
12226                if let Err(e) = std::fs::create_dir_all(&format_dir) {
12227                    warn!("Failed to create graph output directory: {}", e);
12228                    continue;
12229                }
12230
12231                match format {
12232                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
12233                        let pyg_config = PyGExportConfig {
12234                            common: datasynth_graph::CommonExportConfig {
12235                                export_node_features: true,
12236                                export_edge_features: true,
12237                                export_node_labels: true,
12238                                export_edge_labels: true,
12239                                export_masks: true,
12240                                train_ratio: self.config.graph_export.train_ratio,
12241                                val_ratio: self.config.graph_export.validation_ratio,
12242                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12243                            },
12244                            one_hot_categoricals: false,
12245                        };
12246
12247                        let exporter = PyGExporter::new(pyg_config);
12248                        match exporter.export(&graph, &format_dir) {
12249                            Ok(metadata) => {
12250                                snapshot.exports.insert(
12251                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
12252                                    GraphExportInfo {
12253                                        name: graph_type.name.clone(),
12254                                        format: "pytorch_geometric".to_string(),
12255                                        output_path: format_dir.clone(),
12256                                        node_count: metadata.num_nodes,
12257                                        edge_count: metadata.num_edges,
12258                                    },
12259                                );
12260                                snapshot.graph_count += 1;
12261                            }
12262                            Err(e) => {
12263                                warn!("Failed to export PyTorch Geometric graph: {}", e);
12264                            }
12265                        }
12266                    }
12267                    datasynth_config::schema::GraphExportFormat::Neo4j => {
12268                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
12269
12270                        let neo4j_config = Neo4jExportConfig {
12271                            export_node_properties: true,
12272                            export_edge_properties: true,
12273                            export_features: true,
12274                            generate_cypher: true,
12275                            generate_admin_import: true,
12276                            database_name: "synth".to_string(),
12277                            cypher_batch_size: 1000,
12278                        };
12279
12280                        let exporter = Neo4jExporter::new(neo4j_config);
12281                        match exporter.export(&graph, &format_dir) {
12282                            Ok(metadata) => {
12283                                snapshot.exports.insert(
12284                                    format!("{}_{}", graph_type.name, "neo4j"),
12285                                    GraphExportInfo {
12286                                        name: graph_type.name.clone(),
12287                                        format: "neo4j".to_string(),
12288                                        output_path: format_dir.clone(),
12289                                        node_count: metadata.num_nodes,
12290                                        edge_count: metadata.num_edges,
12291                                    },
12292                                );
12293                                snapshot.graph_count += 1;
12294                            }
12295                            Err(e) => {
12296                                warn!("Failed to export Neo4j graph: {}", e);
12297                            }
12298                        }
12299                    }
12300                    datasynth_config::schema::GraphExportFormat::Dgl => {
12301                        use datasynth_graph::{DGLExportConfig, DGLExporter};
12302
12303                        let dgl_config = DGLExportConfig {
12304                            common: datasynth_graph::CommonExportConfig {
12305                                export_node_features: true,
12306                                export_edge_features: true,
12307                                export_node_labels: true,
12308                                export_edge_labels: true,
12309                                export_masks: true,
12310                                train_ratio: self.config.graph_export.train_ratio,
12311                                val_ratio: self.config.graph_export.validation_ratio,
12312                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
12313                            },
12314                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
12315                            include_pickle_script: true, // DGL ecosystem standard helper
12316                        };
12317
12318                        let exporter = DGLExporter::new(dgl_config);
12319                        match exporter.export(&graph, &format_dir) {
12320                            Ok(metadata) => {
12321                                snapshot.exports.insert(
12322                                    format!("{}_{}", graph_type.name, "dgl"),
12323                                    GraphExportInfo {
12324                                        name: graph_type.name.clone(),
12325                                        format: "dgl".to_string(),
12326                                        output_path: format_dir.clone(),
12327                                        node_count: metadata.common.num_nodes,
12328                                        edge_count: metadata.common.num_edges,
12329                                    },
12330                                );
12331                                snapshot.graph_count += 1;
12332                            }
12333                            Err(e) => {
12334                                warn!("Failed to export DGL graph: {}", e);
12335                            }
12336                        }
12337                    }
12338                    datasynth_config::schema::GraphExportFormat::RustGraph => {
12339                        use datasynth_graph::{
12340                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
12341                        };
12342
12343                        let rustgraph_config = RustGraphExportConfig {
12344                            include_features: true,
12345                            include_temporal: true,
12346                            include_labels: true,
12347                            source_name: "datasynth".to_string(),
12348                            batch_id: None,
12349                            output_format: RustGraphOutputFormat::JsonLines,
12350                            export_node_properties: true,
12351                            export_edge_properties: true,
12352                            pretty_print: false,
12353                        };
12354
12355                        let exporter = RustGraphExporter::new(rustgraph_config);
12356                        match exporter.export(&graph, &format_dir) {
12357                            Ok(metadata) => {
12358                                snapshot.exports.insert(
12359                                    format!("{}_{}", graph_type.name, "rustgraph"),
12360                                    GraphExportInfo {
12361                                        name: graph_type.name.clone(),
12362                                        format: "rustgraph".to_string(),
12363                                        output_path: format_dir.clone(),
12364                                        node_count: metadata.num_nodes,
12365                                        edge_count: metadata.num_edges,
12366                                    },
12367                                );
12368                                snapshot.graph_count += 1;
12369                            }
12370                            Err(e) => {
12371                                warn!("Failed to export RustGraph: {}", e);
12372                            }
12373                        }
12374                    }
12375                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
12376                        // Hypergraph export is handled separately in Phase 10b
12377                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
12378                    }
12379                }
12380            }
12381
12382            if let Some(pb) = &pb {
12383                pb.inc(40);
12384            }
12385        }
12386
12387        stats.graph_export_count = snapshot.graph_count;
12388        snapshot.exported = snapshot.graph_count > 0;
12389
12390        if let Some(pb) = pb {
12391            pb.finish_with_message(format!(
12392                "Graphs exported: {} graphs ({} nodes, {} edges)",
12393                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
12394            ));
12395        }
12396
12397        Ok(snapshot)
12398    }
12399
12400    /// Build additional graph types (banking, approval, entity) when relevant data
12401    /// is available. These run as a late phase because the data they need (banking
12402    /// snapshot, intercompany snapshot) is only generated after the main graph
12403    /// export phase.
12404    fn build_additional_graphs(
12405        &self,
12406        banking: &BankingSnapshot,
12407        intercompany: &IntercompanySnapshot,
12408        entries: &[JournalEntry],
12409        stats: &mut EnhancedGenerationStatistics,
12410    ) {
12411        let output_dir = self
12412            .output_path
12413            .clone()
12414            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12415        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
12416
12417        // Banking graph: build when banking customers and transactions exist
12418        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
12419            info!("Phase 10c: Building banking network graph");
12420            let config = BankingGraphConfig::default();
12421            let mut builder = BankingGraphBuilder::new(config);
12422            builder.add_customers(&banking.customers);
12423            builder.add_accounts(&banking.accounts, &banking.customers);
12424            builder.add_transactions(&banking.transactions);
12425            let graph = builder.build();
12426
12427            let node_count = graph.node_count();
12428            let edge_count = graph.edge_count();
12429            stats.graph_node_count += node_count;
12430            stats.graph_edge_count += edge_count;
12431
12432            // Export as PyG if configured
12433            for format in &self.config.graph_export.formats {
12434                if matches!(
12435                    format,
12436                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12437                ) {
12438                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
12439                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12440                        warn!("Failed to create banking graph output dir: {}", e);
12441                        continue;
12442                    }
12443                    let pyg_config = PyGExportConfig::default();
12444                    let exporter = PyGExporter::new(pyg_config);
12445                    if let Err(e) = exporter.export(&graph, &format_dir) {
12446                        warn!("Failed to export banking graph as PyG: {}", e);
12447                    } else {
12448                        info!(
12449                            "Banking network graph exported: {} nodes, {} edges",
12450                            node_count, edge_count
12451                        );
12452                    }
12453                }
12454            }
12455        }
12456
12457        // Approval graph: build from journal entry approval workflows
12458        let approval_entries: Vec<_> = entries
12459            .iter()
12460            .filter(|je| je.header.approval_workflow.is_some())
12461            .collect();
12462
12463        if !approval_entries.is_empty() {
12464            info!(
12465                "Phase 10c: Building approval network graph ({} entries with approvals)",
12466                approval_entries.len()
12467            );
12468            let config = ApprovalGraphConfig::default();
12469            let mut builder = ApprovalGraphBuilder::new(config);
12470
12471            for je in &approval_entries {
12472                if let Some(ref wf) = je.header.approval_workflow {
12473                    for action in &wf.actions {
12474                        let record = datasynth_core::models::ApprovalRecord {
12475                            approval_id: format!(
12476                                "APR-{}-{}",
12477                                je.header.document_id, action.approval_level
12478                            ),
12479                            document_number: je.header.document_id.to_string(),
12480                            document_type: "JE".to_string(),
12481                            company_code: je.company_code().to_string(),
12482                            requester_id: wf.preparer_id.clone(),
12483                            requester_name: Some(wf.preparer_name.clone()),
12484                            approver_id: action.actor_id.clone(),
12485                            approver_name: action.actor_name.clone(),
12486                            approval_date: je.posting_date(),
12487                            action: format!("{:?}", action.action),
12488                            amount: wf.amount,
12489                            approval_limit: None,
12490                            comments: action.comments.clone(),
12491                            delegation_from: None,
12492                            is_auto_approved: false,
12493                        };
12494                        builder.add_approval(&record);
12495                    }
12496                }
12497            }
12498
12499            let graph = builder.build();
12500            let node_count = graph.node_count();
12501            let edge_count = graph.edge_count();
12502            stats.graph_node_count += node_count;
12503            stats.graph_edge_count += edge_count;
12504
12505            // Export as PyG if configured
12506            for format in &self.config.graph_export.formats {
12507                if matches!(
12508                    format,
12509                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12510                ) {
12511                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
12512                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12513                        warn!("Failed to create approval graph output dir: {}", e);
12514                        continue;
12515                    }
12516                    let pyg_config = PyGExportConfig::default();
12517                    let exporter = PyGExporter::new(pyg_config);
12518                    if let Err(e) = exporter.export(&graph, &format_dir) {
12519                        warn!("Failed to export approval graph as PyG: {}", e);
12520                    } else {
12521                        info!(
12522                            "Approval network graph exported: {} nodes, {} edges",
12523                            node_count, edge_count
12524                        );
12525                    }
12526                }
12527            }
12528        }
12529
12530        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
12531        if self.config.companies.len() >= 2 {
12532            info!(
12533                "Phase 10c: Building entity relationship graph ({} companies)",
12534                self.config.companies.len()
12535            );
12536
12537            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12538                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
12539
12540            // Map CompanyConfig → Company objects
12541            let parent_code = &self.config.companies[0].code;
12542            let mut companies: Vec<datasynth_core::models::Company> =
12543                Vec::with_capacity(self.config.companies.len());
12544
12545            // First company is the parent
12546            let first = &self.config.companies[0];
12547            companies.push(datasynth_core::models::Company::parent(
12548                &first.code,
12549                &first.name,
12550                &first.country,
12551                &first.currency,
12552            ));
12553
12554            // Remaining companies are subsidiaries (100% owned by parent)
12555            for cc in self.config.companies.iter().skip(1) {
12556                companies.push(datasynth_core::models::Company::subsidiary(
12557                    &cc.code,
12558                    &cc.name,
12559                    &cc.country,
12560                    &cc.currency,
12561                    parent_code,
12562                    rust_decimal::Decimal::from(100),
12563                ));
12564            }
12565
12566            // Build IntercompanyRelationship records (same logic as phase_intercompany)
12567            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
12568                self.config
12569                    .companies
12570                    .iter()
12571                    .skip(1)
12572                    .enumerate()
12573                    .map(|(i, cc)| {
12574                        let mut rel =
12575                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
12576                                format!("REL{:03}", i + 1),
12577                                parent_code.clone(),
12578                                cc.code.clone(),
12579                                rust_decimal::Decimal::from(100),
12580                                start_date,
12581                            );
12582                        rel.functional_currency = cc.currency.clone();
12583                        rel
12584                    })
12585                    .collect();
12586
12587            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
12588            builder.add_companies(&companies);
12589            builder.add_ownership_relationships(&relationships);
12590
12591            // Thread IC matched-pair transaction edges into the entity graph
12592            for pair in &intercompany.matched_pairs {
12593                builder.add_intercompany_edge(
12594                    &pair.seller_company,
12595                    &pair.buyer_company,
12596                    pair.amount,
12597                    &format!("{:?}", pair.transaction_type),
12598                );
12599            }
12600
12601            let graph = builder.build();
12602            let node_count = graph.node_count();
12603            let edge_count = graph.edge_count();
12604            stats.graph_node_count += node_count;
12605            stats.graph_edge_count += edge_count;
12606
12607            // Export as PyG if configured
12608            for format in &self.config.graph_export.formats {
12609                if matches!(
12610                    format,
12611                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
12612                ) {
12613                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
12614                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
12615                        warn!("Failed to create entity graph output dir: {}", e);
12616                        continue;
12617                    }
12618                    let pyg_config = PyGExportConfig::default();
12619                    let exporter = PyGExporter::new(pyg_config);
12620                    if let Err(e) = exporter.export(&graph, &format_dir) {
12621                        warn!("Failed to export entity graph as PyG: {}", e);
12622                    } else {
12623                        info!(
12624                            "Entity relationship graph exported: {} nodes, {} edges",
12625                            node_count, edge_count
12626                        );
12627                    }
12628                }
12629            }
12630        } else {
12631            debug!(
12632                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
12633                self.config.companies.len()
12634            );
12635        }
12636    }
12637
12638    /// Export a multi-layer hypergraph for RustGraph integration.
12639    ///
12640    /// Builds a 3-layer hypergraph:
12641    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
12642    /// - Layer 2: Process Events (all process family document flows + OCPM events)
12643    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
12644    #[allow(clippy::too_many_arguments)]
12645    fn export_hypergraph(
12646        &self,
12647        coa: &Arc<ChartOfAccounts>,
12648        entries: &[JournalEntry],
12649        document_flows: &DocumentFlowSnapshot,
12650        sourcing: &SourcingSnapshot,
12651        hr: &HrSnapshot,
12652        manufacturing: &ManufacturingSnapshot,
12653        banking: &BankingSnapshot,
12654        audit: &AuditSnapshot,
12655        financial_reporting: &FinancialReportingSnapshot,
12656        ocpm: &OcpmSnapshot,
12657        compliance: &ComplianceRegulationsSnapshot,
12658        stats: &mut EnhancedGenerationStatistics,
12659    ) -> SynthResult<HypergraphExportInfo> {
12660        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
12661        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
12662        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
12663        use datasynth_graph::models::hypergraph::AggregationStrategy;
12664
12665        let hg_settings = &self.config.graph_export.hypergraph;
12666
12667        // Parse aggregation strategy from config string
12668        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
12669            "truncate" => AggregationStrategy::Truncate,
12670            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
12671            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
12672            "importance_sample" => AggregationStrategy::ImportanceSample,
12673            _ => AggregationStrategy::PoolByCounterparty,
12674        };
12675
12676        let builder_config = HypergraphConfig {
12677            max_nodes: hg_settings.max_nodes,
12678            aggregation_strategy,
12679            include_coso: hg_settings.governance_layer.include_coso,
12680            include_controls: hg_settings.governance_layer.include_controls,
12681            include_sox: hg_settings.governance_layer.include_sox,
12682            include_vendors: hg_settings.governance_layer.include_vendors,
12683            include_customers: hg_settings.governance_layer.include_customers,
12684            include_employees: hg_settings.governance_layer.include_employees,
12685            include_p2p: hg_settings.process_layer.include_p2p,
12686            include_o2c: hg_settings.process_layer.include_o2c,
12687            include_s2c: hg_settings.process_layer.include_s2c,
12688            include_h2r: hg_settings.process_layer.include_h2r,
12689            include_mfg: hg_settings.process_layer.include_mfg,
12690            include_bank: hg_settings.process_layer.include_bank,
12691            include_audit: hg_settings.process_layer.include_audit,
12692            include_r2r: hg_settings.process_layer.include_r2r,
12693            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
12694            docs_per_counterparty_threshold: hg_settings
12695                .process_layer
12696                .docs_per_counterparty_threshold,
12697            include_accounts: hg_settings.accounting_layer.include_accounts,
12698            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
12699            include_cross_layer_edges: hg_settings.cross_layer.enabled,
12700            include_compliance: self.config.compliance_regulations.enabled,
12701            include_tax: true,
12702            include_treasury: true,
12703            include_esg: true,
12704            include_project: true,
12705            include_intercompany: true,
12706            include_temporal_events: true,
12707        };
12708
12709        let mut builder = HypergraphBuilder::new(builder_config);
12710
12711        // Layer 1: Governance & Controls
12712        builder.add_coso_framework();
12713
12714        // Add controls if available (generated during JE generation)
12715        // Controls are generated per-company; we use the standard set
12716        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
12717            let controls = InternalControl::standard_controls();
12718            builder.add_controls(&controls);
12719        }
12720
12721        // Add master data
12722        builder.add_vendors(&self.master_data.vendors);
12723        builder.add_customers(&self.master_data.customers);
12724        builder.add_employees(&self.master_data.employees);
12725
12726        // Layer 2: Process Events (all process families)
12727        builder.add_p2p_documents(
12728            &document_flows.purchase_orders,
12729            &document_flows.goods_receipts,
12730            &document_flows.vendor_invoices,
12731            &document_flows.payments,
12732        );
12733        builder.add_o2c_documents(
12734            &document_flows.sales_orders,
12735            &document_flows.deliveries,
12736            &document_flows.customer_invoices,
12737        );
12738        builder.add_s2c_documents(
12739            &sourcing.sourcing_projects,
12740            &sourcing.qualifications,
12741            &sourcing.rfx_events,
12742            &sourcing.bids,
12743            &sourcing.bid_evaluations,
12744            &sourcing.contracts,
12745        );
12746        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
12747        builder.add_mfg_documents(
12748            &manufacturing.production_orders,
12749            &manufacturing.quality_inspections,
12750            &manufacturing.cycle_counts,
12751        );
12752        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
12753        builder.add_audit_documents(
12754            &audit.engagements,
12755            &audit.workpapers,
12756            &audit.findings,
12757            &audit.evidence,
12758            &audit.risk_assessments,
12759            &audit.judgments,
12760            &audit.materiality_calculations,
12761            &audit.audit_opinions,
12762            &audit.going_concern_assessments,
12763        );
12764        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
12765
12766        // OCPM events as hyperedges
12767        if let Some(ref event_log) = ocpm.event_log {
12768            builder.add_ocpm_events(event_log);
12769        }
12770
12771        // Compliance regulations as cross-layer nodes
12772        if self.config.compliance_regulations.enabled
12773            && hg_settings.governance_layer.include_controls
12774        {
12775            // Reconstruct ComplianceStandard objects from the registry
12776            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
12777            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
12778                .standard_records
12779                .iter()
12780                .filter_map(|r| {
12781                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
12782                    registry.get(&sid).cloned()
12783                })
12784                .collect();
12785
12786            builder.add_compliance_regulations(
12787                &standards,
12788                &compliance.findings,
12789                &compliance.filings,
12790            );
12791        }
12792
12793        // Layer 3: Accounting Network
12794        builder.add_accounts(coa);
12795        builder.add_journal_entries_as_hyperedges(entries);
12796
12797        // Build the hypergraph
12798        let hypergraph = builder.build();
12799
12800        // Export
12801        let output_dir = self
12802            .output_path
12803            .clone()
12804            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
12805        let hg_dir = output_dir
12806            .join(&self.config.graph_export.output_subdirectory)
12807            .join(&hg_settings.output_subdirectory);
12808
12809        // Branch on output format
12810        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
12811            "unified" => {
12812                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12813                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12814                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
12815                })?;
12816                (
12817                    metadata.num_nodes,
12818                    metadata.num_edges,
12819                    metadata.num_hyperedges,
12820                )
12821            }
12822            _ => {
12823                // "native" or any unrecognized format → use existing exporter
12824                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
12825                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
12826                    SynthError::generation(format!("Hypergraph export failed: {e}"))
12827                })?;
12828                (
12829                    metadata.num_nodes,
12830                    metadata.num_edges,
12831                    metadata.num_hyperedges,
12832                )
12833            }
12834        };
12835
12836        // Stream to RustGraph ingest endpoint if configured
12837        #[cfg(feature = "streaming")]
12838        if let Some(ref target_url) = hg_settings.stream_target {
12839            use crate::stream_client::{StreamClient, StreamConfig};
12840            use std::io::Write as _;
12841
12842            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
12843            let stream_config = StreamConfig {
12844                target_url: target_url.clone(),
12845                batch_size: hg_settings.stream_batch_size,
12846                api_key,
12847                ..StreamConfig::default()
12848            };
12849
12850            match StreamClient::new(stream_config) {
12851                Ok(mut client) => {
12852                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
12853                    match exporter.export_to_writer(&hypergraph, &mut client) {
12854                        Ok(_) => {
12855                            if let Err(e) = client.flush() {
12856                                warn!("Failed to flush stream client: {}", e);
12857                            } else {
12858                                info!("Streamed {} records to {}", client.total_sent(), target_url);
12859                            }
12860                        }
12861                        Err(e) => {
12862                            warn!("Streaming export failed: {}", e);
12863                        }
12864                    }
12865                }
12866                Err(e) => {
12867                    warn!("Failed to create stream client: {}", e);
12868                }
12869            }
12870        }
12871
12872        // Update stats
12873        stats.graph_node_count += num_nodes;
12874        stats.graph_edge_count += num_edges;
12875        stats.graph_export_count += 1;
12876
12877        Ok(HypergraphExportInfo {
12878            node_count: num_nodes,
12879            edge_count: num_edges,
12880            hyperedge_count: num_hyperedges,
12881            output_path: hg_dir,
12882        })
12883    }
12884
12885    /// Generate banking KYC/AML data.
12886    ///
12887    /// Creates banking customers, accounts, and transactions with AML typology injection.
12888    /// Uses the BankingOrchestrator from synth-banking crate.
12889    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
12890        let pb = self.create_progress_bar(100, "Generating Banking Data");
12891
12892        // Build the banking orchestrator from config
12893        let orchestrator = BankingOrchestratorBuilder::new()
12894            .config(self.config.banking.clone())
12895            .seed(self.seed + 9000)
12896            .country_pack(self.primary_pack().clone())
12897            .build();
12898
12899        if let Some(pb) = &pb {
12900            pb.inc(10);
12901        }
12902
12903        // Generate the banking data
12904        let result = orchestrator.generate();
12905
12906        if let Some(pb) = &pb {
12907            pb.inc(90);
12908            pb.finish_with_message(format!(
12909                "Banking: {} customers, {} transactions",
12910                result.customers.len(),
12911                result.transactions.len()
12912            ));
12913        }
12914
12915        // Cross-reference banking customers with core master data so that
12916        // banking customer names align with the enterprise customer list.
12917        // We rotate through core customers, overlaying their name and country
12918        // onto the generated banking customers where possible.
12919        let mut banking_customers = result.customers;
12920        let core_customers = &self.master_data.customers;
12921        if !core_customers.is_empty() {
12922            for (i, bc) in banking_customers.iter_mut().enumerate() {
12923                let core = &core_customers[i % core_customers.len()];
12924                bc.name = CustomerName::business(&core.name);
12925                bc.residence_country = core.country.clone();
12926                bc.enterprise_customer_id = Some(core.customer_id.clone());
12927            }
12928            debug!(
12929                "Cross-referenced {} banking customers with {} core customers",
12930                banking_customers.len(),
12931                core_customers.len()
12932            );
12933        }
12934
12935        Ok(BankingSnapshot {
12936            customers: banking_customers,
12937            accounts: result.accounts,
12938            transactions: result.transactions,
12939            transaction_labels: result.transaction_labels,
12940            customer_labels: result.customer_labels,
12941            account_labels: result.account_labels,
12942            relationship_labels: result.relationship_labels,
12943            narratives: result.narratives,
12944            suspicious_count: result.stats.suspicious_count,
12945            scenario_count: result.scenarios.len(),
12946        })
12947    }
12948
12949    /// Calculate total transactions to generate.
12950    fn calculate_total_transactions(&self) -> u64 {
12951        let months = self.config.global.period_months as f64;
12952        self.config
12953            .companies
12954            .iter()
12955            .map(|c| {
12956                let annual = c.annual_transaction_volume.count() as f64;
12957                let weighted = annual * c.volume_weight;
12958                (weighted * months / 12.0) as u64
12959            })
12960            .sum()
12961    }
12962
12963    /// Create a progress bar if progress display is enabled.
12964    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
12965        if !self.phase_config.show_progress {
12966            return None;
12967        }
12968
12969        let pb = if let Some(mp) = &self.multi_progress {
12970            mp.add(ProgressBar::new(total))
12971        } else {
12972            ProgressBar::new(total)
12973        };
12974
12975        pb.set_style(
12976            ProgressStyle::default_bar()
12977                .template(&format!(
12978                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
12979                ))
12980                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
12981                .progress_chars("#>-"),
12982        );
12983
12984        Some(pb)
12985    }
12986
12987    /// Get the generated chart of accounts.
12988    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
12989        self.coa.clone()
12990    }
12991
    /// Get the generated master data.
    ///
    /// Returns a borrow of the orchestrator's [`MasterDataSnapshot`]; nothing is
    /// cloned, so the reference is valid only while `self` is borrowed.
    pub fn get_master_data(&self) -> &MasterDataSnapshot {
        &self.master_data
    }
12996
12997    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
12998    fn phase_compliance_regulations(
12999        &mut self,
13000        _stats: &mut EnhancedGenerationStatistics,
13001    ) -> SynthResult<ComplianceRegulationsSnapshot> {
13002        if !self.phase_config.generate_compliance_regulations {
13003            return Ok(ComplianceRegulationsSnapshot::default());
13004        }
13005
13006        info!("Phase: Generating Compliance Regulations Data");
13007
13008        let cr_config = &self.config.compliance_regulations;
13009
13010        // Determine jurisdictions: from config or inferred from companies
13011        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
13012            self.config
13013                .companies
13014                .iter()
13015                .map(|c| c.country.clone())
13016                .collect::<std::collections::HashSet<_>>()
13017                .into_iter()
13018                .collect()
13019        } else {
13020            cr_config.jurisdictions.clone()
13021        };
13022
13023        // Determine reference date
13024        let fallback_date =
13025            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
13026        let reference_date = cr_config
13027            .reference_date
13028            .as_ref()
13029            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
13030            .unwrap_or_else(|| {
13031                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13032                    .unwrap_or(fallback_date)
13033            });
13034
13035        // Generate standards registry data
13036        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
13037        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
13038        let cross_reference_records = reg_gen.generate_cross_reference_records();
13039        let jurisdiction_records =
13040            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
13041
13042        info!(
13043            "  Standards: {} records, {} cross-references, {} jurisdictions",
13044            standard_records.len(),
13045            cross_reference_records.len(),
13046            jurisdiction_records.len()
13047        );
13048
13049        // Generate audit procedures (if enabled)
13050        let audit_procedures = if cr_config.audit_procedures.enabled {
13051            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
13052                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
13053                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
13054                confidence_level: cr_config.audit_procedures.confidence_level,
13055                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
13056            };
13057            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
13058                self.seed + 9000,
13059                proc_config,
13060            );
13061            let registry = reg_gen.registry();
13062            let mut all_procs = Vec::new();
13063            for jurisdiction in &jurisdictions {
13064                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
13065                all_procs.extend(procs);
13066            }
13067            info!("  Audit procedures: {}", all_procs.len());
13068            all_procs
13069        } else {
13070            Vec::new()
13071        };
13072
13073        // Generate compliance findings (if enabled)
13074        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
13075            let finding_config =
13076                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
13077                    finding_rate: cr_config.findings.finding_rate,
13078                    material_weakness_rate: cr_config.findings.material_weakness_rate,
13079                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
13080                    generate_remediation: cr_config.findings.generate_remediation,
13081                };
13082            let mut finding_gen =
13083                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
13084                    self.seed + 9100,
13085                    finding_config,
13086                );
13087            let mut all_findings = Vec::new();
13088            for company in &self.config.companies {
13089                let company_findings =
13090                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
13091                all_findings.extend(company_findings);
13092            }
13093            info!("  Compliance findings: {}", all_findings.len());
13094            all_findings
13095        } else {
13096            Vec::new()
13097        };
13098
13099        // Generate regulatory filings (if enabled)
13100        let filings = if cr_config.filings.enabled {
13101            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
13102                filing_types: cr_config.filings.filing_types.clone(),
13103                generate_status_progression: cr_config.filings.generate_status_progression,
13104            };
13105            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
13106                self.seed + 9200,
13107                filing_config,
13108            );
13109            let company_codes: Vec<String> = self
13110                .config
13111                .companies
13112                .iter()
13113                .map(|c| c.code.clone())
13114                .collect();
13115            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13116                .unwrap_or(fallback_date);
13117            let filings = filing_gen.generate_filings(
13118                &company_codes,
13119                &jurisdictions,
13120                start_date,
13121                self.config.global.period_months,
13122            );
13123            info!("  Regulatory filings: {}", filings.len());
13124            filings
13125        } else {
13126            Vec::new()
13127        };
13128
13129        // Build compliance graph (if enabled)
13130        let compliance_graph = if cr_config.graph.enabled {
13131            let graph_config = datasynth_graph::ComplianceGraphConfig {
13132                include_standard_nodes: cr_config.graph.include_compliance_nodes,
13133                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
13134                include_cross_references: cr_config.graph.include_cross_references,
13135                include_supersession_edges: cr_config.graph.include_supersession_edges,
13136                include_account_links: cr_config.graph.include_account_links,
13137                include_control_links: cr_config.graph.include_control_links,
13138                include_company_links: cr_config.graph.include_company_links,
13139            };
13140            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
13141
13142            // Add standard nodes
13143            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
13144                .iter()
13145                .map(|r| datasynth_graph::StandardNodeInput {
13146                    standard_id: r.standard_id.clone(),
13147                    title: r.title.clone(),
13148                    category: r.category.clone(),
13149                    domain: r.domain.clone(),
13150                    is_active: r.is_active,
13151                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
13152                    applicable_account_types: r.applicable_account_types.clone(),
13153                    applicable_processes: r.applicable_processes.clone(),
13154                })
13155                .collect();
13156            builder.add_standards(&standard_inputs);
13157
13158            // Add jurisdiction nodes
13159            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
13160                jurisdiction_records
13161                    .iter()
13162                    .map(|r| datasynth_graph::JurisdictionNodeInput {
13163                        country_code: r.country_code.clone(),
13164                        country_name: r.country_name.clone(),
13165                        framework: r.accounting_framework.clone(),
13166                        standard_count: r.standard_count,
13167                        tax_rate: r.statutory_tax_rate,
13168                    })
13169                    .collect();
13170            builder.add_jurisdictions(&jurisdiction_inputs);
13171
13172            // Add cross-reference edges
13173            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
13174                cross_reference_records
13175                    .iter()
13176                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
13177                        from_standard: r.from_standard.clone(),
13178                        to_standard: r.to_standard.clone(),
13179                        relationship: r.relationship.clone(),
13180                        convergence_level: r.convergence_level,
13181                    })
13182                    .collect();
13183            builder.add_cross_references(&xref_inputs);
13184
13185            // Add jurisdiction→standard mappings
13186            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
13187                .iter()
13188                .map(|r| datasynth_graph::JurisdictionMappingInput {
13189                    country_code: r.jurisdiction.clone(),
13190                    standard_id: r.standard_id.clone(),
13191                })
13192                .collect();
13193            builder.add_jurisdiction_mappings(&mapping_inputs);
13194
13195            // Add procedure nodes
13196            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
13197                .iter()
13198                .map(|p| datasynth_graph::ProcedureNodeInput {
13199                    procedure_id: p.procedure_id.clone(),
13200                    standard_id: p.standard_id.clone(),
13201                    procedure_type: p.procedure_type.clone(),
13202                    sample_size: p.sample_size,
13203                    confidence_level: p.confidence_level,
13204                })
13205                .collect();
13206            builder.add_procedures(&proc_inputs);
13207
13208            // Add finding nodes
13209            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
13210                .iter()
13211                .map(|f| datasynth_graph::FindingNodeInput {
13212                    finding_id: f.finding_id.to_string(),
13213                    standard_id: f
13214                        .related_standards
13215                        .first()
13216                        .map(|s| s.as_str().to_string())
13217                        .unwrap_or_default(),
13218                    severity: f.severity.to_string(),
13219                    deficiency_level: f.deficiency_level.to_string(),
13220                    severity_score: f.deficiency_level.severity_score(),
13221                    control_id: f.control_id.clone(),
13222                    affected_accounts: f.affected_accounts.clone(),
13223                })
13224                .collect();
13225            builder.add_findings(&finding_inputs);
13226
13227            // Cross-domain: link standards to accounts from chart of accounts
13228            if cr_config.graph.include_account_links {
13229                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
13230                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
13231                for std_record in &standard_records {
13232                    if let Some(std_obj) =
13233                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
13234                            &std_record.standard_id,
13235                        ))
13236                    {
13237                        for acct_type in &std_obj.applicable_account_types {
13238                            account_links.push(datasynth_graph::AccountLinkInput {
13239                                standard_id: std_record.standard_id.clone(),
13240                                account_code: acct_type.clone(),
13241                                account_name: acct_type.clone(),
13242                            });
13243                        }
13244                    }
13245                }
13246                builder.add_account_links(&account_links);
13247            }
13248
13249            // Cross-domain: link standards to internal controls
13250            if cr_config.graph.include_control_links {
13251                let mut control_links = Vec::new();
13252                // SOX/PCAOB standards link to all controls
13253                let sox_like_ids: Vec<String> = standard_records
13254                    .iter()
13255                    .filter(|r| {
13256                        r.standard_id.starts_with("SOX")
13257                            || r.standard_id.starts_with("PCAOB-AS-2201")
13258                    })
13259                    .map(|r| r.standard_id.clone())
13260                    .collect();
13261                // Get control IDs from config (C001-C060 standard controls)
13262                let control_ids = [
13263                    ("C001", "Cash Controls"),
13264                    ("C002", "Large Transaction Approval"),
13265                    ("C010", "PO Approval"),
13266                    ("C011", "Three-Way Match"),
13267                    ("C020", "Revenue Recognition"),
13268                    ("C021", "Credit Check"),
13269                    ("C030", "Manual JE Approval"),
13270                    ("C031", "Period Close Review"),
13271                    ("C032", "Account Reconciliation"),
13272                    ("C040", "Payroll Processing"),
13273                    ("C050", "Fixed Asset Capitalization"),
13274                    ("C060", "Intercompany Elimination"),
13275                ];
13276                for sox_id in &sox_like_ids {
13277                    for (ctrl_id, ctrl_name) in &control_ids {
13278                        control_links.push(datasynth_graph::ControlLinkInput {
13279                            standard_id: sox_id.clone(),
13280                            control_id: ctrl_id.to_string(),
13281                            control_name: ctrl_name.to_string(),
13282                        });
13283                    }
13284                }
13285                builder.add_control_links(&control_links);
13286            }
13287
13288            // Cross-domain: filing nodes with company links
13289            if cr_config.graph.include_company_links {
13290                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
13291                    .iter()
13292                    .enumerate()
13293                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
13294                        filing_id: format!("F{:04}", i + 1),
13295                        filing_type: f.filing_type.to_string(),
13296                        company_code: f.company_code.clone(),
13297                        jurisdiction: f.jurisdiction.clone(),
13298                        status: format!("{:?}", f.status),
13299                    })
13300                    .collect();
13301                builder.add_filings(&filing_inputs);
13302            }
13303
13304            let graph = builder.build();
13305            info!(
13306                "  Compliance graph: {} nodes, {} edges",
13307                graph.nodes.len(),
13308                graph.edges.len()
13309            );
13310            Some(graph)
13311        } else {
13312            None
13313        };
13314
13315        self.check_resources_with_log("post-compliance-regulations")?;
13316
13317        Ok(ComplianceRegulationsSnapshot {
13318            standard_records,
13319            cross_reference_records,
13320            jurisdiction_records,
13321            audit_procedures,
13322            findings,
13323            filings,
13324            compliance_graph,
13325        })
13326    }
13327
13328    /// Build a lineage graph describing config → phase → output relationships.
13329    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
13330        use super::lineage::LineageGraphBuilder;
13331
13332        let mut builder = LineageGraphBuilder::new();
13333
13334        // Config sections
13335        builder.add_config_section("config:global", "Global Config");
13336        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
13337        builder.add_config_section("config:transactions", "Transaction Config");
13338
13339        // Generator phases
13340        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
13341        builder.add_generator_phase("phase:je", "Journal Entry Generation");
13342
13343        // Config → phase edges
13344        builder.configured_by("phase:coa", "config:chart_of_accounts");
13345        builder.configured_by("phase:je", "config:transactions");
13346
13347        // Output files
13348        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
13349        builder.produced_by("output:je", "phase:je");
13350
13351        // Optional phases based on config
13352        if self.phase_config.generate_master_data {
13353            builder.add_config_section("config:master_data", "Master Data Config");
13354            builder.add_generator_phase("phase:master_data", "Master Data Generation");
13355            builder.configured_by("phase:master_data", "config:master_data");
13356            builder.input_to("phase:master_data", "phase:je");
13357        }
13358
13359        if self.phase_config.generate_document_flows {
13360            builder.add_config_section("config:document_flows", "Document Flow Config");
13361            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
13362            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
13363            builder.configured_by("phase:p2p", "config:document_flows");
13364            builder.configured_by("phase:o2c", "config:document_flows");
13365
13366            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
13367            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
13368            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
13369            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
13370            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
13371
13372            builder.produced_by("output:po", "phase:p2p");
13373            builder.produced_by("output:gr", "phase:p2p");
13374            builder.produced_by("output:vi", "phase:p2p");
13375            builder.produced_by("output:so", "phase:o2c");
13376            builder.produced_by("output:ci", "phase:o2c");
13377        }
13378
13379        if self.phase_config.inject_anomalies {
13380            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
13381            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
13382            builder.configured_by("phase:anomaly", "config:fraud");
13383            builder.add_output_file(
13384                "output:labels",
13385                "Anomaly Labels",
13386                "labels/anomaly_labels.csv",
13387            );
13388            builder.produced_by("output:labels", "phase:anomaly");
13389        }
13390
13391        if self.phase_config.generate_audit {
13392            builder.add_config_section("config:audit", "Audit Config");
13393            builder.add_generator_phase("phase:audit", "Audit Data Generation");
13394            builder.configured_by("phase:audit", "config:audit");
13395        }
13396
13397        if self.phase_config.generate_banking {
13398            builder.add_config_section("config:banking", "Banking Config");
13399            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
13400            builder.configured_by("phase:banking", "config:banking");
13401        }
13402
13403        if self.config.llm.enabled {
13404            builder.add_config_section("config:llm", "LLM Enrichment Config");
13405            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
13406            builder.configured_by("phase:llm_enrichment", "config:llm");
13407        }
13408
13409        if self.config.diffusion.enabled {
13410            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
13411            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
13412            builder.configured_by("phase:diffusion", "config:diffusion");
13413        }
13414
13415        if self.config.causal.enabled {
13416            builder.add_config_section("config:causal", "Causal Generation Config");
13417            builder.add_generator_phase("phase:causal", "Causal Overlay");
13418            builder.configured_by("phase:causal", "config:causal");
13419        }
13420
13421        builder.build()
13422    }
13423
13424    // -----------------------------------------------------------------------
13425    // Trial-balance helpers used to replace hardcoded proxy values
13426    // -----------------------------------------------------------------------
13427
13428    /// Compute total revenue for a company from its journal entries.
13429    ///
13430    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
13431    /// net credits on all revenue-account lines filtered to `company_code`.
13432    fn compute_company_revenue(
13433        entries: &[JournalEntry],
13434        company_code: &str,
13435    ) -> rust_decimal::Decimal {
13436        use rust_decimal::Decimal;
13437        let mut revenue = Decimal::ZERO;
13438        for je in entries {
13439            if je.header.company_code != company_code {
13440                continue;
13441            }
13442            for line in &je.lines {
13443                if line.gl_account.starts_with('4') {
13444                    // Revenue is credit-normal
13445                    revenue += line.credit_amount - line.debit_amount;
13446                }
13447            }
13448        }
13449        revenue.max(Decimal::ZERO)
13450    }
13451
13452    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
13453    ///
13454    /// Asset accounts start with "1"; liability accounts start with "2".
13455    fn compute_entity_net_assets(
13456        entries: &[JournalEntry],
13457        entity_code: &str,
13458    ) -> rust_decimal::Decimal {
13459        use rust_decimal::Decimal;
13460        let mut asset_net = Decimal::ZERO;
13461        let mut liability_net = Decimal::ZERO;
13462        for je in entries {
13463            if je.header.company_code != entity_code {
13464                continue;
13465            }
13466            for line in &je.lines {
13467                if line.gl_account.starts_with('1') {
13468                    asset_net += line.debit_amount - line.credit_amount;
13469                } else if line.gl_account.starts_with('2') {
13470                    liability_net += line.credit_amount - line.debit_amount;
13471                }
13472            }
13473        }
13474        asset_net - liability_net
13475    }
13476}
13477
13478/// Get the directory name for a graph export format.
13479fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
13480    match format {
13481        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
13482        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
13483        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
13484        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
13485        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
13486    }
13487}
13488
13489/// Aggregate journal entry lines into per-account trial balance rows.
13490///
13491/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
13492/// debit/credit totals and a net balance (debit minus credit).
13493fn compute_trial_balance_entries(
13494    entries: &[JournalEntry],
13495    entity_code: &str,
13496    fiscal_year: i32,
13497    coa: Option<&ChartOfAccounts>,
13498) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
13499    use std::collections::BTreeMap;
13500
13501    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
13502        BTreeMap::new();
13503
13504    for je in entries {
13505        for line in &je.lines {
13506            let entry = balances.entry(line.account_code.clone()).or_default();
13507            entry.0 += line.debit_amount;
13508            entry.1 += line.credit_amount;
13509        }
13510    }
13511
13512    balances
13513        .into_iter()
13514        .map(
13515            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
13516                account_description: coa
13517                    .and_then(|c| c.get_account(&account_code))
13518                    .map(|a| a.description().to_string())
13519                    .unwrap_or_else(|| account_code.clone()),
13520                account_code,
13521                debit_balance: debit,
13522                credit_balance: credit,
13523                net_balance: debit - credit,
13524                entity_code: entity_code.to_string(),
13525                period: format!("FY{}", fiscal_year),
13526            },
13527        )
13528        .collect()
13529}
13530
13531#[cfg(test)]
13532#[allow(clippy::unwrap_used)]
13533mod tests {
13534    use super::*;
13535    use datasynth_config::schema::*;
13536
13537    fn create_test_config() -> GeneratorConfig {
13538        GeneratorConfig {
13539            global: GlobalConfig {
13540                industry: IndustrySector::Manufacturing,
13541                start_date: "2024-01-01".to_string(),
13542                period_months: 1,
13543                seed: Some(42),
13544                parallel: false,
13545                group_currency: "USD".to_string(),
13546                presentation_currency: None,
13547                worker_threads: 0,
13548                memory_limit_mb: 0,
13549                fiscal_year_months: None,
13550            },
13551            companies: vec![CompanyConfig {
13552                code: "1000".to_string(),
13553                name: "Test Company".to_string(),
13554                currency: "USD".to_string(),
13555                functional_currency: None,
13556                country: "US".to_string(),
13557                annual_transaction_volume: TransactionVolume::TenK,
13558                volume_weight: 1.0,
13559                fiscal_year_variant: "K4".to_string(),
13560            }],
13561            chart_of_accounts: ChartOfAccountsConfig {
13562                complexity: CoAComplexity::Small,
13563                industry_specific: true,
13564                custom_accounts: None,
13565                min_hierarchy_depth: 2,
13566                max_hierarchy_depth: 4,
13567            },
13568            transactions: TransactionConfig::default(),
13569            output: OutputConfig::default(),
13570            fraud: FraudConfig::default(),
13571            internal_controls: InternalControlsConfig::default(),
13572            business_processes: BusinessProcessConfig::default(),
13573            user_personas: UserPersonaConfig::default(),
13574            templates: TemplateConfig::default(),
13575            approval: ApprovalConfig::default(),
13576            departments: DepartmentConfig::default(),
13577            master_data: MasterDataConfig::default(),
13578            document_flows: DocumentFlowConfig::default(),
13579            intercompany: IntercompanyConfig::default(),
13580            balance: BalanceConfig::default(),
13581            ocpm: OcpmConfig::default(),
13582            audit: AuditGenerationConfig::default(),
13583            banking: datasynth_banking::BankingConfig::default(),
13584            data_quality: DataQualitySchemaConfig::default(),
13585            scenario: ScenarioConfig::default(),
13586            temporal: TemporalDriftConfig::default(),
13587            graph_export: GraphExportConfig::default(),
13588            streaming: StreamingSchemaConfig::default(),
13589            rate_limit: RateLimitSchemaConfig::default(),
13590            temporal_attributes: TemporalAttributeSchemaConfig::default(),
13591            relationships: RelationshipSchemaConfig::default(),
13592            accounting_standards: AccountingStandardsConfig::default(),
13593            audit_standards: AuditStandardsConfig::default(),
13594            distributions: Default::default(),
13595            temporal_patterns: Default::default(),
13596            vendor_network: VendorNetworkSchemaConfig::default(),
13597            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
13598            relationship_strength: RelationshipStrengthSchemaConfig::default(),
13599            cross_process_links: CrossProcessLinksSchemaConfig::default(),
13600            organizational_events: OrganizationalEventsSchemaConfig::default(),
13601            behavioral_drift: BehavioralDriftSchemaConfig::default(),
13602            market_drift: MarketDriftSchemaConfig::default(),
13603            drift_labeling: DriftLabelingSchemaConfig::default(),
13604            anomaly_injection: Default::default(),
13605            industry_specific: Default::default(),
13606            fingerprint_privacy: Default::default(),
13607            quality_gates: Default::default(),
13608            compliance: Default::default(),
13609            webhooks: Default::default(),
13610            llm: Default::default(),
13611            diffusion: Default::default(),
13612            causal: Default::default(),
13613            source_to_pay: Default::default(),
13614            financial_reporting: Default::default(),
13615            hr: Default::default(),
13616            manufacturing: Default::default(),
13617            sales_quotes: Default::default(),
13618            tax: Default::default(),
13619            treasury: Default::default(),
13620            project_accounting: Default::default(),
13621            esg: Default::default(),
13622            country_packs: None,
13623            scenarios: Default::default(),
13624            session: Default::default(),
13625            compliance_regulations: Default::default(),
13626        }
13627    }
13628
13629    #[test]
13630    fn test_enhanced_orchestrator_creation() {
13631        let config = create_test_config();
13632        let orchestrator = EnhancedOrchestrator::with_defaults(config);
13633        assert!(orchestrator.is_ok());
13634    }
13635
13636    #[test]
13637    fn test_minimal_generation() {
13638        let config = create_test_config();
13639        let phase_config = PhaseConfig {
13640            generate_master_data: false,
13641            generate_document_flows: false,
13642            generate_journal_entries: true,
13643            inject_anomalies: false,
13644            show_progress: false,
13645            ..Default::default()
13646        };
13647
13648        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13649        let result = orchestrator.generate();
13650
13651        assert!(result.is_ok());
13652        let result = result.unwrap();
13653        assert!(!result.journal_entries.is_empty());
13654    }
13655
13656    #[test]
13657    fn test_master_data_generation() {
13658        let config = create_test_config();
13659        let phase_config = PhaseConfig {
13660            generate_master_data: true,
13661            generate_document_flows: false,
13662            generate_journal_entries: false,
13663            inject_anomalies: false,
13664            show_progress: false,
13665            vendors_per_company: 5,
13666            customers_per_company: 5,
13667            materials_per_company: 10,
13668            assets_per_company: 5,
13669            employees_per_company: 10,
13670            ..Default::default()
13671        };
13672
13673        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13674        let result = orchestrator.generate().unwrap();
13675
13676        assert!(!result.master_data.vendors.is_empty());
13677        assert!(!result.master_data.customers.is_empty());
13678        assert!(!result.master_data.materials.is_empty());
13679    }
13680
13681    #[test]
13682    fn test_document_flow_generation() {
13683        let config = create_test_config();
13684        let phase_config = PhaseConfig {
13685            generate_master_data: true,
13686            generate_document_flows: true,
13687            generate_journal_entries: false,
13688            inject_anomalies: false,
13689            inject_data_quality: false,
13690            validate_balances: false,
13691            generate_ocpm_events: false,
13692            show_progress: false,
13693            vendors_per_company: 5,
13694            customers_per_company: 5,
13695            materials_per_company: 10,
13696            assets_per_company: 5,
13697            employees_per_company: 10,
13698            p2p_chains: 5,
13699            o2c_chains: 5,
13700            ..Default::default()
13701        };
13702
13703        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13704        let result = orchestrator.generate().unwrap();
13705
13706        // Should have generated P2P and O2C chains
13707        assert!(!result.document_flows.p2p_chains.is_empty());
13708        assert!(!result.document_flows.o2c_chains.is_empty());
13709
13710        // Flattened documents should be populated
13711        assert!(!result.document_flows.purchase_orders.is_empty());
13712        assert!(!result.document_flows.sales_orders.is_empty());
13713    }
13714
13715    #[test]
13716    fn test_anomaly_injection() {
13717        let config = create_test_config();
13718        let phase_config = PhaseConfig {
13719            generate_master_data: false,
13720            generate_document_flows: false,
13721            generate_journal_entries: true,
13722            inject_anomalies: true,
13723            show_progress: false,
13724            ..Default::default()
13725        };
13726
13727        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13728        let result = orchestrator.generate().unwrap();
13729
13730        // Should have journal entries
13731        assert!(!result.journal_entries.is_empty());
13732
13733        // With ~833 entries and 2% rate, expect some anomalies
13734        // Note: This is probabilistic, so we just verify the structure exists
13735        assert!(result.anomaly_labels.summary.is_some());
13736    }
13737
13738    #[test]
13739    fn test_full_generation_pipeline() {
13740        let config = create_test_config();
13741        let phase_config = PhaseConfig {
13742            generate_master_data: true,
13743            generate_document_flows: true,
13744            generate_journal_entries: true,
13745            inject_anomalies: false,
13746            inject_data_quality: false,
13747            validate_balances: true,
13748            generate_ocpm_events: false,
13749            show_progress: false,
13750            vendors_per_company: 3,
13751            customers_per_company: 3,
13752            materials_per_company: 5,
13753            assets_per_company: 3,
13754            employees_per_company: 5,
13755            p2p_chains: 3,
13756            o2c_chains: 3,
13757            ..Default::default()
13758        };
13759
13760        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13761        let result = orchestrator.generate().unwrap();
13762
13763        // All phases should have results
13764        assert!(!result.master_data.vendors.is_empty());
13765        assert!(!result.master_data.customers.is_empty());
13766        assert!(!result.document_flows.p2p_chains.is_empty());
13767        assert!(!result.document_flows.o2c_chains.is_empty());
13768        assert!(!result.journal_entries.is_empty());
13769        assert!(result.statistics.accounts_count > 0);
13770
13771        // Subledger linking should have run
13772        assert!(!result.subledger.ap_invoices.is_empty());
13773        assert!(!result.subledger.ar_invoices.is_empty());
13774
13775        // Balance validation should have run
13776        assert!(result.balance_validation.validated);
13777        assert!(result.balance_validation.entries_processed > 0);
13778    }
13779
13780    #[test]
13781    fn test_subledger_linking() {
13782        let config = create_test_config();
13783        let phase_config = PhaseConfig {
13784            generate_master_data: true,
13785            generate_document_flows: true,
13786            generate_journal_entries: false,
13787            inject_anomalies: false,
13788            inject_data_quality: false,
13789            validate_balances: false,
13790            generate_ocpm_events: false,
13791            show_progress: false,
13792            vendors_per_company: 5,
13793            customers_per_company: 5,
13794            materials_per_company: 10,
13795            assets_per_company: 3,
13796            employees_per_company: 5,
13797            p2p_chains: 5,
13798            o2c_chains: 5,
13799            ..Default::default()
13800        };
13801
13802        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13803        let result = orchestrator.generate().unwrap();
13804
13805        // Should have document flows
13806        assert!(!result.document_flows.vendor_invoices.is_empty());
13807        assert!(!result.document_flows.customer_invoices.is_empty());
13808
13809        // Subledger should be linked from document flows
13810        assert!(!result.subledger.ap_invoices.is_empty());
13811        assert!(!result.subledger.ar_invoices.is_empty());
13812
13813        // AP invoices count should match vendor invoices count
13814        assert_eq!(
13815            result.subledger.ap_invoices.len(),
13816            result.document_flows.vendor_invoices.len()
13817        );
13818
13819        // AR invoices count should match customer invoices count
13820        assert_eq!(
13821            result.subledger.ar_invoices.len(),
13822            result.document_flows.customer_invoices.len()
13823        );
13824
13825        // Statistics should reflect subledger counts
13826        assert_eq!(
13827            result.statistics.ap_invoice_count,
13828            result.subledger.ap_invoices.len()
13829        );
13830        assert_eq!(
13831            result.statistics.ar_invoice_count,
13832            result.subledger.ar_invoices.len()
13833        );
13834    }
13835
13836    #[test]
13837    fn test_balance_validation() {
13838        let config = create_test_config();
13839        let phase_config = PhaseConfig {
13840            generate_master_data: false,
13841            generate_document_flows: false,
13842            generate_journal_entries: true,
13843            inject_anomalies: false,
13844            validate_balances: true,
13845            show_progress: false,
13846            ..Default::default()
13847        };
13848
13849        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13850        let result = orchestrator.generate().unwrap();
13851
13852        // Balance validation should run
13853        assert!(result.balance_validation.validated);
13854        assert!(result.balance_validation.entries_processed > 0);
13855
13856        // Generated JEs should be balanced (no unbalanced entries)
13857        assert!(!result.balance_validation.has_unbalanced_entries);
13858
13859        // Total debits should equal total credits
13860        assert_eq!(
13861            result.balance_validation.total_debits,
13862            result.balance_validation.total_credits
13863        );
13864    }
13865
13866    #[test]
13867    fn test_statistics_accuracy() {
13868        let config = create_test_config();
13869        let phase_config = PhaseConfig {
13870            generate_master_data: true,
13871            generate_document_flows: false,
13872            generate_journal_entries: true,
13873            inject_anomalies: false,
13874            show_progress: false,
13875            vendors_per_company: 10,
13876            customers_per_company: 20,
13877            materials_per_company: 15,
13878            assets_per_company: 5,
13879            employees_per_company: 8,
13880            ..Default::default()
13881        };
13882
13883        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13884        let result = orchestrator.generate().unwrap();
13885
13886        // Statistics should match actual data
13887        assert_eq!(
13888            result.statistics.vendor_count,
13889            result.master_data.vendors.len()
13890        );
13891        assert_eq!(
13892            result.statistics.customer_count,
13893            result.master_data.customers.len()
13894        );
13895        assert_eq!(
13896            result.statistics.material_count,
13897            result.master_data.materials.len()
13898        );
13899        assert_eq!(
13900            result.statistics.total_entries as usize,
13901            result.journal_entries.len()
13902        );
13903    }
13904
13905    #[test]
13906    fn test_phase_config_defaults() {
13907        let config = PhaseConfig::default();
13908        assert!(config.generate_master_data);
13909        assert!(config.generate_document_flows);
13910        assert!(config.generate_journal_entries);
13911        assert!(!config.inject_anomalies);
13912        assert!(config.validate_balances);
13913        assert!(config.show_progress);
13914        assert!(config.vendors_per_company > 0);
13915        assert!(config.customers_per_company > 0);
13916    }
13917
13918    #[test]
13919    fn test_get_coa_before_generation() {
13920        let config = create_test_config();
13921        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
13922
13923        // Before generation, CoA should be None
13924        assert!(orchestrator.get_coa().is_none());
13925    }
13926
13927    #[test]
13928    fn test_get_coa_after_generation() {
13929        let config = create_test_config();
13930        let phase_config = PhaseConfig {
13931            generate_master_data: false,
13932            generate_document_flows: false,
13933            generate_journal_entries: true,
13934            inject_anomalies: false,
13935            show_progress: false,
13936            ..Default::default()
13937        };
13938
13939        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13940        let _ = orchestrator.generate().unwrap();
13941
13942        // After generation, CoA should be available
13943        assert!(orchestrator.get_coa().is_some());
13944    }
13945
13946    #[test]
13947    fn test_get_master_data() {
13948        let config = create_test_config();
13949        let phase_config = PhaseConfig {
13950            generate_master_data: true,
13951            generate_document_flows: false,
13952            generate_journal_entries: false,
13953            inject_anomalies: false,
13954            show_progress: false,
13955            vendors_per_company: 5,
13956            customers_per_company: 5,
13957            materials_per_company: 5,
13958            assets_per_company: 5,
13959            employees_per_company: 5,
13960            ..Default::default()
13961        };
13962
13963        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
13964        let result = orchestrator.generate().unwrap();
13965
13966        // After generate(), master_data is moved into the result
13967        assert!(!result.master_data.vendors.is_empty());
13968    }
13969
13970    #[test]
13971    fn test_with_progress_builder() {
13972        let config = create_test_config();
13973        let orchestrator = EnhancedOrchestrator::with_defaults(config)
13974            .unwrap()
13975            .with_progress(false);
13976
13977        // Should still work without progress
13978        assert!(!orchestrator.phase_config.show_progress);
13979    }
13980
13981    #[test]
13982    fn test_multi_company_generation() {
13983        let mut config = create_test_config();
13984        config.companies.push(CompanyConfig {
13985            code: "2000".to_string(),
13986            name: "Subsidiary".to_string(),
13987            currency: "EUR".to_string(),
13988            functional_currency: None,
13989            country: "DE".to_string(),
13990            annual_transaction_volume: TransactionVolume::TenK,
13991            volume_weight: 0.5,
13992            fiscal_year_variant: "K4".to_string(),
13993        });
13994
13995        let phase_config = PhaseConfig {
13996            generate_master_data: true,
13997            generate_document_flows: false,
13998            generate_journal_entries: true,
13999            inject_anomalies: false,
14000            show_progress: false,
14001            vendors_per_company: 5,
14002            customers_per_company: 5,
14003            materials_per_company: 5,
14004            assets_per_company: 5,
14005            employees_per_company: 5,
14006            ..Default::default()
14007        };
14008
14009        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14010        let result = orchestrator.generate().unwrap();
14011
14012        // Should have master data for both companies
14013        assert!(result.statistics.vendor_count >= 10); // 5 per company
14014        assert!(result.statistics.customer_count >= 10);
14015        assert!(result.statistics.companies_count == 2);
14016    }
14017
14018    #[test]
14019    fn test_empty_master_data_skips_document_flows() {
14020        let config = create_test_config();
14021        let phase_config = PhaseConfig {
14022            generate_master_data: false,   // Skip master data
14023            generate_document_flows: true, // Try to generate flows
14024            generate_journal_entries: false,
14025            inject_anomalies: false,
14026            show_progress: false,
14027            ..Default::default()
14028        };
14029
14030        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14031        let result = orchestrator.generate().unwrap();
14032
14033        // Without master data, document flows should be empty
14034        assert!(result.document_flows.p2p_chains.is_empty());
14035        assert!(result.document_flows.o2c_chains.is_empty());
14036    }
14037
14038    #[test]
14039    fn test_journal_entry_line_item_count() {
14040        let config = create_test_config();
14041        let phase_config = PhaseConfig {
14042            generate_master_data: false,
14043            generate_document_flows: false,
14044            generate_journal_entries: true,
14045            inject_anomalies: false,
14046            show_progress: false,
14047            ..Default::default()
14048        };
14049
14050        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14051        let result = orchestrator.generate().unwrap();
14052
14053        // Total line items should match sum of all entry line counts
14054        let calculated_line_items: u64 = result
14055            .journal_entries
14056            .iter()
14057            .map(|e| e.line_count() as u64)
14058            .sum();
14059        assert_eq!(result.statistics.total_line_items, calculated_line_items);
14060    }
14061
14062    #[test]
14063    fn test_audit_generation() {
14064        let config = create_test_config();
14065        let phase_config = PhaseConfig {
14066            generate_master_data: false,
14067            generate_document_flows: false,
14068            generate_journal_entries: true,
14069            inject_anomalies: false,
14070            show_progress: false,
14071            generate_audit: true,
14072            audit_engagements: 2,
14073            workpapers_per_engagement: 5,
14074            evidence_per_workpaper: 2,
14075            risks_per_engagement: 3,
14076            findings_per_engagement: 2,
14077            judgments_per_engagement: 2,
14078            ..Default::default()
14079        };
14080
14081        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14082        let result = orchestrator.generate().unwrap();
14083
14084        // Should have generated audit data
14085        assert_eq!(result.audit.engagements.len(), 2);
14086        assert!(!result.audit.workpapers.is_empty());
14087        assert!(!result.audit.evidence.is_empty());
14088        assert!(!result.audit.risk_assessments.is_empty());
14089        assert!(!result.audit.findings.is_empty());
14090        assert!(!result.audit.judgments.is_empty());
14091
14092        // New ISA entity collections should also be populated
14093        assert!(
14094            !result.audit.confirmations.is_empty(),
14095            "ISA 505 confirmations should be generated"
14096        );
14097        assert!(
14098            !result.audit.confirmation_responses.is_empty(),
14099            "ISA 505 confirmation responses should be generated"
14100        );
14101        assert!(
14102            !result.audit.procedure_steps.is_empty(),
14103            "ISA 330 procedure steps should be generated"
14104        );
14105        // Samples may or may not be generated depending on workpaper sampling methods
14106        assert!(
14107            !result.audit.analytical_results.is_empty(),
14108            "ISA 520 analytical procedures should be generated"
14109        );
14110        assert!(
14111            !result.audit.ia_functions.is_empty(),
14112            "ISA 610 IA functions should be generated (one per engagement)"
14113        );
14114        assert!(
14115            !result.audit.related_parties.is_empty(),
14116            "ISA 550 related parties should be generated"
14117        );
14118
14119        // Statistics should match
14120        assert_eq!(
14121            result.statistics.audit_engagement_count,
14122            result.audit.engagements.len()
14123        );
14124        assert_eq!(
14125            result.statistics.audit_workpaper_count,
14126            result.audit.workpapers.len()
14127        );
14128        assert_eq!(
14129            result.statistics.audit_evidence_count,
14130            result.audit.evidence.len()
14131        );
14132        assert_eq!(
14133            result.statistics.audit_risk_count,
14134            result.audit.risk_assessments.len()
14135        );
14136        assert_eq!(
14137            result.statistics.audit_finding_count,
14138            result.audit.findings.len()
14139        );
14140        assert_eq!(
14141            result.statistics.audit_judgment_count,
14142            result.audit.judgments.len()
14143        );
14144        assert_eq!(
14145            result.statistics.audit_confirmation_count,
14146            result.audit.confirmations.len()
14147        );
14148        assert_eq!(
14149            result.statistics.audit_confirmation_response_count,
14150            result.audit.confirmation_responses.len()
14151        );
14152        assert_eq!(
14153            result.statistics.audit_procedure_step_count,
14154            result.audit.procedure_steps.len()
14155        );
14156        assert_eq!(
14157            result.statistics.audit_sample_count,
14158            result.audit.samples.len()
14159        );
14160        assert_eq!(
14161            result.statistics.audit_analytical_result_count,
14162            result.audit.analytical_results.len()
14163        );
14164        assert_eq!(
14165            result.statistics.audit_ia_function_count,
14166            result.audit.ia_functions.len()
14167        );
14168        assert_eq!(
14169            result.statistics.audit_ia_report_count,
14170            result.audit.ia_reports.len()
14171        );
14172        assert_eq!(
14173            result.statistics.audit_related_party_count,
14174            result.audit.related_parties.len()
14175        );
14176        assert_eq!(
14177            result.statistics.audit_related_party_transaction_count,
14178            result.audit.related_party_transactions.len()
14179        );
14180    }
14181
14182    #[test]
14183    fn test_new_phases_disabled_by_default() {
14184        let config = create_test_config();
14185        // Verify new config fields default to disabled
14186        assert!(!config.llm.enabled);
14187        assert!(!config.diffusion.enabled);
14188        assert!(!config.causal.enabled);
14189
14190        let phase_config = PhaseConfig {
14191            generate_master_data: false,
14192            generate_document_flows: false,
14193            generate_journal_entries: true,
14194            inject_anomalies: false,
14195            show_progress: false,
14196            ..Default::default()
14197        };
14198
14199        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14200        let result = orchestrator.generate().unwrap();
14201
14202        // All new phase statistics should be zero when disabled
14203        assert_eq!(result.statistics.llm_enrichment_ms, 0);
14204        assert_eq!(result.statistics.llm_vendors_enriched, 0);
14205        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
14206        assert_eq!(result.statistics.diffusion_samples_generated, 0);
14207        assert_eq!(result.statistics.causal_generation_ms, 0);
14208        assert_eq!(result.statistics.causal_samples_generated, 0);
14209        assert!(result.statistics.causal_validation_passed.is_none());
14210        assert_eq!(result.statistics.counterfactual_pair_count, 0);
14211        assert!(result.counterfactual_pairs.is_empty());
14212    }
14213
14214    #[test]
14215    fn test_counterfactual_generation_enabled() {
14216        let config = create_test_config();
14217        let phase_config = PhaseConfig {
14218            generate_master_data: false,
14219            generate_document_flows: false,
14220            generate_journal_entries: true,
14221            inject_anomalies: false,
14222            show_progress: false,
14223            generate_counterfactuals: true,
14224            generate_period_close: false, // Disable so entry count matches counterfactual pairs
14225            ..Default::default()
14226        };
14227
14228        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14229        let result = orchestrator.generate().unwrap();
14230
14231        // With JE generation enabled, counterfactual pairs should be generated
14232        if !result.journal_entries.is_empty() {
14233            assert_eq!(
14234                result.counterfactual_pairs.len(),
14235                result.journal_entries.len()
14236            );
14237            assert_eq!(
14238                result.statistics.counterfactual_pair_count,
14239                result.journal_entries.len()
14240            );
14241            // Each pair should have a distinct pair_id
14242            let ids: std::collections::HashSet<_> = result
14243                .counterfactual_pairs
14244                .iter()
14245                .map(|p| p.pair_id.clone())
14246                .collect();
14247            assert_eq!(ids.len(), result.counterfactual_pairs.len());
14248        }
14249    }
14250
14251    #[test]
14252    fn test_llm_enrichment_enabled() {
14253        let mut config = create_test_config();
14254        config.llm.enabled = true;
14255        config.llm.max_vendor_enrichments = 3;
14256
14257        let phase_config = PhaseConfig {
14258            generate_master_data: true,
14259            generate_document_flows: false,
14260            generate_journal_entries: false,
14261            inject_anomalies: false,
14262            show_progress: false,
14263            vendors_per_company: 5,
14264            customers_per_company: 3,
14265            materials_per_company: 3,
14266            assets_per_company: 3,
14267            employees_per_company: 3,
14268            ..Default::default()
14269        };
14270
14271        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14272        let result = orchestrator.generate().unwrap();
14273
14274        // LLM enrichment should have run
14275        assert!(result.statistics.llm_vendors_enriched > 0);
14276        assert!(result.statistics.llm_vendors_enriched <= 3);
14277    }
14278
14279    #[test]
14280    fn test_diffusion_enhancement_enabled() {
14281        let mut config = create_test_config();
14282        config.diffusion.enabled = true;
14283        config.diffusion.n_steps = 50;
14284        config.diffusion.sample_size = 20;
14285
14286        let phase_config = PhaseConfig {
14287            generate_master_data: false,
14288            generate_document_flows: false,
14289            generate_journal_entries: true,
14290            inject_anomalies: false,
14291            show_progress: false,
14292            ..Default::default()
14293        };
14294
14295        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14296        let result = orchestrator.generate().unwrap();
14297
14298        // Diffusion phase should have generated samples
14299        assert_eq!(result.statistics.diffusion_samples_generated, 20);
14300    }
14301
14302    #[test]
14303    fn test_causal_overlay_enabled() {
14304        let mut config = create_test_config();
14305        config.causal.enabled = true;
14306        config.causal.template = "fraud_detection".to_string();
14307        config.causal.sample_size = 100;
14308        config.causal.validate = true;
14309
14310        let phase_config = PhaseConfig {
14311            generate_master_data: false,
14312            generate_document_flows: false,
14313            generate_journal_entries: true,
14314            inject_anomalies: false,
14315            show_progress: false,
14316            ..Default::default()
14317        };
14318
14319        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14320        let result = orchestrator.generate().unwrap();
14321
14322        // Causal phase should have generated samples
14323        assert_eq!(result.statistics.causal_samples_generated, 100);
14324        // Validation should have run
14325        assert!(result.statistics.causal_validation_passed.is_some());
14326    }
14327
14328    #[test]
14329    fn test_causal_overlay_revenue_cycle_template() {
14330        let mut config = create_test_config();
14331        config.causal.enabled = true;
14332        config.causal.template = "revenue_cycle".to_string();
14333        config.causal.sample_size = 50;
14334        config.causal.validate = false;
14335
14336        let phase_config = PhaseConfig {
14337            generate_master_data: false,
14338            generate_document_flows: false,
14339            generate_journal_entries: true,
14340            inject_anomalies: false,
14341            show_progress: false,
14342            ..Default::default()
14343        };
14344
14345        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14346        let result = orchestrator.generate().unwrap();
14347
14348        // Causal phase should have generated samples
14349        assert_eq!(result.statistics.causal_samples_generated, 50);
14350        // Validation was disabled
14351        assert!(result.statistics.causal_validation_passed.is_none());
14352    }
14353
14354    #[test]
14355    fn test_all_new_phases_enabled_together() {
14356        let mut config = create_test_config();
14357        config.llm.enabled = true;
14358        config.llm.max_vendor_enrichments = 2;
14359        config.diffusion.enabled = true;
14360        config.diffusion.n_steps = 20;
14361        config.diffusion.sample_size = 10;
14362        config.causal.enabled = true;
14363        config.causal.sample_size = 50;
14364        config.causal.validate = true;
14365
14366        let phase_config = PhaseConfig {
14367            generate_master_data: true,
14368            generate_document_flows: false,
14369            generate_journal_entries: true,
14370            inject_anomalies: false,
14371            show_progress: false,
14372            vendors_per_company: 5,
14373            customers_per_company: 3,
14374            materials_per_company: 3,
14375            assets_per_company: 3,
14376            employees_per_company: 3,
14377            ..Default::default()
14378        };
14379
14380        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
14381        let result = orchestrator.generate().unwrap();
14382
14383        // All three phases should have run
14384        assert!(result.statistics.llm_vendors_enriched > 0);
14385        assert_eq!(result.statistics.diffusion_samples_generated, 10);
14386        assert_eq!(result.statistics.causal_samples_generated, 50);
14387        assert!(result.statistics.causal_validation_passed.is_some());
14388    }
14389
14390    #[test]
14391    fn test_statistics_serialization_with_new_fields() {
14392        let stats = EnhancedGenerationStatistics {
14393            total_entries: 100,
14394            total_line_items: 500,
14395            llm_enrichment_ms: 42,
14396            llm_vendors_enriched: 10,
14397            diffusion_enhancement_ms: 100,
14398            diffusion_samples_generated: 50,
14399            causal_generation_ms: 200,
14400            causal_samples_generated: 100,
14401            causal_validation_passed: Some(true),
14402            ..Default::default()
14403        };
14404
14405        let json = serde_json::to_string(&stats).unwrap();
14406        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
14407
14408        assert_eq!(deserialized.llm_enrichment_ms, 42);
14409        assert_eq!(deserialized.llm_vendors_enriched, 10);
14410        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
14411        assert_eq!(deserialized.diffusion_samples_generated, 50);
14412        assert_eq!(deserialized.causal_generation_ms, 200);
14413        assert_eq!(deserialized.causal_samples_generated, 100);
14414        assert_eq!(deserialized.causal_validation_passed, Some(true));
14415    }
14416
14417    #[test]
14418    fn test_statistics_backward_compat_deserialization() {
14419        // Old JSON without the new fields should still deserialize
14420        let old_json = r#"{
14421            "total_entries": 100,
14422            "total_line_items": 500,
14423            "accounts_count": 50,
14424            "companies_count": 1,
14425            "period_months": 12,
14426            "vendor_count": 10,
14427            "customer_count": 20,
14428            "material_count": 15,
14429            "asset_count": 5,
14430            "employee_count": 8,
14431            "p2p_chain_count": 5,
14432            "o2c_chain_count": 5,
14433            "ap_invoice_count": 5,
14434            "ar_invoice_count": 5,
14435            "ocpm_event_count": 0,
14436            "ocpm_object_count": 0,
14437            "ocpm_case_count": 0,
14438            "audit_engagement_count": 0,
14439            "audit_workpaper_count": 0,
14440            "audit_evidence_count": 0,
14441            "audit_risk_count": 0,
14442            "audit_finding_count": 0,
14443            "audit_judgment_count": 0,
14444            "anomalies_injected": 0,
14445            "data_quality_issues": 0,
14446            "banking_customer_count": 0,
14447            "banking_account_count": 0,
14448            "banking_transaction_count": 0,
14449            "banking_suspicious_count": 0,
14450            "graph_export_count": 0,
14451            "graph_node_count": 0,
14452            "graph_edge_count": 0
14453        }"#;
14454
14455        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
14456
14457        // New fields should default to 0 / None
14458        assert_eq!(stats.llm_enrichment_ms, 0);
14459        assert_eq!(stats.llm_vendors_enriched, 0);
14460        assert_eq!(stats.diffusion_enhancement_ms, 0);
14461        assert_eq!(stats.diffusion_samples_generated, 0);
14462        assert_eq!(stats.causal_generation_ms, 0);
14463        assert_eq!(stats.causal_samples_generated, 0);
14464        assert!(stats.causal_validation_passed.is_none());
14465    }
14466}