Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183// ============================================================================
184// Configuration Conversion Functions
185// ============================================================================
186
187/// Convert P2P flow config from schema to generator config.
188/// v4.4.1 — build a `DataQualityStats` with only `total_records`
189/// populated to `n_entries`. Used when the data-quality phase is
190/// skipped (by config or resource pressure) so downstream consumers
191/// can still see the denominator. Before v4.4.1 the writer emitted
192/// `total_records: 0` in those cases, which the SDK team flagged as
193/// indistinguishable from "ran but processed nothing".
194fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195    #[allow(clippy::field_reassign_with_default)]
196    {
197        let mut s = DataQualityStats::default();
198        s.total_records = n_entries;
199        s.missing_values.total_records = n_entries;
200        s.format_variations.total_processed = n_entries;
201        s.duplicates.total_processed = n_entries;
202        s
203    }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207    let payment_behavior = &schema_config.payment_behavior;
208    let late_dist = &payment_behavior.late_payment_days_distribution;
209
210    P2PGeneratorConfig {
211        three_way_match_rate: schema_config.three_way_match_rate,
212        partial_delivery_rate: schema_config.partial_delivery_rate,
213        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214        price_variance_rate: schema_config.price_variance_rate,
215        max_price_variance_percent: schema_config.max_price_variance_percent,
216        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219        payment_method_distribution: vec![
220            (PaymentMethod::BankTransfer, 0.60),
221            (PaymentMethod::Check, 0.25),
222            (PaymentMethod::Wire, 0.10),
223            (PaymentMethod::CreditCard, 0.05),
224        ],
225        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226        payment_behavior: P2PPaymentBehavior {
227            late_payment_rate: payment_behavior.late_payment_rate,
228            late_payment_distribution: LatePaymentDistribution {
229                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230                late_8_to_14: late_dist.late_8_to_14,
231                very_late_15_to_30: late_dist.very_late_15_to_30,
232                severely_late_31_to_60: late_dist.severely_late_31_to_60,
233                extremely_late_over_60: late_dist.extremely_late_over_60,
234            },
235            partial_payment_rate: payment_behavior.partial_payment_rate,
236            payment_correction_rate: payment_behavior.payment_correction_rate,
237            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238        },
239    }
240}
241
242/// Convert O2C flow config from schema to generator config.
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244    let payment_behavior = &schema_config.payment_behavior;
245
246    O2CGeneratorConfig {
247        credit_check_failure_rate: schema_config.credit_check_failure_rate,
248        partial_shipment_rate: schema_config.partial_shipment_rate,
249        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253        bad_debt_rate: schema_config.bad_debt_rate,
254        returns_rate: schema_config.return_rate,
255        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256        payment_method_distribution: vec![
257            (PaymentMethod::BankTransfer, 0.50),
258            (PaymentMethod::Check, 0.30),
259            (PaymentMethod::Wire, 0.15),
260            (PaymentMethod::CreditCard, 0.05),
261        ],
262        payment_behavior: O2CPaymentBehavior {
263            partial_payment_rate: payment_behavior.partial_payments.rate,
264            short_payment_rate: payment_behavior.short_payments.rate,
265            max_short_percent: payment_behavior.short_payments.max_short_percent,
266            on_account_rate: payment_behavior.on_account_payments.rate,
267            payment_correction_rate: payment_behavior.payment_corrections.rate,
268            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269        },
270    }
271}
272
/// Switches and volume knobs selecting which generation phases run.
///
/// Boolean fields turn individual phases on or off; `usize` fields set how
/// many entities a phase produces.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Produce master data: vendors, customers, materials, assets, employees.
    pub generate_master_data: bool,
    /// Produce document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Inject data-quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation has finished.
    pub validate_balances: bool,
    /// Strict chart-of-accounts coverage check: every `gl_account`
    /// referenced in generated JEs must exist in the chart of accounts.
    /// When `false` (the default) only a soft warning is emitted; when
    /// `true` any orphan account fails the run.
    pub validate_coa_coverage_strict: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// How many vendors to generate for each company.
    pub vendors_per_company: usize,
    /// How many customers to generate for each company.
    pub customers_per_company: usize,
    /// How many materials to generate for each company.
    pub materials_per_company: usize,
    /// How many assets to generate for each company.
    pub assets_per_company: usize,
    /// How many employees to generate for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers generated for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items generated for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments generated for each engagement.
    pub risks_per_engagement: usize,
    /// Findings generated for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments generated for each engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Produce HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Produce treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Produce project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: produce legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings,
    /// board resolutions). Gated by
    /// `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: produce IT general controls (access logs, change
    /// management records) per audit engagement. Gated by
    /// `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase once all JE-adding
    /// phases are done. Wires PriorYearGenerator /
    /// IndustryBenchmarkGenerator / ManagementReportGenerator /
    /// DriftEventGenerator. Gated by the top-level
    /// `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376    fn default() -> Self {
377        Self {
378            generate_master_data: true,
379            generate_document_flows: true,
380            generate_ocpm_events: false, // Off by default
381            generate_journal_entries: true,
382            inject_anomalies: false,
383            inject_data_quality: false, // Off by default (to preserve clean test data)
384            validate_balances: true,
385            validate_coa_coverage_strict: false,
386            show_progress: true,
387            vendors_per_company: 50,
388            customers_per_company: 100,
389            materials_per_company: 200,
390            assets_per_company: 50,
391            employees_per_company: 100,
392            p2p_chains: 100,
393            o2c_chains: 100,
394            generate_audit: false, // Off by default
395            audit_engagements: 5,
396            workpapers_per_engagement: 20,
397            evidence_per_workpaper: 5,
398            risks_per_engagement: 15,
399            findings_per_engagement: 8,
400            judgments_per_engagement: 10,
401            generate_banking: false,                // Off by default
402            generate_graph_export: false,           // Off by default
403            generate_sourcing: false,               // Off by default
404            generate_bank_reconciliation: false,    // Off by default
405            generate_financial_statements: false,   // Off by default
406            generate_accounting_standards: false,   // Off by default
407            generate_manufacturing: false,          // Off by default
408            generate_sales_kpi_budgets: false,      // Off by default
409            generate_tax: false,                    // Off by default
410            generate_esg: false,                    // Off by default
411            generate_intercompany: false,           // Off by default
412            generate_evolution_events: true,        // On by default
413            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
414            generate_compliance_regulations: false, // Off by default
415            generate_period_close: true,            // On by default
416            generate_hr: false,                     // Off by default
417            generate_treasury: false,               // Off by default
418            generate_project_accounting: false,     // Off by default
419            generate_legal_documents: false,        // v3.3.0 — off by default
420            generate_it_controls: false,            // v3.3.0 — off by default
421            generate_analytics_metadata: false,     // v3.3.0 — off by default
422        }
423    }
424}
425
426impl PhaseConfig {
427    /// Derive phase flags from [`GeneratorConfig`].
428    ///
429    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
430    /// CLI flags can override individual fields after calling this method.
431    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432        Self {
433            // Always-on phases
434            generate_master_data: true,
435            generate_document_flows: true,
436            generate_journal_entries: true,
437            validate_balances: true,
438            validate_coa_coverage_strict: false,
439            generate_period_close: true,
440            generate_evolution_events: true,
441            show_progress: true,
442
443            // Feature-gated phases — derived from config sections
444            generate_audit: cfg.audit.enabled,
445            generate_banking: cfg.banking.enabled,
446            generate_graph_export: cfg.graph_export.enabled,
447            generate_sourcing: cfg.source_to_pay.enabled,
448            generate_intercompany: cfg.intercompany.enabled,
449            generate_financial_statements: cfg.financial_reporting.enabled,
450            generate_bank_reconciliation: cfg.financial_reporting.enabled,
451            generate_accounting_standards: cfg.accounting_standards.enabled,
452            generate_manufacturing: cfg.manufacturing.enabled,
453            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454            generate_tax: cfg.tax.enabled,
455            generate_esg: cfg.esg.enabled,
456            generate_ocpm_events: cfg.ocpm.enabled,
457            generate_compliance_regulations: cfg.compliance_regulations.enabled,
458            generate_hr: cfg.hr.enabled,
459            generate_treasury: cfg.treasury.enabled,
460            generate_project_accounting: cfg.project_accounting.enabled,
461
462            // v3.3.0: L1 generator wiring
463            // Legal documents emitted when compliance_regulations is enabled
464            // and the nested legal_documents.enabled flag is set.
465            generate_legal_documents: cfg.compliance_regulations.enabled
466                && cfg.compliance_regulations.legal_documents.enabled,
467            // IT general controls emitted when audit is enabled and the
468            // nested it_controls.enabled flag is set.
469            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470            // Analytics metadata phase (prior-year, industry benchmarks,
471            // management reports, drift events).
472            generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
475            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478            inject_data_quality: cfg.data_quality.enabled,
479
480            // Count defaults (CLI can override after calling this method)
481            vendors_per_company: 50,
482            customers_per_company: 100,
483            materials_per_company: 200,
484            assets_per_company: 50,
485            employees_per_company: 100,
486            p2p_chains: 100,
487            o2c_chains: 100,
488            audit_engagements: 5,
489            workpapers_per_engagement: 20,
490            evidence_per_workpaper: 5,
491            risks_per_engagement: 15,
492            findings_per_engagement: 8,
493            judgments_per_engagement: 10,
494        }
495    }
496}
497
498/// Master data snapshot containing all generated entities.
499#[derive(Debug, Clone, Default)]
500pub struct MasterDataSnapshot {
501    /// Generated vendors.
502    pub vendors: Vec<Vendor>,
503    /// Generated customers.
504    pub customers: Vec<Customer>,
505    /// Generated materials.
506    pub materials: Vec<Material>,
507    /// Generated fixed assets.
508    pub assets: Vec<FixedAsset>,
509    /// Generated employees.
510    pub employees: Vec<Employee>,
511    /// Generated cost center hierarchy (two-level: departments + sub-departments).
512    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
513    /// v5.1: Generated profit centre hierarchy (two-level: top-level
514    /// segment / region / product-group nodes + sub-units).  Emits to
515    /// SAP CEPC alongside `cost_centers` → CSKS.
516    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
517    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
518    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
519    /// v3.3.0+: organizational profiles (one per company) with
520    /// industry / geography / structure / complexity metadata. Emitted
521    /// alongside master data when `generate_master_data = true`.
522    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
523}
524
/// Summary of a hypergraph export that has finished.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
537
538/// Document flow snapshot containing all generated document chains.
539#[derive(Debug, Clone, Default)]
540pub struct DocumentFlowSnapshot {
541    /// P2P document chains.
542    pub p2p_chains: Vec<P2PDocumentChain>,
543    /// O2C document chains.
544    pub o2c_chains: Vec<O2CDocumentChain>,
545    /// All purchase orders (flattened).
546    pub purchase_orders: Vec<documents::PurchaseOrder>,
547    /// All goods receipts (flattened).
548    pub goods_receipts: Vec<documents::GoodsReceipt>,
549    /// All vendor invoices (flattened).
550    pub vendor_invoices: Vec<documents::VendorInvoice>,
551    /// All sales orders (flattened).
552    pub sales_orders: Vec<documents::SalesOrder>,
553    /// All deliveries (flattened).
554    pub deliveries: Vec<documents::Delivery>,
555    /// All customer invoices (flattened).
556    pub customer_invoices: Vec<documents::CustomerInvoice>,
557    /// All payments (flattened).
558    pub payments: Vec<documents::Payment>,
559    /// Cross-document references collected from all document headers
560    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
561    pub document_references: Vec<documents::DocumentReference>,
562}
563
564/// Subledger snapshot containing generated subledger records.
565#[derive(Debug, Clone, Default)]
566pub struct SubledgerSnapshot {
567    /// AP invoices linked from document flow vendor invoices.
568    pub ap_invoices: Vec<APInvoice>,
569    /// AR invoices linked from document flow customer invoices.
570    pub ar_invoices: Vec<ARInvoice>,
571    /// FA subledger records (asset acquisitions from FA generator).
572    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
573    /// Inventory positions from inventory generator.
574    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
575    /// Inventory movements from inventory generator.
576    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
577    /// AR aging reports, one per company, computed after payment settlement.
578    pub ar_aging_reports: Vec<ARAgingReport>,
579    /// AP aging reports, one per company, computed after payment settlement.
580    pub ap_aging_reports: Vec<APAgingReport>,
581    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
582    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
583    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
584    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
585    /// Dunning runs executed after AR aging (one per company per dunning cycle).
586    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
587    /// Dunning letters generated across all dunning runs.
588    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
589}
590
591/// OCPM snapshot containing generated OCPM event log data.
592#[derive(Debug, Clone, Default)]
593pub struct OcpmSnapshot {
594    /// OCPM event log (if generated)
595    pub event_log: Option<OcpmEventLog>,
596    /// Number of events generated
597    pub event_count: usize,
598    /// Number of objects generated
599    pub object_count: usize,
600    /// Number of cases generated
601    pub case_count: usize,
602}
603
604/// Audit data snapshot containing all generated audit-related entities.
605#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607    /// Audit engagements per ISA 210/220.
608    pub engagements: Vec<AuditEngagement>,
609    /// Workpapers per ISA 230.
610    pub workpapers: Vec<Workpaper>,
611    /// Audit evidence per ISA 500.
612    pub evidence: Vec<AuditEvidence>,
613    /// Risk assessments per ISA 315/330.
614    pub risk_assessments: Vec<RiskAssessment>,
615    /// Audit findings per ISA 265.
616    pub findings: Vec<AuditFinding>,
617    /// Professional judgments per ISA 200.
618    pub judgments: Vec<ProfessionalJudgment>,
619    /// External confirmations per ISA 505.
620    pub confirmations: Vec<ExternalConfirmation>,
621    /// Confirmation responses per ISA 505.
622    pub confirmation_responses: Vec<ConfirmationResponse>,
623    /// Audit procedure steps per ISA 330/530.
624    pub procedure_steps: Vec<AuditProcedureStep>,
625    /// Audit samples per ISA 530.
626    pub samples: Vec<AuditSample>,
627    /// Analytical procedure results per ISA 520.
628    pub analytical_results: Vec<AnalyticalProcedureResult>,
629    /// Internal audit functions per ISA 610.
630    pub ia_functions: Vec<InternalAuditFunction>,
631    /// Internal audit reports per ISA 610.
632    pub ia_reports: Vec<InternalAuditReport>,
633    /// Related parties per ISA 550.
634    pub related_parties: Vec<RelatedParty>,
635    /// Related party transactions per ISA 550.
636    pub related_party_transactions: Vec<RelatedPartyTransaction>,
637    // ---- ISA 600: Group Audits ----
638    /// Component auditors assigned by jurisdiction (ISA 600).
639    pub component_auditors: Vec<ComponentAuditor>,
640    /// Group audit plan with materiality allocations (ISA 600).
641    pub group_audit_plan: Option<GroupAuditPlan>,
642    /// Component instructions issued to component auditors (ISA 600).
643    pub component_instructions: Vec<ComponentInstruction>,
644    /// Reports received from component auditors (ISA 600).
645    pub component_reports: Vec<ComponentAuditorReport>,
646    // ---- ISA 210: Engagement Letters ----
647    /// Engagement letters per ISA 210.
648    pub engagement_letters: Vec<EngagementLetter>,
649    // ---- ISA 560 / IAS 10: Subsequent Events ----
650    /// Subsequent events per ISA 560 / IAS 10.
651    pub subsequent_events: Vec<SubsequentEvent>,
652    // ---- ISA 402: Service Organization Controls ----
653    /// Service organizations identified per ISA 402.
654    pub service_organizations: Vec<ServiceOrganization>,
655    /// SOC reports obtained per ISA 402.
656    pub soc_reports: Vec<SocReport>,
657    /// User entity controls documented per ISA 402.
658    pub user_entity_controls: Vec<UserEntityControl>,
659    // ---- ISA 570: Going Concern ----
660    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
661    pub going_concern_assessments:
662        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663    // ---- ISA 540: Accounting Estimates ----
664    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
665    pub accounting_estimates:
666        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667    // ---- ISA 700/701/705/706: Audit Opinions ----
668    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
669    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670    /// Key Audit Matters per ISA 701 (flattened across all opinions).
671    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672    // ---- SOX 302 / 404 ----
673    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
674    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675    /// SOX Section 404 ICFR assessments (one per entity per year).
676    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677    // ---- ISA 320: Materiality ----
678    /// Materiality calculations per entity per period (ISA 320).
679    pub materiality_calculations:
680        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681    // ---- ISA 315: Combined Risk Assessments ----
682    /// Combined Risk Assessments per account area / assertion (ISA 315).
683    pub combined_risk_assessments:
684        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685    // ---- ISA 530: Sampling Plans ----
686    /// Sampling plans per CRA at Moderate or higher (ISA 530).
687    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688    /// Individual sampled items (key items + representative items) per ISA 530.
689    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
691    /// Significant classes of transactions per ISA 315 (one set per entity).
692    pub significant_transaction_classes:
693        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694    // ---- ISA 520: Unusual Item Markers ----
695    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
696    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697    // ---- ISA 520: Analytical Relationships ----
698    /// Analytical relationships (ratios, trends, correlations) per entity.
699    pub analytical_relationships:
700        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701    // ---- PCAOB-ISA Cross-Reference ----
702    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
703    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704    // ---- ISA Standard Reference ----
705    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
706    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707    // ---- ISA 220 / ISA 300: Audit Scopes ----
708    /// Audit scope records (one per engagement) describing the audit boundary.
709    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710    // ---- FSM Event Trail ----
711    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
712    /// Contains the ordered sequence of state-transition and procedure-step events
713    /// generated by the audit FSM engine.
714    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715    // ---- v3.3.0: L1 generator wiring ----
716    /// Legal documents (engagement letters, management reps, legal
717    /// opinions, regulatory filings, board resolutions) per entity.
718    /// Emitted by `LegalDocumentGenerator` when
719    /// `compliance_regulations.legal_documents.enabled = true`.
720    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721    /// IT general controls — access logs (login/privileged action
722    /// audit trail). Emitted by `ItControlsGenerator` when
723    /// `audit.it_controls.enabled = true`.
724    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725    /// IT general controls — change management records (code deploys,
726    /// config changes, patches). Emitted by `ItControlsGenerator`.
727    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
730/// Banking KYC/AML data snapshot containing all generated banking entities.
731#[derive(Debug, Clone, Default)]
732pub struct BankingSnapshot {
733    /// Banking customers (retail, business, trust).
734    pub customers: Vec<BankingCustomer>,
735    /// Bank accounts.
736    pub accounts: Vec<BankAccount>,
737    /// Bank transactions with AML labels.
738    pub transactions: Vec<BankTransaction>,
739    /// Transaction-level AML labels with features.
740    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
741    /// Customer-level AML labels.
742    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
743    /// Account-level AML labels.
744    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
745    /// Relationship-level AML labels.
746    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
747    /// Case narratives for AML scenarios.
748    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
749    /// Number of suspicious transactions.
750    pub suspicious_count: usize,
751    /// Number of AML scenarios generated.
752    pub scenario_count: usize,
753}
754
755/// Graph export snapshot containing exported graph metadata.
756#[derive(Debug, Clone, Default, Serialize)]
757pub struct GraphExportSnapshot {
758    /// Whether graph export was performed.
759    pub exported: bool,
760    /// Number of graphs exported.
761    pub graph_count: usize,
762    /// Exported graph metadata (by format name).
763    pub exports: HashMap<String, GraphExportInfo>,
764}
765
766/// Information about an exported graph.
767#[derive(Debug, Clone, Serialize)]
768pub struct GraphExportInfo {
769    /// Graph name.
770    pub name: String,
771    /// Export format (pytorch_geometric, neo4j, dgl).
772    pub format: String,
773    /// Output directory path.
774    pub output_path: PathBuf,
775    /// Number of nodes.
776    pub node_count: usize,
777    /// Number of edges.
778    pub edge_count: usize,
779}
780
781/// S2C sourcing data snapshot.
782#[derive(Debug, Clone, Default)]
783pub struct SourcingSnapshot {
784    /// Spend analyses.
785    pub spend_analyses: Vec<SpendAnalysis>,
786    /// Sourcing projects.
787    pub sourcing_projects: Vec<SourcingProject>,
788    /// Supplier qualifications.
789    pub qualifications: Vec<SupplierQualification>,
790    /// RFx events (RFI, RFP, RFQ).
791    pub rfx_events: Vec<RfxEvent>,
792    /// Supplier bids.
793    pub bids: Vec<SupplierBid>,
794    /// Bid evaluations.
795    pub bid_evaluations: Vec<BidEvaluation>,
796    /// Procurement contracts.
797    pub contracts: Vec<ProcurementContract>,
798    /// Catalog items.
799    pub catalog_items: Vec<CatalogItem>,
800    /// Supplier scorecards.
801    pub scorecards: Vec<SupplierScorecard>,
802}
803
804/// A single period's trial balance with metadata.
805///
806/// Used as the orchestrator's in-memory representation while it
807/// builds per-period FS / CF artefacts.  At write time the runtime
808/// converts each `PeriodTrialBalance` to the canonical
809/// [`datasynth_core::models::balance::TrialBalance`] shape via
810/// [`PeriodTrialBalance::into_canonical`] so the on-disk
811/// `period_close/trial_balances.json` matches what the group
812/// aggregate phase loads — see
813/// [`crate::output_writer::write_outputs`].
814#[derive(Debug, Clone, Serialize, Deserialize)]
815pub struct PeriodTrialBalance {
816    /// Fiscal year.
817    pub fiscal_year: u16,
818    /// Fiscal period (1-12).
819    pub fiscal_period: u8,
820    /// Period start date.
821    pub period_start: NaiveDate,
822    /// Period end date.
823    pub period_end: NaiveDate,
824    /// Trial balance entries for this period.
825    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
826}
827
828impl PeriodTrialBalance {
829    /// Convert this in-memory period TB into the canonical
830    /// [`datasynth_core::models::balance::TrialBalance`] shape used
831    /// for the on-disk artefact.
832    ///
833    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
834    /// aggregate's `tb_loader` consumes the canonical type directly,
835    /// dropping the v5.0 dual-shape detection that converted from
836    /// `PeriodTrialBalance` JSON on the fly.
837    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
838        let mut total_debits = Decimal::ZERO;
839        let mut total_credits = Decimal::ZERO;
840        let lines: Vec<TrialBalanceLine> = self
841            .entries
842            .into_iter()
843            .map(|e| {
844                total_debits += e.debit_balance;
845                total_credits += e.credit_balance;
846                let category = AccountCategory::from_account_code(&e.account_code);
847                TrialBalanceLine {
848                    account_code: e.account_code,
849                    account_description: e.account_name,
850                    category,
851                    account_type: AccountType::Asset,
852                    opening_balance: Decimal::ZERO,
853                    period_debits: e.debit_balance,
854                    period_credits: e.credit_balance,
855                    closing_balance: e.debit_balance - e.credit_balance,
856                    debit_balance: e.debit_balance,
857                    credit_balance: e.credit_balance,
858                    cost_center: None,
859                    profit_center: None,
860                }
861            })
862            .collect();
863        let imbalance = total_debits - total_credits;
864        let is_balanced = imbalance.abs() < Decimal::new(1, 2);
865        TrialBalance {
866            trial_balance_id: format!(
867                "{company_code}-{:04}{:02}",
868                self.fiscal_year, self.fiscal_period
869            ),
870            company_code: company_code.to_string(),
871            company_name: None,
872            as_of_date: self.period_end,
873            fiscal_year: self.fiscal_year as i32,
874            fiscal_period: self.fiscal_period as u32,
875            currency: currency.to_string(),
876            balance_type: TrialBalanceType::Adjusted,
877            lines,
878            total_debits,
879            total_credits,
880            is_balanced,
881            out_of_balance: imbalance,
882            is_equation_valid: is_balanced,
883            equation_difference: imbalance,
884            category_summary: std::collections::HashMap::new(),
885            created_at: self
886                .period_start
887                .and_hms_opt(0, 0, 0)
888                .expect("midnight is a valid time"),
889            created_by: "ORCHESTRATOR".to_string(),
890            approved_by: None,
891            approved_at: None,
892            status: TrialBalanceStatus::Final,
893        }
894    }
895}
896
897/// Financial reporting snapshot (financial statements + bank reconciliations).
898#[derive(Debug, Clone, Default)]
899pub struct FinancialReportingSnapshot {
900    /// Financial statements (balance sheet, income statement, cash flow).
901    /// For multi-entity configs this includes all standalone statements.
902    pub financial_statements: Vec<FinancialStatement>,
903    /// Standalone financial statements keyed by entity code.
904    /// Each entity has its own slice of statements.
905    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
906    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
907    pub consolidated_statements: Vec<FinancialStatement>,
908    /// Consolidation schedules (one per period) showing pre/post elimination detail.
909    pub consolidation_schedules: Vec<ConsolidationSchedule>,
910    /// Bank reconciliations.
911    pub bank_reconciliations: Vec<BankReconciliation>,
912    /// Period-close trial balances (one per period).
913    pub trial_balances: Vec<PeriodTrialBalance>,
914    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
915    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
916    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
917    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
918    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
919    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
920}
921
922/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
923#[derive(Debug, Clone, Default)]
924pub struct HrSnapshot {
925    /// Payroll runs (actual data).
926    pub payroll_runs: Vec<PayrollRun>,
927    /// Payroll line items (actual data).
928    pub payroll_line_items: Vec<PayrollLineItem>,
929    /// Time entries (actual data).
930    pub time_entries: Vec<TimeEntry>,
931    /// Expense reports (actual data).
932    pub expense_reports: Vec<ExpenseReport>,
933    /// Benefit enrollments (actual data).
934    pub benefit_enrollments: Vec<BenefitEnrollment>,
935    /// Defined benefit pension plans (IAS 19 / ASC 715).
936    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
937    /// Pension obligation (DBO) roll-forwards.
938    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
939    /// Plan asset roll-forwards.
940    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
941    /// Pension disclosures.
942    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
943    /// Journal entries generated from pension expense and OCI remeasurements.
944    pub pension_journal_entries: Vec<JournalEntry>,
945    /// Stock grants (ASC 718 / IFRS 2).
946    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
947    /// Stock-based compensation period expense records.
948    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
949    /// Journal entries generated from stock-based compensation expense.
950    pub stock_comp_journal_entries: Vec<JournalEntry>,
951    /// Payroll runs.
952    pub payroll_run_count: usize,
953    /// Payroll line item count.
954    pub payroll_line_item_count: usize,
955    /// Time entry count.
956    pub time_entry_count: usize,
957    /// Expense report count.
958    pub expense_report_count: usize,
959    /// Benefit enrollment count.
960    pub benefit_enrollment_count: usize,
961    /// Pension plan count.
962    pub pension_plan_count: usize,
963    /// Stock grant count.
964    pub stock_grant_count: usize,
965}
966
967/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
968#[derive(Debug, Clone, Default)]
969pub struct AccountingStandardsSnapshot {
970    /// Revenue recognition contracts (actual data).
971    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
972    /// Impairment tests (actual data).
973    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
974    /// Business combinations (IFRS 3 / ASC 805).
975    pub business_combinations:
976        Vec<datasynth_core::models::business_combination::BusinessCombination>,
977    /// Journal entries generated from business combinations (Day 1 + amortization).
978    pub business_combination_journal_entries: Vec<JournalEntry>,
979    /// ECL models (IFRS 9 / ASC 326).
980    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
981    /// ECL provision movements.
982    pub ecl_provision_movements:
983        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
984    /// Journal entries from ECL provision.
985    pub ecl_journal_entries: Vec<JournalEntry>,
986    /// Provisions (IAS 37 / ASC 450).
987    pub provisions: Vec<datasynth_core::models::provision::Provision>,
988    /// Provision movement roll-forwards (IAS 37 / ASC 450).
989    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
990    /// Contingent liabilities (IAS 37 / ASC 450).
991    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
992    /// Journal entries from provisions.
993    pub provision_journal_entries: Vec<JournalEntry>,
994    /// IAS 21 functional currency translation results (one per entity per period).
995    pub currency_translation_results:
996        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
997    /// Revenue recognition contract count.
998    pub revenue_contract_count: usize,
999    /// Impairment test count.
1000    pub impairment_test_count: usize,
1001    /// Business combination count.
1002    pub business_combination_count: usize,
1003    /// ECL model count.
1004    pub ecl_model_count: usize,
1005    /// Provision count.
1006    pub provision_count: usize,
1007    /// Currency translation result count (IAS 21).
1008    pub currency_translation_count: usize,
1009    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
1010    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
1011    /// ROU asset + lease liability details.
1012    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1013    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
1014    pub fair_value_measurements:
1015        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1016    /// Framework difference records (dual-reporting only).
1017    pub framework_differences:
1018        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1019    /// Per-entity framework reconciliation (dual-reporting only).
1020    pub framework_reconciliations:
1021        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1022    /// Counts for stats logging.
1023    pub lease_count: usize,
1024    pub fair_value_measurement_count: usize,
1025    pub framework_difference_count: usize,
1026}
1027
1028/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
1029#[derive(Debug, Clone, Default)]
1030pub struct ComplianceRegulationsSnapshot {
1031    /// Flattened standard records for output.
1032    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1033    /// Cross-reference records.
1034    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1035    /// Jurisdiction profile records.
1036    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1037    /// Generated audit procedures.
1038    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1039    /// Generated compliance findings.
1040    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1041    /// Generated regulatory filings.
1042    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1043    /// Compliance graph (if graph integration enabled).
1044    pub compliance_graph: Option<datasynth_graph::Graph>,
1045}
1046
1047/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
1048#[derive(Debug, Clone, Default)]
1049pub struct ManufacturingSnapshot {
1050    /// Production orders (actual data).
1051    pub production_orders: Vec<ProductionOrder>,
1052    /// Quality inspections (actual data).
1053    pub quality_inspections: Vec<QualityInspection>,
1054    /// Cycle counts (actual data).
1055    pub cycle_counts: Vec<CycleCount>,
1056    /// BOM components (actual data).
1057    pub bom_components: Vec<BomComponent>,
1058    /// Inventory movements (actual data).
1059    pub inventory_movements: Vec<InventoryMovement>,
1060    /// Production order count.
1061    pub production_order_count: usize,
1062    /// Quality inspection count.
1063    pub quality_inspection_count: usize,
1064    /// Cycle count count.
1065    pub cycle_count_count: usize,
1066    /// BOM component count.
1067    pub bom_component_count: usize,
1068    /// Inventory movement count.
1069    pub inventory_movement_count: usize,
1070}
1071
1072/// Sales, KPI, and budget data snapshot.
1073#[derive(Debug, Clone, Default)]
1074pub struct SalesKpiBudgetsSnapshot {
1075    /// Sales quotes (actual data).
1076    pub sales_quotes: Vec<SalesQuote>,
1077    /// Management KPIs (actual data).
1078    pub kpis: Vec<ManagementKpi>,
1079    /// Budgets (actual data).
1080    pub budgets: Vec<Budget>,
1081    /// Sales quote count.
1082    pub sales_quote_count: usize,
1083    /// Management KPI count.
1084    pub kpi_count: usize,
1085    /// Budget line count.
1086    pub budget_line_count: usize,
1087}
1088
1089/// Anomaly labels generated during injection.
1090#[derive(Debug, Clone, Default)]
1091pub struct AnomalyLabels {
1092    /// All anomaly labels.
1093    pub labels: Vec<LabeledAnomaly>,
1094    /// Summary statistics.
1095    pub summary: Option<AnomalySummary>,
1096    /// Count by anomaly type.
1097    pub by_type: HashMap<String, usize>,
1098}
1099
1100/// Balance validation results from running balance tracker.
1101#[derive(Debug, Clone, Default)]
1102pub struct BalanceValidationResult {
1103    /// Whether validation was performed.
1104    pub validated: bool,
1105    /// Whether balance sheet equation is satisfied.
1106    pub is_balanced: bool,
1107    /// Number of entries processed.
1108    pub entries_processed: u64,
1109    /// Total debits across all entries.
1110    pub total_debits: rust_decimal::Decimal,
1111    /// Total credits across all entries.
1112    pub total_credits: rust_decimal::Decimal,
1113    /// Number of accounts tracked.
1114    pub accounts_tracked: usize,
1115    /// Number of companies tracked.
1116    pub companies_tracked: usize,
1117    /// Validation errors encountered.
1118    pub validation_errors: Vec<ValidationError>,
1119    /// Whether any unbalanced entries were found.
1120    pub has_unbalanced_entries: bool,
1121}
1122
1123/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
1124#[derive(Debug, Clone, Default)]
1125pub struct TaxSnapshot {
1126    /// Tax jurisdictions.
1127    pub jurisdictions: Vec<TaxJurisdiction>,
1128    /// Tax codes.
1129    pub codes: Vec<TaxCode>,
1130    /// Tax lines computed on documents.
1131    pub tax_lines: Vec<TaxLine>,
1132    /// Tax returns filed per period.
1133    pub tax_returns: Vec<TaxReturn>,
1134    /// Tax provisions.
1135    pub tax_provisions: Vec<TaxProvision>,
1136    /// Withholding tax records.
1137    pub withholding_records: Vec<WithholdingTaxRecord>,
1138    /// Tax anomaly labels.
1139    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1140    /// Jurisdiction count.
1141    pub jurisdiction_count: usize,
1142    /// Code count.
1143    pub code_count: usize,
1144    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1145    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1146    /// Journal entries posting tax payable/receivable from computed tax lines.
1147    pub tax_posting_journal_entries: Vec<JournalEntry>,
1148}
1149
1150/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1151#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1152pub struct IntercompanySnapshot {
1153    /// Group ownership structure (parent/subsidiary/associate relationships).
1154    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1155    /// IC matched pairs (transaction pairs between related entities).
1156    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1157    /// IC journal entries generated from matched pairs (seller side).
1158    pub seller_journal_entries: Vec<JournalEntry>,
1159    /// IC journal entries generated from matched pairs (buyer side).
1160    pub buyer_journal_entries: Vec<JournalEntry>,
1161    /// Elimination entries for consolidation.
1162    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1163    /// NCI measurements derived from group structure ownership percentages.
1164    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1165    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
1166    #[serde(skip)]
1167    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1168    /// IC matched pair count.
1169    pub matched_pair_count: usize,
1170    /// IC elimination entry count.
1171    pub elimination_entry_count: usize,
1172    /// IC matching rate (0.0 to 1.0).
1173    pub match_rate: f64,
1174}
1175
1176/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1177#[derive(Debug, Clone, Default)]
1178pub struct EsgSnapshot {
1179    /// Emission records (scope 1, 2, 3).
1180    pub emissions: Vec<EmissionRecord>,
1181    /// Energy consumption records.
1182    pub energy: Vec<EnergyConsumption>,
1183    /// Water usage records.
1184    pub water: Vec<WaterUsage>,
1185    /// Waste records.
1186    pub waste: Vec<WasteRecord>,
1187    /// Workforce diversity metrics.
1188    pub diversity: Vec<WorkforceDiversityMetric>,
1189    /// Pay equity metrics.
1190    pub pay_equity: Vec<PayEquityMetric>,
1191    /// Safety incidents.
1192    pub safety_incidents: Vec<SafetyIncident>,
1193    /// Safety metrics.
1194    pub safety_metrics: Vec<SafetyMetric>,
1195    /// Governance metrics.
1196    pub governance: Vec<GovernanceMetric>,
1197    /// Supplier ESG assessments.
1198    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1199    /// Materiality assessments.
1200    pub materiality: Vec<MaterialityAssessment>,
1201    /// ESG disclosures.
1202    pub disclosures: Vec<EsgDisclosure>,
1203    /// Climate scenarios.
1204    pub climate_scenarios: Vec<ClimateScenario>,
1205    /// ESG anomaly labels.
1206    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1207    /// Total emission record count.
1208    pub emission_count: usize,
1209    /// Total disclosure count.
1210    pub disclosure_count: usize,
1211}
1212
1213/// Treasury data snapshot (cash management, hedging, debt, pooling).
1214#[derive(Debug, Clone, Default)]
1215pub struct TreasurySnapshot {
1216    /// Cash positions (daily balances per account).
1217    pub cash_positions: Vec<CashPosition>,
1218    /// Cash forecasts.
1219    pub cash_forecasts: Vec<CashForecast>,
1220    /// Cash pools.
1221    pub cash_pools: Vec<CashPool>,
1222    /// Cash pool sweep transactions.
1223    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1224    /// Hedging instruments.
1225    pub hedging_instruments: Vec<HedgingInstrument>,
1226    /// Hedge relationships (ASC 815/IFRS 9 designations).
1227    pub hedge_relationships: Vec<HedgeRelationship>,
1228    /// Debt instruments.
1229    pub debt_instruments: Vec<DebtInstrument>,
1230    /// Bank guarantees and letters of credit.
1231    pub bank_guarantees: Vec<BankGuarantee>,
1232    /// Intercompany netting runs.
1233    pub netting_runs: Vec<NettingRun>,
1234    /// Treasury anomaly labels.
1235    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1236    /// Journal entries generated from treasury instruments (debt interest accruals,
1237    /// hedge MTM, cash pool sweeps).
1238    pub journal_entries: Vec<JournalEntry>,
1239}
1240
1241/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1242#[derive(Debug, Clone, Default)]
1243pub struct ProjectAccountingSnapshot {
1244    /// Projects with WBS hierarchies.
1245    pub projects: Vec<Project>,
1246    /// Project cost lines (linked from source documents).
1247    pub cost_lines: Vec<ProjectCostLine>,
1248    /// Revenue recognition records.
1249    pub revenue_records: Vec<ProjectRevenue>,
1250    /// Earned value metrics.
1251    pub earned_value_metrics: Vec<EarnedValueMetric>,
1252    /// Change orders.
1253    pub change_orders: Vec<ChangeOrder>,
1254    /// Project milestones.
1255    pub milestones: Vec<ProjectMilestone>,
1256}
1257
1258/// Complete result of enhanced generation run.
1259#[derive(Debug, Default)]
1260pub struct EnhancedGenerationResult {
1261    /// Generated chart of accounts.
1262    pub chart_of_accounts: ChartOfAccounts,
1263    /// Master data snapshot.
1264    pub master_data: MasterDataSnapshot,
1265    /// Document flow snapshot.
1266    pub document_flows: DocumentFlowSnapshot,
1267    /// Subledger snapshot (linked from document flows).
1268    pub subledger: SubledgerSnapshot,
1269    /// OCPM event log snapshot (if OCPM generation enabled).
1270    pub ocpm: OcpmSnapshot,
1271    /// Audit data snapshot (if audit generation enabled).
1272    pub audit: AuditSnapshot,
1273    /// Banking KYC/AML data snapshot (if banking generation enabled).
1274    pub banking: BankingSnapshot,
1275    /// Graph export snapshot (if graph export enabled).
1276    pub graph_export: GraphExportSnapshot,
1277    /// S2C sourcing data snapshot (if sourcing generation enabled).
1278    pub sourcing: SourcingSnapshot,
1279    /// Financial reporting snapshot (financial statements + bank reconciliations).
1280    pub financial_reporting: FinancialReportingSnapshot,
1281    /// HR data snapshot (payroll, time entries, expenses).
1282    pub hr: HrSnapshot,
1283    /// Accounting standards snapshot (revenue recognition, impairment).
1284    pub accounting_standards: AccountingStandardsSnapshot,
1285    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1286    pub manufacturing: ManufacturingSnapshot,
1287    /// Sales, KPI, and budget snapshot.
1288    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1289    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1290    pub tax: TaxSnapshot,
1291    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1292    pub esg: EsgSnapshot,
1293    /// Treasury data snapshot (cash management, hedging, debt).
1294    pub treasury: TreasurySnapshot,
1295    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1296    pub project_accounting: ProjectAccountingSnapshot,
1297    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1298    pub process_evolution: Vec<ProcessEvolutionEvent>,
1299    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1300    pub organizational_events: Vec<OrganizationalEvent>,
1301    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1302    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1303    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1304    pub intercompany: IntercompanySnapshot,
1305    /// Generated journal entries.
1306    pub journal_entries: Vec<JournalEntry>,
1307    /// Anomaly labels (if injection enabled).
1308    pub anomaly_labels: AnomalyLabels,
1309    /// Balance validation results (if validation enabled).
1310    pub balance_validation: BalanceValidationResult,
1311    /// Data quality statistics (if injection enabled).
1312    pub data_quality_stats: DataQualityStats,
1313    /// Data quality issue records (if injection enabled).
1314    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1315    /// Generation statistics.
1316    pub statistics: EnhancedGenerationStatistics,
1317    /// Data lineage graph (if tracking enabled).
1318    pub lineage: Option<super::lineage::LineageGraph>,
1319    /// Quality gate evaluation result.
1320    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1321    /// Internal controls (if controls generation enabled).
1322    pub internal_controls: Vec<InternalControl>,
1323    /// SoD (Segregation of Duties) violations identified during control application.
1324    ///
1325    /// Each record corresponds to a journal entry where `sod_violation == true`.
1326    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1327    /// Opening balances (if opening balance generation enabled).
1328    pub opening_balances: Vec<GeneratedOpeningBalance>,
1329    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1330    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1331    /// Counterfactual (original, mutated) JE pairs for ML training.
1332    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1333    /// Fraud red-flag indicators on P2P/O2C documents.
1334    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1335    /// Collusion rings (coordinated fraud networks).
1336    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1337    /// Bi-temporal version chains for vendor entities.
1338    pub temporal_vendor_chains:
1339        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1340    /// Entity relationship graph (nodes + edges with strength scores).
1341    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1342    /// Cross-process links (P2P ↔ O2C via inventory movements).
1343    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1344    /// Industry-specific GL accounts and metadata.
1345    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1346    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1347    pub compliance_regulations: ComplianceRegulationsSnapshot,
1348    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1349    /// industry benchmarks, management reports, drift events). Empty
1350    /// when `analytics_metadata.enabled = false`.
1351    pub analytics_metadata: AnalyticsMetadataSnapshot,
1352    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1353    /// KS) over the generated amount distribution.  `None` when
1354    /// `distributions.validation.enabled = false`.
1355    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1356    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1357    /// customer value-segment labels, and industry-specific metadata
1358    /// populated from the previously-inert `vendor_network`,
1359    /// `customer_segmentation`, and `industry_specific` schema
1360    /// sections. Empty when those sections are disabled.
1361    pub interconnectivity: InterconnectivitySnapshot,
1362}
1363
/// v4.1.3+: interconnectivity snapshot. Populated when
/// `vendor_network.enabled` / `customer_segmentation.enabled` /
/// `industry_specific.enabled` are set. Holds tier / segment / industry
/// labels for generated entities so downstream tooling (graph export,
/// risk models) can consume them without re-deriving from scratch.
///
/// Derives `Default`, so every list starts out empty.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(vendor_id, tier)` pairs. Tier 1 = strategic / primary; Tier 2
    /// = sub-tier suppliers to tier 1; Tier 3 = sub-sub-tier.
    pub vendor_tiers: Vec<(String, u8)>,
    /// `(vendor_id, cluster_label)` pairs where cluster_label is one of
    /// `"reliable_strategic" / "standard_operational" / "transactional"
    /// / "problematic"`.
    pub vendor_clusters: Vec<(String, String)>,
    /// `(customer_id, value_segment)` pairs where value_segment is one
    /// of `"enterprise" / "mid_market" / "smb" / "consumer"`.
    pub customer_value_segments: Vec<(String, String)>,
    /// `(customer_id, lifecycle_stage)` pairs where stage is one of
    /// `"prospect" / "new" / "growth" / "mature" / "at_risk" /
    /// "churned" / "won_back"`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Summaries of the industry-specific knobs that were applied, if
    /// any (e.g. `"manufacturing.bom_depth=3"`).
    pub industry_metadata: Vec<String>,
}
1389
/// v3.3.0: snapshot for the analytics-metadata phase.
///
/// Derives `Default`, so every list starts out empty.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparative balances per account, per entity.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks for the configured industry.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management-report artefacts (dashboards, MDA sections).
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Drift-event labels emitted from the post-generation sweep.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1402
/// Enhanced statistics about a generation run.
///
/// A flat collection of counters and timings that derives `Serialize` /
/// `Deserialize`. Fields carrying `#[serde(default)]` fall back to their
/// type's default value when absent from deserialized input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data counts: vendors.
    pub vendor_count: usize,
    /// Master data counts: customers.
    pub customer_count: usize,
    /// Master data counts: materials.
    pub material_count: usize,
    /// Master data counts: assets.
    pub asset_count: usize,
    /// Master data counts: employees.
    pub employee_count: usize,
    /// Document flow counts: P2P chains.
    pub p2p_chain_count: usize,
    /// Document flow counts: O2C chains.
    pub o2c_chain_count: usize,
    /// Subledger counts: AP invoices.
    pub ap_invoice_count: usize,
    /// Subledger counts: AR invoices.
    pub ar_invoice_count: usize,
    /// OCPM counts: events.
    pub ocpm_event_count: usize,
    /// OCPM counts: objects.
    pub ocpm_object_count: usize,
    /// OCPM counts: cases.
    pub ocpm_case_count: usize,
    /// Audit counts: engagements.
    pub audit_engagement_count: usize,
    /// Audit counts: workpapers.
    pub audit_workpaper_count: usize,
    /// Audit counts: evidence items.
    pub audit_evidence_count: usize,
    /// Audit counts: risks.
    pub audit_risk_count: usize,
    /// Audit counts: findings.
    pub audit_finding_count: usize,
    /// Audit counts: judgments.
    pub audit_judgment_count: usize,
    /// ISA 505 confirmation counts: confirmations.
    #[serde(default)]
    pub audit_confirmation_count: usize,
    /// ISA 505 confirmation counts: confirmation responses.
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    /// ISA 330/530 procedure step and sample counts: procedure steps.
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    /// ISA 330/530 procedure step and sample counts: samples.
    #[serde(default)]
    pub audit_sample_count: usize,
    /// ISA 520 analytical procedure counts.
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    /// ISA 610 internal audit counts: internal audit functions.
    #[serde(default)]
    pub audit_ia_function_count: usize,
    /// ISA 610 internal audit counts: internal audit reports.
    #[serde(default)]
    pub audit_ia_report_count: usize,
    /// ISA 550 related party counts: related parties.
    #[serde(default)]
    pub audit_related_party_count: usize,
    /// ISA 550 related party counts: related party transactions.
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking counts: customers.
    pub banking_customer_count: usize,
    /// Banking counts: accounts.
    pub banking_account_count: usize,
    /// Banking counts: transactions.
    pub banking_transaction_count: usize,
    /// Banking counts: suspicious items.
    pub banking_suspicious_count: usize,
    /// Graph export counts: exports.
    pub graph_export_count: usize,
    /// Graph export counts: nodes.
    pub graph_node_count: usize,
    /// Graph export counts: edges.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// v4.1.1+: number of customer names enriched by LLM.
    #[serde(default)]
    pub llm_customers_enriched: usize,
    /// v4.1.1+: number of material descriptions enriched by LLM.
    #[serde(default)]
    pub llm_materials_enriched: usize,
    /// v4.1.1+: number of audit finding titles enriched by LLM.
    #[serde(default)]
    pub llm_findings_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
    /// `None` when the neural/hybrid backend is not active.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    /// How many columns were routed through the neural backend.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing counts: sourcing projects.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing counts: RFx events.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing counts: bids.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing counts: contracts.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing counts: catalog items.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing counts: scorecards.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting counts: financial statements.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting counts: bank reconciliations.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR counts: payroll runs.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR counts: time entries.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR counts: expense reports.
    #[serde(default)]
    pub expense_report_count: usize,
    /// HR counts: benefit enrollments.
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    /// HR counts: pension plans.
    #[serde(default)]
    pub pension_plan_count: usize,
    /// HR counts: stock grants.
    #[serde(default)]
    pub stock_grant_count: usize,
    /// Accounting standards counts: revenue contracts.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards counts: impairment tests.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Accounting standards counts: business combinations.
    #[serde(default)]
    pub business_combination_count: usize,
    /// Accounting standards counts: ECL models.
    #[serde(default)]
    pub ecl_model_count: usize,
    /// Accounting standards counts: provisions.
    #[serde(default)]
    pub provision_count: usize,
    /// Manufacturing counts: production orders.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing counts: quality inspections.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing counts: cycle counts.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Manufacturing counts: BOM components.
    #[serde(default)]
    pub bom_component_count: usize,
    /// Manufacturing counts: inventory movements.
    #[serde(default)]
    pub inventory_movement_count: usize,
    /// Sales & reporting counts: sales quotes.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting counts: KPIs.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting counts: budget lines.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax counts: jurisdictions.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax counts: tax codes.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG counts: emissions.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG counts: disclosures.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany counts: matched pairs.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany counts: eliminations.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
    /// Cash forecast count.
    #[serde(default)]
    pub cash_forecast_count: usize,
    /// Cash pool count.
    #[serde(default)]
    pub cash_pool_count: usize,
    /// Process evolution event count.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    /// Organizational event count.
    #[serde(default)]
    pub organizational_event_count: usize,
    /// Counterfactual pair count.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    /// Number of fraud red-flag indicators generated.
    #[serde(default)]
    pub red_flag_count: usize,
    /// Number of collusion rings generated.
    #[serde(default)]
    pub collusion_ring_count: usize,
    /// Number of bi-temporal vendor version chains generated.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    /// Number of nodes in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    /// Number of edges in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    /// Number of cross-process links generated.
    #[serde(default)]
    pub cross_process_link_count: usize,
    /// Number of disruption events generated.
    #[serde(default)]
    pub disruption_event_count: usize,
    /// Number of industry-specific GL accounts generated.
    #[serde(default)]
    pub industry_gl_account_count: usize,
    /// Number of period-close journal entries generated (tax provision + closing entries).
    #[serde(default)]
    pub period_close_je_count: usize,
}
1672
/// Enhanced orchestrator with full feature integration.
///
/// Built via [`EnhancedOrchestrator::new`] (or one of the other
/// constructors) and then customised through the `with_*` / `set_*`
/// methods.
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated in `new` before being stored.
    config: GeneratorConfig,
    /// Phase configuration (includes the `show_progress` flag).
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` at construction time.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; starts out as the default (empty) snapshot.
    master_data: MasterDataSnapshot,
    /// Seed taken from `config.global.seed`, or a random value when the
    /// config does not provide one.
    seed: u64,
    /// Progress-bar container; installed by `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Shared template provider for user-supplied template packs.
    ///
    /// Constructed from `config.templates.path` at orchestrator creation
    /// time. When the path is `None`, this is still populated with an
    /// embedded-only provider so generators can always call trait methods
    /// without an `Option<…>` guard. v3.2.0+.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// v3.4.1+ temporal context for business-day / holiday awareness.
    ///
    /// Populated only when `temporal_patterns.business_days.enabled`. When
    /// `None`, document-flow / HR / treasury / period-close generators keep
    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
    /// for the same seed).
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
    /// Optional shard-mode context (set by group-engine shard runners).
    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
    shard_context: Option<crate::shard_context::ShardContext>,
}
1709
1710impl EnhancedOrchestrator {
1711    /// Create a new enhanced orchestrator.
1712    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1713        datasynth_config::validate_config(&config)?;
1714
1715        let seed = config.global.seed.unwrap_or_else(rand::random);
1716
1717        // Build resource guard from config
1718        let resource_guard = Self::build_resource_guard(&config, None);
1719
1720        // Build country pack registry from config
1721        let country_pack_registry = match &config.country_packs {
1722            Some(cp) => {
1723                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1724                    .map_err(|e| SynthError::config(e.to_string()))?
1725            }
1726            None => datasynth_core::CountryPackRegistry::builtin_only()
1727                .map_err(|e| SynthError::config(e.to_string()))?,
1728        };
1729
1730        // Build the shared template provider from config.templates.path.
1731        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1732        // `Some(path)` → load file/dir and honour `merge_strategy`.
1733        let template_provider = Self::build_template_provider(&config)?;
1734
1735        // v3.4.1: build a shared temporal context when
1736        // `temporal_patterns.business_days.enabled`. `None` preserves the
1737        // raw-RNG date-offset behaviour per-generator.
1738        let temporal_context = Self::build_temporal_context(&config)?;
1739
1740        Ok(Self {
1741            config,
1742            phase_config,
1743            coa: None,
1744            master_data: MasterDataSnapshot::default(),
1745            seed,
1746            multi_progress: None,
1747            resource_guard,
1748            output_path: None,
1749            copula_generators: Vec::new(),
1750            country_pack_registry,
1751            phase_sink: None,
1752            template_provider,
1753            temporal_context,
1754            shard_context: None,
1755        })
1756    }
1757
1758    /// Install shard-mode context.  Called by the group shard runner
1759    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1760    /// entry point).  Has no effect on single-entity runs.
1761    ///
1762    /// See [`crate::shard_context::ShardContext`] for rationale.
1763    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1764        self.shard_context = Some(ctx);
1765    }
1766
1767    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1768    ///
1769    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1770    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1771    /// enabled. Returns `Err` only for unrecoverable config errors.
1772    fn build_temporal_context(
1773        config: &GeneratorConfig,
1774    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1775        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1776
1777        let tp = &config.temporal_patterns;
1778        if !tp.enabled || !tp.business_days.enabled {
1779            return Ok(None);
1780        }
1781
1782        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1783            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1784        let end_date = start_date + chrono::Months::new(config.global.period_months);
1785
1786        let region_code = tp
1787            .calendars
1788            .regions
1789            .first()
1790            .cloned()
1791            .unwrap_or_else(|| "US".to_string());
1792        let region = parse_region_code(&region_code);
1793
1794        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1795    }
1796
1797    /// Build the shared template provider from `config.templates`.
1798    ///
1799    /// Always returns a provider — falls back to embedded-only when
1800    /// `config.templates.path` is `None`. The merge-strategy from config
1801    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1802    /// orchestrator-construction time are fatal (preferable to silently
1803    /// using embedded pools when the user supplied a bad path).
1804    fn build_template_provider(
1805        config: &GeneratorConfig,
1806    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1807        use datasynth_core::templates::{
1808            loader::{MergeStrategy, TemplateLoader},
1809            DefaultTemplateProvider,
1810        };
1811        use std::sync::Arc;
1812
1813        let provider = match &config.templates.path {
1814            None => DefaultTemplateProvider::new(),
1815            Some(path) => {
1816                let data = if path.is_dir() {
1817                    TemplateLoader::load_from_directory(path)
1818                } else {
1819                    TemplateLoader::load_from_file(path)
1820                }
1821                .map_err(|e| {
1822                    SynthError::config(format!(
1823                        "Failed to load templates from {}: {e}",
1824                        path.display()
1825                    ))
1826                })?;
1827                let strategy = match config.templates.merge_strategy {
1828                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1829                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1830                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1831                        MergeStrategy::MergePreferFile
1832                    }
1833                };
1834                DefaultTemplateProvider::with_templates(data, strategy)
1835            }
1836        };
1837        Ok(Arc::new(provider))
1838    }
1839
1840    /// Create with default phase config.
1841    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1842        Self::new(config, PhaseConfig::default())
1843    }
1844
1845    /// Set a streaming phase sink for real-time output (builder pattern).
1846    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1847        self.phase_sink = Some(sink);
1848        self
1849    }
1850
1851    /// Set a streaming phase sink on an existing orchestrator.
1852    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1853        self.phase_sink = Some(sink);
1854    }
1855
1856    /// Emit a batch of items to the phase sink (if configured).
1857    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1858        if let Some(ref sink) = self.phase_sink {
1859            for item in items {
1860                if let Ok(value) = serde_json::to_value(item) {
1861                    if let Err(e) = sink.emit(phase, type_name, &value) {
1862                        warn!(
1863                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1864                        );
1865                    }
1866                }
1867            }
1868            if let Err(e) = sink.phase_complete(phase) {
1869                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1870            }
1871        }
1872    }
1873
1874    /// Enable/disable progress bars.
1875    pub fn with_progress(mut self, show: bool) -> Self {
1876        self.phase_config.show_progress = show;
1877        if show {
1878            self.multi_progress = Some(MultiProgress::new());
1879        }
1880        self
1881    }
1882
1883    /// Set the output path for disk space monitoring.
1884    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1885        let path = path.into();
1886        self.output_path = Some(path.clone());
1887        // Rebuild resource guard with the output path
1888        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1889        self
1890    }
1891
1892    /// Access the country pack registry.
1893    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1894        &self.country_pack_registry
1895    }
1896
1897    /// Look up a country pack by country code string.
1898    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1899        self.country_pack_registry.get_by_str(country)
1900    }
1901
1902    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1903    /// company, defaulting to `"US"` if no companies are configured.
1904    fn primary_country_code(&self) -> &str {
1905        self.config
1906            .companies
1907            .first()
1908            .map(|c| c.country.as_str())
1909            .unwrap_or("US")
1910    }
1911
1912    /// Resolve the country pack for the primary (first) company.
1913    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1914        self.country_pack_for(self.primary_country_code())
1915    }
1916
1917    /// Resolve the CoA framework from config/country-pack.
1918    fn resolve_coa_framework(&self) -> CoAFramework {
1919        if self.config.accounting_standards.enabled {
1920            match self.config.accounting_standards.framework {
1921                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1922                    return CoAFramework::FrenchPcg;
1923                }
1924                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1925                    return CoAFramework::GermanSkr04;
1926                }
1927                _ => {}
1928            }
1929        }
1930        // Fallback: derive from country pack
1931        let pack = self.primary_pack();
1932        match pack.accounting.framework.as_str() {
1933            "french_gaap" => CoAFramework::FrenchPcg,
1934            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1935            _ => CoAFramework::UsGaap,
1936        }
1937    }
1938
1939    /// Check if copula generators are available.
1940    ///
1941    /// Returns true if the orchestrator has copula generators for preserving
1942    /// correlations (typically from fingerprint-based generation).
1943    pub fn has_copulas(&self) -> bool {
1944        !self.copula_generators.is_empty()
1945    }
1946
1947    /// Get the copula generators.
1948    ///
1949    /// Returns a reference to the copula generators for use during generation.
1950    /// These can be used to generate correlated samples that preserve the
1951    /// statistical relationships from the source data.
1952    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1953        &self.copula_generators
1954    }
1955
1956    /// Get a mutable reference to the copula generators.
1957    ///
1958    /// Allows generators to sample from copulas during data generation.
1959    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1960        &mut self.copula_generators
1961    }
1962
1963    /// Sample correlated values from a named copula.
1964    ///
1965    /// Returns None if the copula doesn't exist.
1966    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1967        self.copula_generators
1968            .iter_mut()
1969            .find(|c| c.name == copula_name)
1970            .map(|c| c.generator.sample())
1971    }
1972
1973    /// Create an orchestrator from a fingerprint file.
1974    ///
1975    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1976    /// and creates an orchestrator configured to generate data matching
1977    /// the statistical properties of the original data.
1978    ///
1979    /// # Arguments
1980    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1981    /// * `phase_config` - Phase configuration for generation
1982    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1983    ///
1984    /// # Example
1985    /// ```no_run
1986    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1987    /// use std::path::Path;
1988    ///
1989    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1990    ///     Path::new("fingerprint.dsf"),
1991    ///     PhaseConfig::default(),
1992    ///     1.0,
1993    /// ).unwrap();
1994    /// ```
1995    pub fn from_fingerprint(
1996        fingerprint_path: &std::path::Path,
1997        phase_config: PhaseConfig,
1998        scale: f64,
1999    ) -> SynthResult<Self> {
2000        info!("Loading fingerprint from: {}", fingerprint_path.display());
2001
2002        // Read the fingerprint
2003        let reader = FingerprintReader::new();
2004        let fingerprint = reader
2005            .read_from_file(fingerprint_path)
2006            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2007
2008        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2009    }
2010
2011    /// Create an orchestrator from a loaded fingerprint.
2012    ///
2013    /// # Arguments
2014    /// * `fingerprint` - The loaded fingerprint
2015    /// * `phase_config` - Phase configuration for generation
2016    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2017    pub fn from_fingerprint_data(
2018        fingerprint: Fingerprint,
2019        phase_config: PhaseConfig,
2020        scale: f64,
2021    ) -> SynthResult<Self> {
2022        info!(
2023            "Synthesizing config from fingerprint (version: {}, tables: {})",
2024            fingerprint.manifest.version,
2025            fingerprint.schema.tables.len()
2026        );
2027
2028        // Generate a seed for the synthesis
2029        let seed: u64 = rand::random();
2030        info!("Fingerprint synthesis seed: {}", seed);
2031
2032        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2033        let options = SynthesisOptions {
2034            scale,
2035            seed: Some(seed),
2036            preserve_correlations: true,
2037            inject_anomalies: true,
2038        };
2039        let synthesizer = ConfigSynthesizer::with_options(options);
2040
2041        // Synthesize full result including copula generators
2042        let synthesis_result = synthesizer
2043            .synthesize_full(&fingerprint, seed)
2044            .map_err(|e| {
2045                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2046            })?;
2047
2048        // Start with a base config from the fingerprint's industry if available
2049        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2050            Self::base_config_for_industry(industry)
2051        } else {
2052            Self::base_config_for_industry("manufacturing")
2053        };
2054
2055        // Apply the synthesized patches
2056        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2057
2058        // Log synthesis results
2059        info!(
2060            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2061            fingerprint.schema.tables.len(),
2062            scale,
2063            synthesis_result.copula_generators.len()
2064        );
2065
2066        if !synthesis_result.copula_generators.is_empty() {
2067            for spec in &synthesis_result.copula_generators {
2068                info!(
2069                    "  Copula '{}' for table '{}': {} columns",
2070                    spec.name,
2071                    spec.table,
2072                    spec.columns.len()
2073                );
2074            }
2075        }
2076
2077        // Create the orchestrator with the synthesized config
2078        let mut orchestrator = Self::new(config, phase_config)?;
2079
2080        // Store copula generators for use during generation
2081        orchestrator.copula_generators = synthesis_result.copula_generators;
2082
2083        Ok(orchestrator)
2084    }
2085
2086    /// Create a base config for a given industry.
2087    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2088        use datasynth_config::presets::create_preset;
2089        use datasynth_config::TransactionVolume;
2090        use datasynth_core::models::{CoAComplexity, IndustrySector};
2091
2092        let sector = match industry.to_lowercase().as_str() {
2093            "manufacturing" => IndustrySector::Manufacturing,
2094            "retail" => IndustrySector::Retail,
2095            "financial" | "financial_services" => IndustrySector::FinancialServices,
2096            "healthcare" => IndustrySector::Healthcare,
2097            "technology" | "tech" => IndustrySector::Technology,
2098            _ => IndustrySector::Manufacturing,
2099        };
2100
2101        // Create a preset with reasonable defaults
2102        create_preset(
2103            sector,
2104            1,  // company count
2105            12, // period months
2106            CoAComplexity::Medium,
2107            TransactionVolume::TenK,
2108        )
2109    }
2110
2111    /// Apply a config patch to a GeneratorConfig.
2112    fn apply_config_patch(
2113        mut config: GeneratorConfig,
2114        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2115    ) -> GeneratorConfig {
2116        use datasynth_fingerprint::synthesis::ConfigValue;
2117
2118        for (key, value) in patch.values() {
2119            match (key.as_str(), value) {
2120                // Transaction count is handled via TransactionVolume enum on companies
2121                // Log it but cannot directly set it (would need to modify company volumes)
2122                ("transactions.count", ConfigValue::Integer(n)) => {
2123                    info!(
2124                        "Fingerprint suggests {} transactions (apply via company volumes)",
2125                        n
2126                    );
2127                }
2128                ("global.period_months", ConfigValue::Integer(n)) => {
2129                    config.global.period_months = (*n).clamp(1, 120) as u32;
2130                }
2131                ("global.start_date", ConfigValue::String(s)) => {
2132                    config.global.start_date = s.clone();
2133                }
2134                ("global.seed", ConfigValue::Integer(n)) => {
2135                    config.global.seed = Some(*n as u64);
2136                }
2137                ("fraud.enabled", ConfigValue::Bool(b)) => {
2138                    config.fraud.enabled = *b;
2139                }
2140                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2141                    config.fraud.fraud_rate = *f;
2142                }
2143                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2144                    config.data_quality.enabled = *b;
2145                }
2146                // Handle anomaly injection paths (mapped to fraud config)
2147                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2148                    config.fraud.enabled = *b;
2149                }
2150                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2151                    config.fraud.fraud_rate = *f;
2152                }
2153                _ => {
2154                    debug!("Ignoring unknown config patch key: {}", key);
2155                }
2156            }
2157        }
2158
2159        config
2160    }
2161
2162    /// Build a resource guard from the configuration.
2163    fn build_resource_guard(
2164        config: &GeneratorConfig,
2165        output_path: Option<PathBuf>,
2166    ) -> ResourceGuard {
2167        let mut builder = ResourceGuardBuilder::new();
2168
2169        // Configure memory limit if set
2170        if config.global.memory_limit_mb > 0 {
2171            builder = builder.memory_limit(config.global.memory_limit_mb);
2172        }
2173
2174        // Configure disk monitoring for output path
2175        if let Some(path) = output_path {
2176            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2177        }
2178
2179        // Use conservative degradation settings for production safety
2180        builder = builder.conservative();
2181
2182        builder.build()
2183    }
2184
2185    /// Check resources (memory, disk, CPU) and return degradation level.
2186    ///
2187    /// Returns an error if hard limits are exceeded.
2188    /// Returns Ok(DegradationLevel) indicating current resource state.
2189    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2190        self.resource_guard.check()
2191    }
2192
2193    /// Check resources with logging.
2194    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2195        let level = self.resource_guard.check()?;
2196
2197        if level != DegradationLevel::Normal {
2198            warn!(
2199                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2200                phase,
2201                level,
2202                self.resource_guard.current_memory_mb(),
2203                self.resource_guard.available_disk_mb()
2204            );
2205        }
2206
2207        Ok(level)
2208    }
2209
2210    /// Get current degradation actions based on resource state.
2211    fn get_degradation_actions(&self) -> DegradationActions {
2212        self.resource_guard.get_actions()
2213    }
2214
2215    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2216    fn check_memory_limit(&self) -> SynthResult<()> {
2217        self.check_resources()?;
2218        Ok(())
2219    }
2220
2221    /// Run the complete generation workflow.
2222    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2223        info!("Starting enhanced generation workflow");
2224        info!(
2225            "Config: industry={:?}, period_months={}, companies={}",
2226            self.config.global.industry,
2227            self.config.global.period_months,
2228            self.config.companies.len()
2229        );
2230
2231        // Set decimal serialization mode (thread-local, affects JSON output).
2232        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2233        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2234        datasynth_core::serde_decimal::set_numeric_native(is_native);
2235        struct NumericModeGuard;
2236        impl Drop for NumericModeGuard {
2237            fn drop(&mut self) {
2238                datasynth_core::serde_decimal::set_numeric_native(false);
2239            }
2240        }
2241        let _numeric_guard = if is_native {
2242            Some(NumericModeGuard)
2243        } else {
2244            None
2245        };
2246
2247        // Initial resource check before starting
2248        let initial_level = self.check_resources_with_log("initial")?;
2249        if initial_level == DegradationLevel::Emergency {
2250            return Err(SynthError::resource(
2251                "Insufficient resources to start generation",
2252            ));
2253        }
2254
2255        let mut stats = EnhancedGenerationStatistics {
2256            companies_count: self.config.companies.len(),
2257            period_months: self.config.global.period_months,
2258            ..Default::default()
2259        };
2260
2261        // Phase 1: Chart of Accounts
2262        let coa = self.phase_chart_of_accounts(&mut stats)?;
2263
2264        // Phase 2: Master Data
2265        self.phase_master_data(&mut stats)?;
2266
2267        // Emit master data to stream sink
2268        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2269        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2270        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2271
2272        // Phase 3: Document Flows + Subledger Linking
2273        let (mut document_flows, mut subledger, fa_journal_entries) =
2274            self.phase_document_flows(&mut stats)?;
2275
2276        // Emit document flows to stream sink
2277        self.emit_phase_items(
2278            "document_flows",
2279            "PurchaseOrder",
2280            &document_flows.purchase_orders,
2281        );
2282        self.emit_phase_items(
2283            "document_flows",
2284            "GoodsReceipt",
2285            &document_flows.goods_receipts,
2286        );
2287        self.emit_phase_items(
2288            "document_flows",
2289            "VendorInvoice",
2290            &document_flows.vendor_invoices,
2291        );
2292        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2293        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2294
2295        // Phase 3b: Opening Balances (before JE generation)
2296        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2297
2298        // Phase 3c: Convert opening balances to journal entries and prepend them.
2299        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2300        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2301        // balance map type.
2302        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2303            .iter()
2304            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2305            .collect();
2306        if !opening_balance_jes.is_empty() {
2307            debug!(
2308                "Prepending {} opening balance JEs to entries",
2309                opening_balance_jes.len()
2310            );
2311        }
2312
2313        // Phase 4: Journal Entries
2314        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2315
2316        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2317        // starts from the correct initial state.
2318        if !opening_balance_jes.is_empty() {
2319            let mut combined = opening_balance_jes;
2320            combined.extend(entries);
2321            entries = combined;
2322        }
2323
2324        // Phase 4c: Append FA acquisition journal entries to main entries
2325        if !fa_journal_entries.is_empty() {
2326            debug!(
2327                "Appending {} FA acquisition JEs to main entries",
2328                fa_journal_entries.len()
2329            );
2330            entries.extend(fa_journal_entries);
2331        }
2332
2333        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2334        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2335
2336        // Get current degradation actions for optional phases
2337        let actions = self.get_degradation_actions();
2338
2339        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2340        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2341
2342        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2343        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2344        if !sourcing.contracts.is_empty() {
2345            let mut linked_count = 0usize;
2346            // Collect (vendor_id, po_id) pairs from P2P chains
2347            let po_vendor_pairs: Vec<(String, String)> = document_flows
2348                .p2p_chains
2349                .iter()
2350                .map(|chain| {
2351                    (
2352                        chain.purchase_order.vendor_id.clone(),
2353                        chain.purchase_order.header.document_id.clone(),
2354                    )
2355                })
2356                .collect();
2357
2358            for chain in &mut document_flows.p2p_chains {
2359                if chain.purchase_order.contract_id.is_none() {
2360                    if let Some(contract) = sourcing
2361                        .contracts
2362                        .iter()
2363                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2364                    {
2365                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2366                        linked_count += 1;
2367                    }
2368                }
2369            }
2370
2371            // Populate reverse FK: purchase_order_ids on each contract
2372            for contract in &mut sourcing.contracts {
2373                let po_ids: Vec<String> = po_vendor_pairs
2374                    .iter()
2375                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2376                    .map(|(_, po_id)| po_id.clone())
2377                    .collect();
2378                if !po_ids.is_empty() {
2379                    contract.purchase_order_ids = po_ids;
2380                }
2381            }
2382
2383            if linked_count > 0 {
2384                debug!(
2385                    "Linked {} purchase orders to S2C contracts by vendor match",
2386                    linked_count
2387                );
2388            }
2389        }
2390
2391        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2392        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2393
2394        // Phase 5c: Append IC journal entries to main entries
2395        if !intercompany.seller_journal_entries.is_empty()
2396            || !intercompany.buyer_journal_entries.is_empty()
2397        {
2398            let ic_je_count = intercompany.seller_journal_entries.len()
2399                + intercompany.buyer_journal_entries.len();
2400            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2401            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2402            debug!(
2403                "Appended {} IC journal entries to main entries",
2404                ic_je_count
2405            );
2406        }
2407
2408        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2409        if !intercompany.elimination_entries.is_empty() {
2410            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2411                &intercompany.elimination_entries,
2412            );
2413            if !elim_jes.is_empty() {
2414                debug!(
2415                    "Appended {} elimination journal entries to main entries",
2416                    elim_jes.len()
2417                );
2418                // IC elimination net-zero assertion (v2.5 hardening)
2419                let elim_debit: rust_decimal::Decimal =
2420                    elim_jes.iter().map(|je| je.total_debit()).sum();
2421                let elim_credit: rust_decimal::Decimal =
2422                    elim_jes.iter().map(|je| je.total_credit()).sum();
2423                let elim_diff = (elim_debit - elim_credit).abs();
2424                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2425                if elim_diff > tolerance {
2426                    return Err(datasynth_core::error::SynthError::generation(format!(
2427                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2428                        elim_debit, elim_credit, elim_diff, tolerance
2429                    )));
2430                }
2431                debug!(
2432                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2433                    elim_debit, elim_credit, elim_diff
2434                );
2435                entries.extend(elim_jes);
2436            }
2437        }
2438
2439        // Phase 5e: Wire IC source documents into document flow snapshot
2440        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2441            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2442                document_flows
2443                    .customer_invoices
2444                    .extend(ic_docs.seller_invoices.iter().cloned());
2445                document_flows
2446                    .purchase_orders
2447                    .extend(ic_docs.buyer_orders.iter().cloned());
2448                document_flows
2449                    .goods_receipts
2450                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2451                document_flows
2452                    .vendor_invoices
2453                    .extend(ic_docs.buyer_invoices.iter().cloned());
2454                debug!(
2455                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2456                    ic_docs.seller_invoices.len(),
2457                    ic_docs.buyer_orders.len(),
2458                    ic_docs.buyer_goods_receipts.len(),
2459                    ic_docs.buyer_invoices.len(),
2460                );
2461            }
2462        }
2463
2464        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2465        let hr = self.phase_hr_data(&mut stats)?;
2466
2467        // Phase 6b: Generate JEs from payroll runs
2468        if !hr.payroll_runs.is_empty() {
2469            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2470            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2471            entries.extend(payroll_jes);
2472        }
2473
2474        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2475        if !hr.pension_journal_entries.is_empty() {
2476            debug!(
2477                "Generated {} JEs from pension plans",
2478                hr.pension_journal_entries.len()
2479            );
2480            entries.extend(hr.pension_journal_entries.iter().cloned());
2481        }
2482
2483        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2484        if !hr.stock_comp_journal_entries.is_empty() {
2485            debug!(
2486                "Generated {} JEs from stock-based compensation",
2487                hr.stock_comp_journal_entries.len()
2488            );
2489            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2490        }
2491
2492        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2493        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2494
2495        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2496        if !manufacturing_snap.production_orders.is_empty() {
2497            let currency = self
2498                .config
2499                .companies
2500                .first()
2501                .map(|c| c.currency.as_str())
2502                .unwrap_or("USD");
2503            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2504                &manufacturing_snap.production_orders,
2505                &manufacturing_snap.quality_inspections,
2506                currency,
2507            );
2508            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2509            entries.extend(mfg_jes);
2510        }
2511
2512        // Phase 7a-warranty: Generate warranty provisions per company
2513        if !manufacturing_snap.quality_inspections.is_empty() {
2514            let framework = match self.config.accounting_standards.framework {
2515                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2516                _ => "US_GAAP",
2517            };
2518            for company in &self.config.companies {
2519                let company_orders: Vec<_> = manufacturing_snap
2520                    .production_orders
2521                    .iter()
2522                    .filter(|o| o.company_code == company.code)
2523                    .cloned()
2524                    .collect();
2525                let company_inspections: Vec<_> = manufacturing_snap
2526                    .quality_inspections
2527                    .iter()
2528                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2529                    .cloned()
2530                    .collect();
2531                if company_inspections.is_empty() {
2532                    continue;
2533                }
2534                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2535                let warranty_result = warranty_gen.generate(
2536                    &company.code,
2537                    &company_orders,
2538                    &company_inspections,
2539                    &company.currency,
2540                    framework,
2541                );
2542                if !warranty_result.journal_entries.is_empty() {
2543                    debug!(
2544                        "Generated {} warranty provision JEs for {}",
2545                        warranty_result.journal_entries.len(),
2546                        company.code
2547                    );
2548                    entries.extend(warranty_result.journal_entries);
2549                }
2550            }
2551        }
2552
2553        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2554        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2555        {
2556            let cogs_currency = self
2557                .config
2558                .companies
2559                .first()
2560                .map(|c| c.currency.as_str())
2561                .unwrap_or("USD");
2562            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2563                &document_flows.deliveries,
2564                &manufacturing_snap.production_orders,
2565                cogs_currency,
2566            );
2567            if !cogs_jes.is_empty() {
2568                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2569                entries.extend(cogs_jes);
2570            }
2571        }
2572
2573        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2574        //
2575        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2576        // subledger inventory positions.  Here we reconcile them so that position balances
2577        // reflect the actual stock movements within the generation period.
2578        if !manufacturing_snap.inventory_movements.is_empty()
2579            && !subledger.inventory_positions.is_empty()
2580        {
2581            use datasynth_core::models::MovementType as MfgMovementType;
2582            let mut receipt_count = 0usize;
2583            let mut issue_count = 0usize;
2584            for movement in &manufacturing_snap.inventory_movements {
2585                // Find a matching position by material code and company
2586                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2587                    p.material_id == movement.material_code
2588                        && p.company_code == movement.entity_code
2589                }) {
2590                    match movement.movement_type {
2591                        MfgMovementType::GoodsReceipt => {
2592                            // Increase stock and update weighted-average cost
2593                            pos.add_quantity(
2594                                movement.quantity,
2595                                movement.value,
2596                                movement.movement_date,
2597                            );
2598                            receipt_count += 1;
2599                        }
2600                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2601                            // Decrease stock (best-effort; silently skip if insufficient)
2602                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2603                            issue_count += 1;
2604                        }
2605                        _ => {}
2606                    }
2607                }
2608            }
2609            debug!(
2610                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2611                manufacturing_snap.inventory_movements.len(),
2612                receipt_count,
2613                issue_count,
2614            );
2615        }
2616
2617        // Update final entry/line-item stats after all JE-generating phases
2618        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2619        if !entries.is_empty() {
2620            stats.total_entries = entries.len() as u64;
2621            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2622            debug!(
2623                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2624                stats.total_entries, stats.total_line_items
2625            );
2626        }
2627
2628        // Phase 7b: Apply internal controls to journal entries
2629        if self.config.internal_controls.enabled && !entries.is_empty() {
2630            info!("Phase 7b: Applying internal controls to journal entries");
2631            let control_config = ControlGeneratorConfig {
2632                exception_rate: self.config.internal_controls.exception_rate,
2633                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2634                enable_sox_marking: true,
2635                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2636                    self.config.internal_controls.sox_materiality_threshold,
2637                )
2638                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2639                ..Default::default()
2640            };
2641            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2642            for entry in &mut entries {
2643                control_gen.apply_controls(entry, &coa);
2644            }
2645            let with_controls = entries
2646                .iter()
2647                .filter(|e| !e.header.control_ids.is_empty())
2648                .count();
2649            info!(
2650                "Applied controls to {} entries ({} with control IDs assigned)",
2651                entries.len(),
2652                with_controls
2653            );
2654        }
2655
2656        // Phase 7c: Extract SoD violations from annotated journal entries.
2657        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2658        // Here we materialise those flags into standalone SodViolation records.
2659        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2660            .iter()
2661            .filter(|e| e.header.sod_violation)
2662            .filter_map(|e| {
2663                e.header.sod_conflict_type.map(|ct| {
2664                    use datasynth_core::models::{RiskLevel, SodViolation};
2665                    let severity = match ct {
2666                        datasynth_core::models::SodConflictType::PaymentReleaser
2667                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2668                            RiskLevel::Critical
2669                        }
2670                        datasynth_core::models::SodConflictType::PreparerApprover
2671                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2672                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2673                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2674                            RiskLevel::High
2675                        }
2676                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2677                            RiskLevel::Medium
2678                        }
2679                    };
2680                    let action = format!(
2681                        "SoD conflict {:?} on entry {} ({})",
2682                        ct, e.header.document_id, e.header.company_code
2683                    );
2684                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2685                })
2686            })
2687            .collect();
2688        if !sod_violations.is_empty() {
2689            info!(
2690                "Phase 7c: Extracted {} SoD violations from {} entries",
2691                sod_violations.len(),
2692                entries.len()
2693            );
2694        }
2695
2696        // Emit journal entries to stream sink (after all JE-generating phases)
2697        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2698
2699        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2700        //
2701        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2702        // document-level fraud are exempt from subsequent line-level flag
2703        // overwrites, and so downstream consumers see a coherent picture.
2704        //
2705        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2706        {
2707            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2708            if self.config.fraud.enabled && doc_rate > 0.0 {
2709                use datasynth_core::fraud_propagation::{
2710                    inject_document_fraud, propagate_documents_to_entries,
2711                };
2712                use datasynth_core::utils::weighted_select;
2713                use datasynth_core::FraudType;
2714                use rand_chacha::rand_core::SeedableRng;
2715
2716                let dist = &self.config.fraud.fraud_type_distribution;
2717                let fraud_type_weights: [(FraudType, f64); 8] = [
2718                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2719                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2720                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2721                    (
2722                        FraudType::ImproperCapitalization,
2723                        dist.expense_capitalization,
2724                    ),
2725                    (FraudType::SplitTransaction, dist.split_transaction),
2726                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2727                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2728                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2729                ];
2730                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2731                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2732                    if weights_sum <= 0.0 {
2733                        FraudType::FictitiousEntry
2734                    } else {
2735                        *weighted_select(rng, &fraud_type_weights)
2736                    }
2737                };
2738
2739                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2740                let mut doc_tagged = 0usize;
2741                macro_rules! inject_into {
2742                    ($collection:expr) => {{
2743                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2744                            $collection.iter_mut().map(|d| &mut d.header).collect();
2745                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2746                    }};
2747                }
2748                inject_into!(document_flows.purchase_orders);
2749                inject_into!(document_flows.goods_receipts);
2750                inject_into!(document_flows.vendor_invoices);
2751                inject_into!(document_flows.payments);
2752                inject_into!(document_flows.sales_orders);
2753                inject_into!(document_flows.deliveries);
2754                inject_into!(document_flows.customer_invoices);
2755                if doc_tagged > 0 {
2756                    info!(
2757                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2758                    );
2759                }
2760
2761                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2762                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2763                        Vec::new();
2764                    headers.extend(
2765                        document_flows
2766                            .purchase_orders
2767                            .iter()
2768                            .map(|d| d.header.clone()),
2769                    );
2770                    headers.extend(
2771                        document_flows
2772                            .goods_receipts
2773                            .iter()
2774                            .map(|d| d.header.clone()),
2775                    );
2776                    headers.extend(
2777                        document_flows
2778                            .vendor_invoices
2779                            .iter()
2780                            .map(|d| d.header.clone()),
2781                    );
2782                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2783                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2784                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2785                    headers.extend(
2786                        document_flows
2787                            .customer_invoices
2788                            .iter()
2789                            .map(|d| d.header.clone()),
2790                    );
2791                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2792                    if propagated > 0 {
2793                        info!(
2794                            "Propagated document-level fraud to {propagated} derived journal entries"
2795                        );
2796                    }
2797                }
2798            }
2799        }
2800
2801        // Phase 8: Anomaly Injection (after all JE-generating phases)
2802        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2803
2804        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2805        // through the anomaly injector.
2806        //
2807        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2808        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2809        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2810        //   - Any external mutation that sets is_fraud after the fact
2811        //
2812        // The anomaly injector already applies the same bias inline when it
2813        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2814        // so gating this sweep on `!is_anomaly` avoids double-application.
2815        //
2816        // Without this sweep, fraud entries from these paths show 0 lift on
2817        // the canonical forensic signals (is_round_1000, is_off_hours,
2818        // is_weekend, is_post_close), which is exactly what the SDK-side
2819        // evaluator caught in v3.1 — fraud features had worse lift than
2820        // baseline. See DS-3.1 post-deploy feedback.
2821        {
2822            use datasynth_core::fraud_bias::{
2823                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2824            };
2825            use rand_chacha::rand_core::SeedableRng;
2826            let cfg = FraudBehavioralBiasConfig::default();
2827            if cfg.enabled {
2828                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2829                let mut swept = 0usize;
2830                for entry in entries.iter_mut() {
2831                    if entry.header.is_fraud && !entry.header.is_anomaly {
2832                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2833                        swept += 1;
2834                    }
2835                }
2836                if swept > 0 {
2837                    info!(
2838                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2839                         (doc-propagated + je_generator intrinsic fraud)"
2840                    );
2841                }
2842            }
2843        }
2844
2845        // Emit anomaly labels to stream sink
2846        self.emit_phase_items(
2847            "anomaly_injection",
2848            "LabeledAnomaly",
2849            &anomaly_labels.labels,
2850        );
2851
2852        // Propagate fraud labels from journal entries to source documents.
2853        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2854        // instead of tracing through document_references.json.
2855        //
2856        // Gated by `fraud.propagate_to_document` (default true) — disable when
2857        // downstream consumers want document fraud flags to reflect only
2858        // document-level injection, not line-level.
2859        if self.config.fraud.propagate_to_document {
2860            use std::collections::HashMap;
2861            // Build a map from lookup key (JE reference, bare document ID, JE document_id) -> fraud_type, from fraudulent JEs only.
2862            //
2863            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2864            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2865            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2866            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2867            // we register BOTH the prefixed form (raw reference) AND the bare form
2868            // (post-colon portion) in the map. Also register the JE's document_id
2869            // UUID so documents that set `journal_entry_id` match via that path.
2870            //
2871            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2872            // looked up "foo", silently producing 0 propagations.
2873            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2874            for je in &entries {
2875                if je.header.is_fraud {
2876                    if let Some(ref fraud_type) = je.header.fraud_type {
2877                        if let Some(ref reference) = je.header.reference {
2878                            // Register the full reference ("GR:PO-2024-000001")
2879                            fraud_map.insert(reference.clone(), *fraud_type);
2880                            // Also register the bare document ID ("PO-2024-000001")
2881                            // by stripping the "PREFIX:" if present.
2882                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2883                                if !bare.is_empty() {
2884                                    fraud_map.insert(bare.to_string(), *fraud_type);
2885                                }
2886                            }
2887                        }
2888                        // Also tag via journal_entry_id on document headers
2889                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2890                    }
2891                }
2892            }
2893            if !fraud_map.is_empty() {
2894                let mut propagated = 0usize;
2895                // Use DocumentHeader::propagate_fraud method for each doc type
2896                macro_rules! propagate_to {
2897                    ($collection:expr) => {
2898                        for doc in &mut $collection {
2899                            if doc.header.propagate_fraud(&fraud_map) {
2900                                propagated += 1;
2901                            }
2902                        }
2903                    };
2904                }
2905                propagate_to!(document_flows.purchase_orders);
2906                propagate_to!(document_flows.goods_receipts);
2907                propagate_to!(document_flows.vendor_invoices);
2908                propagate_to!(document_flows.payments);
2909                propagate_to!(document_flows.sales_orders);
2910                propagate_to!(document_flows.deliveries);
2911                propagate_to!(document_flows.customer_invoices);
2912                if propagated > 0 {
2913                    info!(
2914                        "Propagated fraud labels to {} document flow records",
2915                        propagated
2916                    );
2917                }
2918            }
2919        }
2920
2921        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2922        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2923
2924        // Emit red flags to stream sink
2925        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2926
2927        // Phase 26b: Collusion Ring Generation (after red flags)
2928        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2929
2930        // Emit collusion rings to stream sink
2931        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2932
2933        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
2934        let balance_validation = self.phase_balance_validation(&entries)?;
2935
2936        // Phase 9a: COA coverage — every gl_account in JEs must exist in the
2937        // chart of accounts. Soft warning by default; hard fail when the
2938        // user passes --validate-coa-coverage / sets the strict flag.
2939        self.validate_coa_coverage(&entries, coa.as_ref())?;
2940
2941        // Phase 9b: GL-to-Subledger Reconciliation
2942        let subledger_reconciliation =
2943            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2944
2945        // Phase 10: Data Quality Injection
2946        let (data_quality_stats, quality_issues) =
2947            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2948
2949        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2950        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2951
2952        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
2953        {
2954            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2955
2956            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
2957            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
2958            let mut unbalanced_clean = 0usize;
2959            for je in &entries {
2960                if je.header.is_fraud || je.header.is_anomaly {
2961                    continue;
2962                }
2963                let diff = (je.total_debit() - je.total_credit()).abs();
2964                if diff > tolerance {
2965                    unbalanced_clean += 1;
2966                    if unbalanced_clean <= 3 {
2967                        warn!(
2968                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2969                            je.header.document_id,
2970                            je.total_debit(),
2971                            je.total_credit(),
2972                            diff
2973                        );
2974                    }
2975                }
2976            }
2977            if unbalanced_clean > 0 {
2978                return Err(datasynth_core::error::SynthError::generation(format!(
2979                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2980                     First few logged above. Tolerance={}",
2981                    unbalanced_clean, tolerance
2982                )));
2983            }
2984            debug!(
2985                "Phase 10c: All {} non-anomaly JEs individually balanced",
2986                entries
2987                    .iter()
2988                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2989                    .count()
2990            );
2991
2992            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
2993            let company_codes: Vec<String> = self
2994                .config
2995                .companies
2996                .iter()
2997                .map(|c| c.code.clone())
2998                .collect();
2999            for company_code in &company_codes {
3000                let mut assets = rust_decimal::Decimal::ZERO;
3001                let mut liab_equity = rust_decimal::Decimal::ZERO;
3002
3003                for entry in &entries {
3004                    if entry.header.company_code != *company_code {
3005                        continue;
3006                    }
3007                    for line in &entry.lines {
3008                        let acct = &line.gl_account;
3009                        let net = line.debit_amount - line.credit_amount;
3010                        // Asset accounts (1xxx): normal debit balance
3011                        if acct.starts_with('1') {
3012                            assets += net;
3013                        }
3014                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3015                        else if acct.starts_with('2') || acct.starts_with('3') {
3016                            liab_equity -= net; // credit-normal, so negate debit-net
3017                        }
3018                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3019                        // so they net to zero after closing entries
3020                    }
3021                }
3022
3023                let bs_diff = (assets - liab_equity).abs();
3024                if bs_diff > tolerance {
3025                    warn!(
3026                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3027                         revenue/expense closing entries may not fully offset",
3028                        company_code, assets, liab_equity, bs_diff
3029                    );
3030                    // Warn rather than error: multi-period datasets may have timing
3031                    // differences from accruals/deferrals that resolve in later periods.
3032                    // The per-JE balance check (Assert 1) is the hard gate.
3033                } else {
3034                    debug!(
3035                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3036                        company_code, assets, liab_equity, bs_diff
3037                    );
3038                }
3039            }
3040
3041            info!("Phase 10c: All generation-time accounting assertions passed");
3042        }
3043
3044        // Phase 11: Audit Data
3045        let audit = self.phase_audit_data(&entries, &mut stats)?;
3046
3047        // Phase 12: Banking KYC/AML Data
3048        let mut banking = self.phase_banking_data(&mut stats)?;
3049
3050        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3051        // Creates coherence between the accounting layer (payments, JEs) and the
3052        // banking layer (bank transactions). A vendor invoice payment now appears
3053        // on both sides with cross-references and fraud labels propagated.
3054        if self.phase_config.generate_banking
3055            && !document_flows.payments.is_empty()
3056            && !banking.accounts.is_empty()
3057        {
3058            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3059            if bridge_rate > 0.0 {
3060                let mut bridge =
3061                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3062                        self.seed,
3063                    );
3064                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3065                    &document_flows.payments,
3066                    &banking.customers,
3067                    &banking.accounts,
3068                    bridge_rate,
3069                );
3070                info!(
3071                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3072                    bridge_stats.bridged_count,
3073                    bridge_stats.transactions_emitted,
3074                    bridge_stats.fraud_propagated,
3075                );
3076                let bridged_count = bridged_txns.len();
3077                banking.transactions.extend(bridged_txns);
3078
3079                // Re-run velocity computation so bridged txns also get features
3080                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3081                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3082                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3083                        &mut banking.transactions,
3084                    );
3085                }
3086
3087                // Recompute suspicious count after bridging
3088                banking.suspicious_count = banking
3089                    .transactions
3090                    .iter()
3091                    .filter(|t| t.is_suspicious)
3092                    .count();
3093                stats.banking_transaction_count = banking.transactions.len();
3094                stats.banking_suspicious_count = banking.suspicious_count;
3095            }
3096        }
3097
3098        // Phase 13: Graph Export
3099        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3100
3101        // Phase 14: LLM Enrichment
3102        self.phase_llm_enrichment(&mut stats);
3103
3104        // Phase 15: Diffusion Enhancement
3105        self.phase_diffusion_enhancement(&entries, &mut stats);
3106
3107        // Phase 16: Causal Overlay
3108        self.phase_causal_overlay(&mut stats);
3109
3110        // Phase 17: Bank Reconciliation + Financial Statements
3111        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3112        // provision data (from accounting_standards / tax snapshots) can be wired in.
3113        let mut financial_reporting = self.phase_financial_reporting(
3114            &document_flows,
3115            &entries,
3116            &coa,
3117            &hr,
3118            &audit,
3119            &mut stats,
3120        )?;
3121
3122        // BS coherence check: assets = liabilities + equity
3123        {
3124            use datasynth_core::models::StatementType;
3125            for stmt in &financial_reporting.consolidated_statements {
3126                if stmt.statement_type == StatementType::BalanceSheet {
3127                    let total_assets: rust_decimal::Decimal = stmt
3128                        .line_items
3129                        .iter()
3130                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3131                        .map(|li| li.amount)
3132                        .sum();
3133                    let total_le: rust_decimal::Decimal = stmt
3134                        .line_items
3135                        .iter()
3136                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3137                        .map(|li| li.amount)
3138                        .sum();
3139                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3140                        warn!(
3141                            "BS equation imbalance: assets={}, L+E={}",
3142                            total_assets, total_le
3143                        );
3144                    }
3145                }
3146            }
3147        }
3148
3149        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3150        let accounting_standards =
3151            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3152
3153        // Phase 18a: Merge ECL journal entries into main GL
3154        if !accounting_standards.ecl_journal_entries.is_empty() {
3155            debug!(
3156                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3157                accounting_standards.ecl_journal_entries.len()
3158            );
3159            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3160        }
3161
3162        // Phase 18a: Merge provision journal entries into main GL
3163        if !accounting_standards.provision_journal_entries.is_empty() {
3164            debug!(
3165                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3166                accounting_standards.provision_journal_entries.len()
3167            );
3168            entries.extend(
3169                accounting_standards
3170                    .provision_journal_entries
3171                    .iter()
3172                    .cloned(),
3173            );
3174        }
3175
3176        // Phase 18b: OCPM Events (after all process data is available)
3177        let mut ocpm = self.phase_ocpm_events(
3178            &document_flows,
3179            &sourcing,
3180            &hr,
3181            &manufacturing_snap,
3182            &banking,
3183            &audit,
3184            &financial_reporting,
3185            &mut stats,
3186        )?;
3187
3188        // Emit OCPM events to stream sink
3189        if let Some(ref event_log) = ocpm.event_log {
3190            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3191        }
3192
3193        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3194        if let Some(ref event_log) = ocpm.event_log {
3195            // Build reverse index: document_ref → indices of matching events in `event_log.events`
3196            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3197                std::collections::HashMap::new();
3198            for (idx, event) in event_log.events.iter().enumerate() {
3199                if let Some(ref doc_ref) = event.document_ref {
3200                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3201                }
3202            }
3203
3204            if !doc_index.is_empty() {
3205                let mut annotated = 0usize;
3206                for entry in &mut entries {
3207                    let doc_id_str = entry.header.document_id.to_string();
3208                    // Collect matching event indices from document_id and reference
3209                    let mut matched_indices: Vec<usize> = Vec::new();
3210                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3211                        matched_indices.extend(indices);
3212                    }
3213                    if let Some(ref reference) = entry.header.reference {
3214                        let bare_ref = reference
3215                            .find(':')
3216                            .map(|i| &reference[i + 1..])
3217                            .unwrap_or(reference.as_str());
3218                        if let Some(indices) = doc_index.get(bare_ref) {
3219                            for &idx in indices {
3220                                if !matched_indices.contains(&idx) {
3221                                    matched_indices.push(idx);
3222                                }
3223                            }
3224                        }
3225                    }
3226                    // Apply matches to JE header
3227                    if !matched_indices.is_empty() {
3228                        for &idx in &matched_indices {
3229                            let event = &event_log.events[idx];
3230                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3231                                entry.header.ocpm_event_ids.push(event.event_id);
3232                            }
3233                            for obj_ref in &event.object_refs {
3234                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3235                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3236                                }
3237                            }
3238                            if entry.header.ocpm_case_id.is_none() {
3239                                entry.header.ocpm_case_id = event.case_id;
3240                            }
3241                        }
3242                        annotated += 1;
3243                    }
3244                }
3245                debug!(
3246                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3247                    annotated
3248                );
3249            }
3250        }
3251
3252        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3253        // IC eliminations, opening balances, standards-driven entries) so
3254        // every JournalEntry carries at least one `ocpm_event_ids` link.
3255        if let Some(ref mut event_log) = ocpm.event_log {
3256            let synthesized =
3257                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3258            if synthesized > 0 {
3259                info!(
3260                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3261                );
3262            }
3263
3264            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3265            // events and their owning CaseTrace. Without this, every exported
3266            // OCEL event has `is_anomaly = false` even when the underlying JE
3267            // was flagged.
3268            let anomaly_events =
3269                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3270            if anomaly_events > 0 {
3271                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3272            }
3273
3274            // Phase 18f: Inject process-variant imperfections (rework, skipped
3275            // steps, out-of-order events) so conformance checkers see
3276            // realistic variant counts and fitness < 1.0. Uses the P2P
3277            // process rates as the single source of truth.
3278            let p2p_cfg = &self.config.ocpm.p2p_process;
3279            let any_imperfection = p2p_cfg.rework_probability > 0.0
3280                || p2p_cfg.skip_step_probability > 0.0
3281                || p2p_cfg.out_of_order_probability > 0.0;
3282            if any_imperfection {
3283                use rand_chacha::rand_core::SeedableRng;
3284                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3285                    rework_rate: p2p_cfg.rework_probability,
3286                    skip_rate: p2p_cfg.skip_step_probability,
3287                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3288                };
3289                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3290                let stats =
3291                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3292                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3293                    info!(
3294                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3295                        stats.rework, stats.skipped, stats.out_of_order
3296                    );
3297                }
3298            }
3299        }
3300
3301        // Phase 19: Sales Quotes, Management KPIs, Budgets
3302        let sales_kpi_budgets =
3303            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3304
3305        // Phase 22: Treasury Data Generation
3306        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3307        // are included in the pre-tax income used by phase_tax_generation.
3308        let treasury =
3309            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3310
3311        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3312        if !treasury.journal_entries.is_empty() {
3313            debug!(
3314                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3315                treasury.journal_entries.len()
3316            );
3317            entries.extend(treasury.journal_entries.iter().cloned());
3318        }
3319
3320        // Phase 20: Tax Generation
3321        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3322
3323        // Phase 20 JEs: Merge tax posting journal entries into main GL
3324        if !tax.tax_posting_journal_entries.is_empty() {
3325            debug!(
3326                "Merging {} tax posting JEs into GL",
3327                tax.tax_posting_journal_entries.len()
3328            );
3329            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3330        }
3331
3332        // Phase 20b: FINAL fraud behavioral bias sweep.
3333        //
3334        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3335        // period close) extend `entries` with new journal entries that may
3336        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3337        // already-fraudulent transactions). Those late additions miss the
3338        // Phase 8b sweep and ship without bias applied — which is exactly
3339        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3340        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3341        //
3342        // Running the sweep one more time here guarantees every is_fraud
3343        // entry — regardless of which phase added it — has bias applied.
3344        // `!is_anomaly` gates out anomaly-injector entries (which already
3345        // got biased inline); the sweep is otherwise idempotent-ish:
3346        // weekend / off_hours re-fire to another valid weekend / off-hour,
3347        // post_close is guarded by `!is_post_close`, and round-dollar
3348        // rescaling on an already-round amount is a no-op (ratio = 1).
3349        {
3350            use datasynth_core::fraud_bias::{
3351                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3352            };
3353            use rand_chacha::rand_core::SeedableRng;
3354            let cfg = FraudBehavioralBiasConfig::default();
3355            if cfg.enabled {
3356                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3357                let mut swept = 0usize;
3358                for entry in entries.iter_mut() {
3359                    if entry.header.is_fraud && !entry.header.is_anomaly {
3360                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3361                        swept += 1;
3362                    }
3363                }
3364                if swept > 0 {
3365                    info!(
3366                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3367                         non-anomaly fraud entries (covers late-added JEs from \
3368                         ECL / provisions / treasury / tax / period-close)"
3369                    );
3370                }
3371            }
3372        }
3373
3374        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3375        // Build supplementary cash flow items from upstream JE data (depreciation,
3376        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3377        {
3378            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3379
3380            let framework_str = {
3381                use datasynth_config::schema::AccountingFrameworkConfig;
3382                match self
3383                    .config
3384                    .accounting_standards
3385                    .framework
3386                    .unwrap_or_default()
3387                {
3388                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3389                        "IFRS"
3390                    }
3391                    _ => "US_GAAP",
3392                }
3393            };
3394
3395            // Sum depreciation debits (account 6000) from close JEs
3396            let depreciation_total: rust_decimal::Decimal = entries
3397                .iter()
3398                .filter(|je| je.header.document_type == "CL")
3399                .flat_map(|je| je.lines.iter())
3400                .filter(|l| l.gl_account.starts_with("6000"))
3401                .map(|l| l.debit_amount)
3402                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3403
3404            // Sum interest expense debits (account 7100)
3405            let interest_paid: rust_decimal::Decimal = entries
3406                .iter()
3407                .flat_map(|je| je.lines.iter())
3408                .filter(|l| l.gl_account.starts_with("7100"))
3409                .map(|l| l.debit_amount)
3410                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3411
3412            // Sum tax expense debits (account 8000)
3413            let tax_paid: rust_decimal::Decimal = entries
3414                .iter()
3415                .flat_map(|je| je.lines.iter())
3416                .filter(|l| l.gl_account.starts_with("8000"))
3417                .map(|l| l.debit_amount)
3418                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3419
3420            // Sum capex debits on fixed assets (account 1500)
3421            let capex: rust_decimal::Decimal = entries
3422                .iter()
3423                .flat_map(|je| je.lines.iter())
3424                .filter(|l| l.gl_account.starts_with("1500"))
3425                .map(|l| l.debit_amount)
3426                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3427
3428            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
3429            let dividends_paid: rust_decimal::Decimal = entries
3430                .iter()
3431                .flat_map(|je| je.lines.iter())
3432                .filter(|l| l.gl_account == "2170")
3433                .map(|l| l.debit_amount)
3434                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3435
3436            let cf_data = CashFlowSourceData {
3437                depreciation_total,
3438                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3439                delta_ar: rust_decimal::Decimal::ZERO,
3440                delta_ap: rust_decimal::Decimal::ZERO,
3441                delta_inventory: rust_decimal::Decimal::ZERO,
3442                capex,
3443                debt_issuance: rust_decimal::Decimal::ZERO,
3444                debt_repayment: rust_decimal::Decimal::ZERO,
3445                interest_paid,
3446                tax_paid,
3447                dividends_paid,
3448                framework: framework_str.to_string(),
3449            };
3450
3451            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3452            if !enhanced_cf_items.is_empty() {
3453                // Merge into ALL cash flow statements (standalone + consolidated)
3454                use datasynth_core::models::StatementType;
3455                let merge_count = enhanced_cf_items.len();
3456                for stmt in financial_reporting
3457                    .financial_statements
3458                    .iter_mut()
3459                    .chain(financial_reporting.consolidated_statements.iter_mut())
3460                    .chain(
3461                        financial_reporting
3462                            .standalone_statements
3463                            .values_mut()
3464                            .flat_map(|v| v.iter_mut()),
3465                    )
3466                {
3467                    if stmt.statement_type == StatementType::CashFlowStatement {
3468                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3469                    }
3470                }
3471                info!(
3472                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3473                    merge_count
3474                );
3475            }
3476        }
3477
3478        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3479        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3480        self.generate_notes_to_financial_statements(
3481            &mut financial_reporting,
3482            &accounting_standards,
3483            &tax,
3484            &hr,
3485            &audit,
3486            &treasury,
3487        );
3488
3489        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3490        // When we have 2+ companies, derive segment data from actual journal entries
3491        // to complement or replace the FS-generator-based segments.
3492        if self.config.companies.len() >= 2 && !entries.is_empty() {
3493            let companies: Vec<(String, String)> = self
3494                .config
3495                .companies
3496                .iter()
3497                .map(|c| (c.code.clone(), c.name.clone()))
3498                .collect();
3499            let ic_elim: rust_decimal::Decimal =
3500                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3501            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3502                .unwrap_or(NaiveDate::MIN);
3503            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3504            let period_label = format!(
3505                "{}-{:02}",
3506                end_date.year(),
3507                (end_date - chrono::Days::new(1)).month()
3508            );
3509
3510            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3511            let (je_segments, je_recon) =
3512                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3513            if !je_segments.is_empty() {
3514                info!(
3515                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3516                    je_segments.len(),
3517                    ic_elim,
3518                );
3519                // Replace if existing segment_reports were empty; otherwise supplement
3520                if financial_reporting.segment_reports.is_empty() {
3521                    financial_reporting.segment_reports = je_segments;
3522                    financial_reporting.segment_reconciliations = vec![je_recon];
3523                } else {
3524                    financial_reporting.segment_reports.extend(je_segments);
3525                    financial_reporting.segment_reconciliations.push(je_recon);
3526                }
3527            }
3528        }
3529
3530        // Phase 21: ESG Data Generation
3531        let esg_snap =
3532            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3533
3534        // Phase 23: Project Accounting Data Generation
3535        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3536
3537        // Phase 24: Process Evolution + Organizational Events
3538        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3539
3540        // Phase 24b: Disruption Events
3541        let disruption_events = self.phase_disruption_events(&mut stats)?;
3542
3543        // Phase 27: Bi-Temporal Vendor Version Chains
3544        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3545
3546        // Phase 28: Entity Relationship Graph + Cross-Process Links
3547        let (entity_relationship_graph, cross_process_links) =
3548            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3549
3550        // Phase 29: Industry-specific GL accounts
3551        let industry_output = self.phase_industry_data(&mut stats);
3552
3553        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3554        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3555
3556        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3557        //
3558        // The neural / hybrid diffusion path was a documented L2 stub
3559        // in v3.x; actual neural-network training requires ML
3560        // infrastructure (PyTorch / candle bindings, GPU access,
3561        // training loops) that was never wired through the
3562        // orchestrator. Rather than keep a silently-no-op block that
3563        // misleads users into thinking neural training happens, v4.0
3564        // acknowledges the config — exposing stats so downstream
3565        // tooling can see the request — but emits a clear warning
3566        // when a non-statistical backend is requested. The statistical
3567        // diffusion backend continues to run via
3568        // `phase_diffusion_enhancement`.
3569        //
3570        // Users who need real neural diffusion: track the roadmap item
3571        // in the v4.x backlog and consider contributing the backend
3572        // (the `DiffusionBackend` trait is the integration point).
3573        if self.config.diffusion.enabled
3574            && (self.config.diffusion.backend == "neural"
3575                || self.config.diffusion.backend == "hybrid")
3576        {
3577            let neural = &self.config.diffusion.neural;
3578            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3579            stats.neural_hybrid_weight = Some(weight);
3580            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3581            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3582            warn!(
3583                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3584                 the neural/hybrid training path is not yet shipped. Config \
3585                 is captured in stats (weight={weight:.2}, strategy={}, \
3586                 columns={}) but no neural training runs. Statistical \
3587                 diffusion (backend='statistical') continues to work.",
3588                self.config.diffusion.backend,
3589                neural.hybrid_strategy,
3590                neural.neural_columns.len(),
3591            );
3592        }
3593
3594        // Phase 19b: Hypergraph Export (after all data is available)
3595        self.phase_hypergraph_export(
3596            &coa,
3597            &entries,
3598            &document_flows,
3599            &sourcing,
3600            &hr,
3601            &manufacturing_snap,
3602            &banking,
3603            &audit,
3604            &financial_reporting,
3605            &ocpm,
3606            &compliance_regulations,
3607            &mut stats,
3608        )?;
3609
3610        // Phase 10c: Additional graph builders (approval, entity, banking)
3611        // These run after all data is available since they need banking/IC data.
3612        if self.phase_config.generate_graph_export {
3613            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3614        }
3615
3616        // Log informational messages for config sections not yet fully wired
3617        if self.config.streaming.enabled {
3618            info!("Note: streaming config is enabled but batch mode does not use it");
3619        }
3620        if self.config.vendor_network.enabled {
3621            debug!("Vendor network config available; relationship graph generation is partial");
3622        }
3623        if self.config.customer_segmentation.enabled {
3624            debug!("Customer segmentation config available; segment-aware generation is partial");
3625        }
3626
3627        // Log final resource statistics
3628        let resource_stats = self.resource_guard.stats();
3629        info!(
3630            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3631            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3632            resource_stats.disk.estimated_bytes_written,
3633            resource_stats.degradation_level
3634        );
3635
3636        // Flush any remaining stream sink data
3637        if let Some(ref sink) = self.phase_sink {
3638            if let Err(e) = sink.flush() {
3639                warn!("Stream sink flush failed: {e}");
3640            }
3641        }
3642
3643        // Build data lineage graph
3644        let lineage = self.build_lineage_graph();
3645
3646        // Evaluate quality gates if enabled in config
3647        let gate_result = if self.config.quality_gates.enabled {
3648            let profile_name = &self.config.quality_gates.profile;
3649            match datasynth_eval::gates::get_profile(profile_name) {
3650                Some(profile) => {
3651                    // Build an evaluation populated with actual generation metrics.
3652                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3653
3654                    // Populate balance sheet evaluation from balance validation results
3655                    if balance_validation.validated {
3656                        eval.coherence.balance =
3657                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3658                                equation_balanced: balance_validation.is_balanced,
3659                                max_imbalance: (balance_validation.total_debits
3660                                    - balance_validation.total_credits)
3661                                    .abs(),
3662                                periods_evaluated: 1,
3663                                periods_imbalanced: if balance_validation.is_balanced {
3664                                    0
3665                                } else {
3666                                    1
3667                                },
3668                                period_results: Vec::new(),
3669                                companies_evaluated: self.config.companies.len(),
3670                            });
3671                    }
3672
3673                    // Set coherence passes based on balance validation
3674                    eval.coherence.passes = balance_validation.is_balanced;
3675                    if !balance_validation.is_balanced {
3676                        eval.coherence
3677                            .failures
3678                            .push("Balance sheet equation not satisfied".to_string());
3679                    }
3680
3681                    // Set statistical score based on entry count (basic sanity)
3682                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3683                    eval.statistical.passes = !entries.is_empty();
3684
3685                    // Set quality score from data quality stats
3686                    eval.quality.overall_score = 0.9; // Default high for generated data
3687                    eval.quality.passes = true;
3688
3689                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3690                    info!(
3691                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3692                        profile_name, result.gates_passed, result.gates_total, result.summary
3693                    );
3694                    Some(result)
3695                }
3696                None => {
3697                    warn!(
3698                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3699                        profile_name
3700                    );
3701                    None
3702                }
3703            }
3704        } else {
3705            None
3706        };
3707
3708        // Generate internal controls if enabled
3709        let internal_controls = if self.config.internal_controls.enabled {
3710            InternalControl::standard_controls()
3711        } else {
3712            Vec::new()
3713        };
3714
3715        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3716        // phases (including fraud-bias sweep at Phase 20b) so derived
3717        // outputs reflect final data.
3718        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3719
3720        // v3.5.1: statistical validation over the final amount
3721        // distribution. Runs *after* all JE-adding phases so the report
3722        // reflects everything the user will see in the output. Returns
3723        // `None` unless `distributions.validation.enabled = true`.
3724        let statistical_validation = self.phase_statistical_validation(&entries)?;
3725
3726        // v4.1.3+: interconnectivity snapshot — tier assignments,
3727        // value-segment labels, industry-specific metadata. Runs after
3728        // master data is settled so it can index stable IDs.
3729        let interconnectivity = self.phase_interconnectivity();
3730
3731        Ok(EnhancedGenerationResult {
3732            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3733            master_data: std::mem::take(&mut self.master_data),
3734            document_flows,
3735            subledger,
3736            ocpm,
3737            audit,
3738            banking,
3739            graph_export,
3740            sourcing,
3741            financial_reporting,
3742            hr,
3743            accounting_standards,
3744            manufacturing: manufacturing_snap,
3745            sales_kpi_budgets,
3746            tax,
3747            esg: esg_snap,
3748            treasury,
3749            project_accounting,
3750            process_evolution,
3751            organizational_events,
3752            disruption_events,
3753            intercompany,
3754            journal_entries: entries,
3755            anomaly_labels,
3756            balance_validation,
3757            data_quality_stats,
3758            quality_issues,
3759            statistics: stats,
3760            lineage: Some(lineage),
3761            gate_result,
3762            internal_controls,
3763            sod_violations,
3764            opening_balances,
3765            subledger_reconciliation,
3766            counterfactual_pairs,
3767            red_flags,
3768            collusion_rings,
3769            temporal_vendor_chains,
3770            entity_relationship_graph,
3771            cross_process_links,
3772            industry_output,
3773            compliance_regulations,
3774            analytics_metadata,
3775            statistical_validation,
3776            interconnectivity,
3777        })
3778    }
3779
3780    /// v4.1.3+: populate the interconnectivity snapshot from
3781    /// previously-inert schema sections. Empty when all sections are
3782    /// disabled.
3783    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3784        use rand::{RngExt, SeedableRng};
3785        use rand_chacha::ChaCha8Rng;
3786
3787        let mut snap = InterconnectivitySnapshot::default();
3788        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3789
3790        // --- Vendor network ---
3791        let vn = &self.config.vendor_network;
3792        if vn.enabled {
3793            let total = self.master_data.vendors.len();
3794            if total > 0 {
3795                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3796                let remaining_after_t1 = total.saturating_sub(tier1_count);
3797                let depth = vn.depth.clamp(1, 3);
3798                let tier2_count = if depth >= 2 {
3799                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3800                    (tier1_count * avg).min(remaining_after_t1)
3801                } else {
3802                    0
3803                };
3804                let tier3_count = total
3805                    .saturating_sub(tier1_count)
3806                    .saturating_sub(tier2_count);
3807
3808                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3809                    let tier = if idx < tier1_count {
3810                        1
3811                    } else if idx < tier1_count + tier2_count {
3812                        2
3813                    } else {
3814                        3
3815                    };
3816                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3817
3818                    // Cluster assignment via configured ratios.
3819                    let cl = &vn.clusters;
3820                    let roll: f64 = rng.random();
3821                    let cluster = if roll < cl.reliable_strategic {
3822                        "reliable_strategic"
3823                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3824                        "standard_operational"
3825                    } else if roll
3826                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3827                    {
3828                        "transactional"
3829                    } else {
3830                        "problematic"
3831                    };
3832                    snap.vendor_clusters
3833                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3834                }
3835                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3836            }
3837        }
3838
3839        // --- Customer segmentation ---
3840        let cs = &self.config.customer_segmentation;
3841        if cs.enabled {
3842            let seg = &cs.value_segments;
3843            for customer in &self.master_data.customers {
3844                let roll: f64 = rng.random();
3845                let value_segment = if roll < seg.enterprise.customer_share {
3846                    "enterprise"
3847                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3848                    "mid_market"
3849                } else if roll
3850                    < seg.enterprise.customer_share
3851                        + seg.mid_market.customer_share
3852                        + seg.smb.customer_share
3853                {
3854                    "smb"
3855                } else {
3856                    "consumer"
3857                };
3858                snap.customer_value_segments
3859                    .push((customer.customer_id.clone(), value_segment.to_string()));
3860
3861                let roll2: f64 = rng.random();
3862                let life = &cs.lifecycle;
3863                let lifecycle = if roll2 < life.prospect_rate {
3864                    "prospect"
3865                } else if roll2 < life.prospect_rate + life.new_rate {
3866                    "new"
3867                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3868                    "growth"
3869                } else if roll2
3870                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3871                {
3872                    "mature"
3873                } else if roll2
3874                    < life.prospect_rate
3875                        + life.new_rate
3876                        + life.growth_rate
3877                        + life.mature_rate
3878                        + life.at_risk_rate
3879                {
3880                    "at_risk"
3881                } else if roll2
3882                    < life.prospect_rate
3883                        + life.new_rate
3884                        + life.growth_rate
3885                        + life.mature_rate
3886                        + life.at_risk_rate
3887                        + life.churned_rate
3888                {
3889                    "churned"
3890                } else {
3891                    "won_back"
3892                };
3893                snap.customer_lifecycle_stages
3894                    .push((customer.customer_id.clone(), lifecycle.to_string()));
3895            }
3896        }
3897
3898        // --- Industry-specific metadata (minimal) ---
3899        let is = &self.config.industry_specific;
3900        if is.enabled {
3901            snap.industry_metadata.push(format!(
3902                "industry_specific.enabled=true (industry={:?})",
3903                self.config.global.industry
3904            ));
3905        }
3906
3907        snap
3908    }
3909
3910    // ========================================================================
3911    // Generation Phase Methods
3912    // ========================================================================
3913
3914    /// Phase 1: Generate Chart of Accounts and update statistics.
3915    fn phase_chart_of_accounts(
3916        &mut self,
3917        stats: &mut EnhancedGenerationStatistics,
3918    ) -> SynthResult<Arc<ChartOfAccounts>> {
3919        info!("Phase 1: Generating Chart of Accounts");
3920        let coa = self.generate_coa()?;
3921        stats.accounts_count = coa.account_count();
3922        info!(
3923            "Chart of Accounts generated: {} accounts",
3924            stats.accounts_count
3925        );
3926        self.check_resources_with_log("post-coa")?;
3927        Ok(coa)
3928    }
3929
3930    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3931    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3932        if self.phase_config.generate_master_data {
3933            info!("Phase 2: Generating Master Data");
3934            self.generate_master_data()?;
3935            stats.vendor_count = self.master_data.vendors.len();
3936            stats.customer_count = self.master_data.customers.len();
3937            stats.material_count = self.master_data.materials.len();
3938            stats.asset_count = self.master_data.assets.len();
3939            stats.employee_count = self.master_data.employees.len();
3940            info!(
3941                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3942                stats.vendor_count, stats.customer_count, stats.material_count,
3943                stats.asset_count, stats.employee_count
3944            );
3945            self.check_resources_with_log("post-master-data")?;
3946        } else {
3947            debug!("Phase 2: Skipped (master data generation disabled)");
3948        }
3949        Ok(())
3950    }
3951
3952    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3953    fn phase_document_flows(
3954        &mut self,
3955        stats: &mut EnhancedGenerationStatistics,
3956    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3957        let mut document_flows = DocumentFlowSnapshot::default();
3958        let mut subledger = SubledgerSnapshot::default();
3959        // Dunning JEs (interest + charges) accumulated here and merged into the
3960        // main FA-JE list below so they appear in the GL.
3961        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3962
3963        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3964            info!("Phase 3: Generating Document Flows");
3965            self.generate_document_flows(&mut document_flows)?;
3966            stats.p2p_chain_count = document_flows.p2p_chains.len();
3967            stats.o2c_chain_count = document_flows.o2c_chains.len();
3968            info!(
3969                "Document flows generated: {} P2P chains, {} O2C chains",
3970                stats.p2p_chain_count, stats.o2c_chain_count
3971            );
3972
3973            // Phase 3b: Link document flows to subledgers (for data coherence)
3974            debug!("Phase 3b: Linking document flows to subledgers");
3975            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3976            stats.ap_invoice_count = subledger.ap_invoices.len();
3977            stats.ar_invoice_count = subledger.ar_invoices.len();
3978            debug!(
3979                "Subledgers linked: {} AP invoices, {} AR invoices",
3980                stats.ap_invoice_count, stats.ar_invoice_count
3981            );
3982
3983            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3984            // Without this step the subledger is systematically overstated because
3985            // amount_remaining is set at invoice creation and never reduced by
3986            // the payments that were generated in the document-flow phase.
3987            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3988            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3989            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3990            debug!("Payment settlements applied to AP and AR subledgers");
3991
3992            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3993            // The as-of date is the last day of the configured period.
3994            if let Ok(start_date) =
3995                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3996            {
3997                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3998                    - chrono::Days::new(1);
3999                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4000                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
4001                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
4002                // derived from JE-level aggregation and will typically differ. This is a known
4003                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
4004                // generated independently. A future reconciliation phase should align them by
4005                // using subledger totals as the authoritative source for BS Receivables.
4006                for company in &self.config.companies {
4007                    let ar_report = ARAgingReport::from_invoices(
4008                        company.code.clone(),
4009                        &subledger.ar_invoices,
4010                        as_of_date,
4011                    );
4012                    subledger.ar_aging_reports.push(ar_report);
4013
4014                    let ap_report = APAgingReport::from_invoices(
4015                        company.code.clone(),
4016                        &subledger.ap_invoices,
4017                        as_of_date,
4018                    );
4019                    subledger.ap_aging_reports.push(ap_report);
4020                }
4021                debug!(
4022                    "AR/AP aging reports built: {} AR, {} AP",
4023                    subledger.ar_aging_reports.len(),
4024                    subledger.ap_aging_reports.len()
4025                );
4026
4027                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4028                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4029                {
4030                    use datasynth_generators::DunningGenerator;
4031                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4032                    for company in &self.config.companies {
4033                        let currency = company.currency.as_str();
4034                        // Collect mutable references to AR invoices for this company
4035                        // (dunning generator updates dunning_info on invoices in-place).
4036                        let mut company_invoices: Vec<
4037                            datasynth_core::models::subledger::ar::ARInvoice,
4038                        > = subledger
4039                            .ar_invoices
4040                            .iter()
4041                            .filter(|inv| inv.company_code == company.code)
4042                            .cloned()
4043                            .collect();
4044
4045                        if company_invoices.is_empty() {
4046                            continue;
4047                        }
4048
4049                        let result = dunning_gen.execute_dunning_run(
4050                            &company.code,
4051                            as_of_date,
4052                            &mut company_invoices,
4053                            currency,
4054                        );
4055
4056                        // Write back updated dunning info to the main AR invoice list
4057                        for updated in &company_invoices {
4058                            if let Some(orig) = subledger
4059                                .ar_invoices
4060                                .iter_mut()
4061                                .find(|i| i.invoice_number == updated.invoice_number)
4062                            {
4063                                orig.dunning_info = updated.dunning_info.clone();
4064                            }
4065                        }
4066
4067                        subledger.dunning_runs.push(result.dunning_run);
4068                        subledger.dunning_letters.extend(result.letters);
4069                        // Dunning JEs (interest + charges) collected into local buffer.
4070                        dunning_journal_entries.extend(result.journal_entries);
4071                    }
4072                    debug!(
4073                        "Dunning runs complete: {} runs, {} letters",
4074                        subledger.dunning_runs.len(),
4075                        subledger.dunning_letters.len()
4076                    );
4077                }
4078            }
4079
4080            self.check_resources_with_log("post-document-flows")?;
4081        } else {
4082            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4083        }
4084
4085        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4086        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4087        if !self.master_data.assets.is_empty() {
4088            debug!("Generating FA subledger records");
4089            let company_code = self
4090                .config
4091                .companies
4092                .first()
4093                .map(|c| c.code.as_str())
4094                .unwrap_or("1000");
4095            let currency = self
4096                .config
4097                .companies
4098                .first()
4099                .map(|c| c.currency.as_str())
4100                .unwrap_or("USD");
4101
4102            let mut fa_gen = datasynth_generators::FAGenerator::new(
4103                datasynth_generators::FAGeneratorConfig::default(),
4104                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4105            );
4106
4107            for asset in &self.master_data.assets {
4108                let (record, je) = fa_gen.generate_asset_acquisition(
4109                    company_code,
4110                    &format!("{:?}", asset.asset_class),
4111                    &asset.description,
4112                    asset.acquisition_date,
4113                    currency,
4114                    asset.cost_center.as_deref(),
4115                );
4116                subledger.fa_records.push(record);
4117                fa_journal_entries.push(je);
4118            }
4119
4120            stats.fa_subledger_count = subledger.fa_records.len();
4121            debug!(
4122                "FA subledger records generated: {} (with {} acquisition JEs)",
4123                stats.fa_subledger_count,
4124                fa_journal_entries.len()
4125            );
4126        }
4127
4128        // Generate Inventory subledger records from master data materials
4129        if !self.master_data.materials.is_empty() {
4130            debug!("Generating Inventory subledger records");
4131            let first_company = self.config.companies.first();
4132            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4133            let inv_currency = first_company
4134                .map(|c| c.currency.clone())
4135                .unwrap_or_else(|| "USD".to_string());
4136
4137            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4138                datasynth_generators::InventoryGeneratorConfig::default(),
4139                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4140                inv_currency.clone(),
4141            );
4142
4143            for (i, material) in self.master_data.materials.iter().enumerate() {
4144                let plant = format!("PLANT{:02}", (i % 3) + 1);
4145                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4146                let initial_qty = rust_decimal::Decimal::from(
4147                    material
4148                        .safety_stock
4149                        .to_string()
4150                        .parse::<i64>()
4151                        .unwrap_or(100),
4152                );
4153
4154                let position = inv_gen.generate_position(
4155                    company_code,
4156                    &plant,
4157                    &storage_loc,
4158                    &material.material_id,
4159                    &material.description,
4160                    initial_qty,
4161                    Some(material.standard_cost),
4162                    &inv_currency,
4163                );
4164                subledger.inventory_positions.push(position);
4165            }
4166
4167            stats.inventory_subledger_count = subledger.inventory_positions.len();
4168            debug!(
4169                "Inventory subledger records generated: {}",
4170                stats.inventory_subledger_count
4171            );
4172        }
4173
4174        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4175        if !subledger.fa_records.is_empty() {
4176            if let Ok(start_date) =
4177                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4178            {
4179                let company_code = self
4180                    .config
4181                    .companies
4182                    .first()
4183                    .map(|c| c.code.as_str())
4184                    .unwrap_or("1000");
4185                let fiscal_year = start_date.year();
4186                let start_period = start_date.month();
4187                let end_period =
4188                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4189
4190                let depr_cfg = FaDepreciationScheduleConfig {
4191                    fiscal_year,
4192                    start_period,
4193                    end_period,
4194                    seed_offset: 800,
4195                };
4196                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4197                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4198                let run_count = runs.len();
4199                subledger.depreciation_runs = runs;
4200                debug!(
4201                    "Depreciation runs generated: {} runs for {} periods",
4202                    run_count, self.config.global.period_months
4203                );
4204            }
4205        }
4206
4207        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4208        if !subledger.inventory_positions.is_empty() {
4209            if let Ok(start_date) =
4210                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4211            {
4212                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4213                    - chrono::Days::new(1);
4214
4215                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4216                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4217
4218                for company in &self.config.companies {
4219                    let result = inv_val_gen.generate(
4220                        &company.code,
4221                        &subledger.inventory_positions,
4222                        as_of_date,
4223                    );
4224                    subledger.inventory_valuations.push(result);
4225                }
4226                debug!(
4227                    "Inventory valuations generated: {} company reports",
4228                    subledger.inventory_valuations.len()
4229                );
4230            }
4231        }
4232
4233        Ok((document_flows, subledger, fa_journal_entries))
4234    }
4235
4236    /// Phase 3c: Generate OCPM events from document flows.
4237    #[allow(clippy::too_many_arguments)]
4238    fn phase_ocpm_events(
4239        &mut self,
4240        document_flows: &DocumentFlowSnapshot,
4241        sourcing: &SourcingSnapshot,
4242        hr: &HrSnapshot,
4243        manufacturing: &ManufacturingSnapshot,
4244        banking: &BankingSnapshot,
4245        audit: &AuditSnapshot,
4246        financial_reporting: &FinancialReportingSnapshot,
4247        stats: &mut EnhancedGenerationStatistics,
4248    ) -> SynthResult<OcpmSnapshot> {
4249        let degradation = self.check_resources()?;
4250        if degradation >= DegradationLevel::Reduced {
4251            debug!(
4252                "Phase skipped due to resource pressure (degradation: {:?})",
4253                degradation
4254            );
4255            return Ok(OcpmSnapshot::default());
4256        }
4257        if self.phase_config.generate_ocpm_events {
4258            info!("Phase 3c: Generating OCPM Events");
4259            let ocpm_snapshot = self.generate_ocpm_events(
4260                document_flows,
4261                sourcing,
4262                hr,
4263                manufacturing,
4264                banking,
4265                audit,
4266                financial_reporting,
4267            )?;
4268            stats.ocpm_event_count = ocpm_snapshot.event_count;
4269            stats.ocpm_object_count = ocpm_snapshot.object_count;
4270            stats.ocpm_case_count = ocpm_snapshot.case_count;
4271            info!(
4272                "OCPM events generated: {} events, {} objects, {} cases",
4273                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4274            );
4275            self.check_resources_with_log("post-ocpm")?;
4276            Ok(ocpm_snapshot)
4277        } else {
4278            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4279            Ok(OcpmSnapshot::default())
4280        }
4281    }
4282
4283    /// Phase 4: Generate journal entries from document flows and standalone generation.
4284    fn phase_journal_entries(
4285        &mut self,
4286        coa: &Arc<ChartOfAccounts>,
4287        document_flows: &DocumentFlowSnapshot,
4288        _stats: &mut EnhancedGenerationStatistics,
4289    ) -> SynthResult<Vec<JournalEntry>> {
4290        let mut entries = Vec::new();
4291
4292        // Phase 4a: Generate JEs from document flows (for data coherence)
4293        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4294            debug!("Phase 4a: Generating JEs from document flows");
4295            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4296            debug!("Generated {} JEs from document flows", flow_entries.len());
4297            entries.extend(flow_entries);
4298        }
4299
4300        // Phase 4b: Generate standalone journal entries
4301        if self.phase_config.generate_journal_entries {
4302            info!("Phase 4: Generating Journal Entries");
4303            let je_entries = self.generate_journal_entries(coa)?;
4304            info!("Generated {} standalone journal entries", je_entries.len());
4305            entries.extend(je_entries);
4306        } else {
4307            debug!("Phase 4: Skipped (journal entry generation disabled)");
4308        }
4309
4310        // Phase 4c (shard mode): inject pre-built IC journal entries from
4311        // `ShardContext`. When running standalone (no group engine), this
4312        // is a no-op. See crate::shard_context::ShardContext for rationale.
4313        if let Some(ctx) = &self.shard_context {
4314            if !ctx.extra_journal_entries.is_empty() {
4315                debug!(
4316                    "Phase 4c: appending {} shard-mode IC journal entries",
4317                    ctx.extra_journal_entries.len()
4318                );
4319                entries.extend(ctx.extra_journal_entries.iter().cloned());
4320            }
4321        }
4322
4323        if !entries.is_empty() {
4324            // Note: stats.total_entries/total_line_items are set in generate()
4325            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4326            self.check_resources_with_log("post-journal-entries")?;
4327        }
4328
4329        Ok(entries)
4330    }
4331
4332    /// Phase 5: Inject anomalies into journal entries.
4333    fn phase_anomaly_injection(
4334        &mut self,
4335        entries: &mut [JournalEntry],
4336        actions: &DegradationActions,
4337        stats: &mut EnhancedGenerationStatistics,
4338    ) -> SynthResult<AnomalyLabels> {
4339        if self.phase_config.inject_anomalies
4340            && !entries.is_empty()
4341            && !actions.skip_anomaly_injection
4342        {
4343            info!("Phase 5: Injecting Anomalies");
4344            let result = self.inject_anomalies(entries)?;
4345            stats.anomalies_injected = result.labels.len();
4346            info!("Injected {} anomalies", stats.anomalies_injected);
4347            self.check_resources_with_log("post-anomaly-injection")?;
4348            Ok(result)
4349        } else if actions.skip_anomaly_injection {
4350            warn!("Phase 5: Skipped due to resource degradation");
4351            Ok(AnomalyLabels::default())
4352        } else {
4353            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4354            Ok(AnomalyLabels::default())
4355        }
4356    }
4357
4358    /// Phase 6: Validate balance sheet equation on journal entries.
4359    fn phase_balance_validation(
4360        &mut self,
4361        entries: &[JournalEntry],
4362    ) -> SynthResult<BalanceValidationResult> {
4363        if self.phase_config.validate_balances && !entries.is_empty() {
4364            debug!("Phase 6: Validating Balances");
4365            let balance_validation = self.validate_journal_entries(entries)?;
4366            if balance_validation.is_balanced {
4367                debug!("Balance validation passed");
4368            } else {
4369                warn!(
4370                    "Balance validation found {} errors",
4371                    balance_validation.validation_errors.len()
4372                );
4373            }
4374            Ok(balance_validation)
4375        } else {
4376            Ok(BalanceValidationResult::default())
4377        }
4378    }
4379
4380    /// Validate that every `gl_account` referenced in `entries` exists in the
4381    /// chart of accounts.
4382    ///
4383    /// Always emits a warn-level log when the COA is missing accounts; in
4384    /// strict mode (`phase_config.validate_coa_coverage_strict`) returns
4385    /// `SynthError::generation` so the caller can fail fast.
4386    fn validate_coa_coverage(
4387        &self,
4388        entries: &[JournalEntry],
4389        coa: &ChartOfAccounts,
4390    ) -> SynthResult<()> {
4391        if entries.is_empty() {
4392            return Ok(());
4393        }
4394        let coa_set: std::collections::HashSet<&str> = coa
4395            .accounts
4396            .iter()
4397            .map(|a| a.account_number.as_str())
4398            .collect();
4399        let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4400        for je in entries {
4401            for line in je.lines.iter() {
4402                if !coa_set.contains(line.gl_account.as_str()) {
4403                    missing.insert(line.gl_account.clone());
4404                }
4405            }
4406        }
4407        if missing.is_empty() {
4408            debug!("COA coverage validation passed");
4409            return Ok(());
4410        }
4411        let msg = format!(
4412            "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4413            missing.len(),
4414            missing.iter().take(10).collect::<Vec<_>>()
4415        );
4416        if self.phase_config.validate_coa_coverage_strict {
4417            Err(SynthError::generation(msg))
4418        } else {
4419            warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4420            Ok(())
4421        }
4422    }
4423
4424    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4425    fn phase_data_quality_injection(
4426        &mut self,
4427        entries: &mut [JournalEntry],
4428        actions: &DegradationActions,
4429        stats: &mut EnhancedGenerationStatistics,
4430    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4431        if self.phase_config.inject_data_quality
4432            && !entries.is_empty()
4433            && !actions.skip_data_quality
4434        {
4435            info!("Phase 7: Injecting Data Quality Variations");
4436            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4437            stats.data_quality_issues = dq_stats.records_with_issues;
4438            info!("Injected {} data quality issues", stats.data_quality_issues);
4439            self.check_resources_with_log("post-data-quality")?;
4440            Ok((dq_stats, quality_issues))
4441        } else if actions.skip_data_quality {
4442            warn!("Phase 7: Skipped due to resource degradation");
4443            // v4.4.1: report the denominator (entries seen) even when
4444            // injection is skipped, so downstream consumers can tell
4445            // "skipped, 0/N" apart from "ran but found nothing".
4446            Ok((stats_with_denominator(entries.len()), Vec::new()))
4447        } else {
4448            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4449            Ok((stats_with_denominator(entries.len()), Vec::new()))
4450        }
4451    }
4452
4453    /// Phase 10b: Generate period-close journal entries.
4454    ///
4455    /// Generates:
4456    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4457    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4458    ///    for the configured period.
4459    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4460    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4461    ///    earnings via the Income Summary (3600) clearing account.
4462    fn phase_period_close(
4463        &mut self,
4464        entries: &mut Vec<JournalEntry>,
4465        subledger: &SubledgerSnapshot,
4466        stats: &mut EnhancedGenerationStatistics,
4467    ) -> SynthResult<()> {
4468        if !self.phase_config.generate_period_close || entries.is_empty() {
4469            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4470            return Ok(());
4471        }
4472
4473        info!("Phase 10b: Generating period-close journal entries");
4474
4475        use datasynth_core::accounts::{
4476            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4477        };
4478        use rust_decimal::Decimal;
4479
4480        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4481            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4482        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4483        // Posting date for close entries is the last day of the period
4484        let close_date = end_date - chrono::Days::new(1);
4485
4486        // Statutory tax rate (21% — configurable rates come in later tiers)
4487        let tax_rate = Decimal::new(21, 2); // 0.21
4488
4489        // Collect company codes from config
4490        let company_codes: Vec<String> = self
4491            .config
4492            .companies
4493            .iter()
4494            .map(|c| c.code.clone())
4495            .collect();
4496
4497        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4498        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4499        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4500
4501        // --- Depreciation JEs (per asset) ---
4502        // Compute period depreciation for each active fixed asset using straight-line method.
4503        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4504        let period_months = self.config.global.period_months;
4505        for asset in &subledger.fa_records {
4506            // Skip assets that are inactive / fully depreciated / non-depreciable
4507            use datasynth_core::models::subledger::fa::AssetStatus;
4508            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4509                continue;
4510            }
4511            let useful_life_months = asset.useful_life_months();
4512            if useful_life_months == 0 {
4513                // Land or CIP — not depreciated
4514                continue;
4515            }
4516            let salvage_value = asset.salvage_value();
4517            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4518            if depreciable_base == Decimal::ZERO {
4519                continue;
4520            }
4521            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4522                * Decimal::from(period_months))
4523            .round_dp(2);
4524            if period_depr <= Decimal::ZERO {
4525                continue;
4526            }
4527
4528            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4529            depr_header.document_type = "CL".to_string();
4530            depr_header.header_text = Some(format!(
4531                "Depreciation - {} {}",
4532                asset.asset_number, asset.description
4533            ));
4534            depr_header.created_by = "CLOSE_ENGINE".to_string();
4535            depr_header.source = TransactionSource::Automated;
4536            depr_header.business_process = Some(BusinessProcess::R2R);
4537
4538            let doc_id = depr_header.document_id;
4539            let mut depr_je = JournalEntry::new(depr_header);
4540
4541            // DR Depreciation Expense (6000)
4542            depr_je.add_line(JournalEntryLine::debit(
4543                doc_id,
4544                1,
4545                expense_accounts::DEPRECIATION.to_string(),
4546                period_depr,
4547            ));
4548            // CR Accumulated Depreciation (1510)
4549            depr_je.add_line(JournalEntryLine::credit(
4550                doc_id,
4551                2,
4552                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4553                period_depr,
4554            ));
4555
4556            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4557            close_jes.push(depr_je);
4558        }
4559
4560        if !subledger.fa_records.is_empty() {
4561            debug!(
4562                "Generated {} depreciation JEs from {} FA records",
4563                close_jes.len(),
4564                subledger.fa_records.len()
4565            );
4566        }
4567
4568        // --- Accrual entries (standard period-end accruals per company) ---
4569        // Generate standard accrued expense entries (utilities, rent, interest) using
4570        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4571        {
4572            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4573            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4574            // v3.4.3: snap reversal dates to business days. No-op when
4575            // temporal_patterns.business_days is disabled.
4576            if let Some(ctx) = &self.temporal_context {
4577                accrual_gen.set_temporal_context(Arc::clone(ctx));
4578            }
4579
4580            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4581            let accrual_items: &[(&str, &str, &str)] = &[
4582                ("Accrued Utilities", "6200", "2100"),
4583                ("Accrued Rent", "6300", "2100"),
4584                ("Accrued Interest", "6100", "2150"),
4585            ];
4586
4587            for company_code in &company_codes {
4588                // Estimate company revenue from existing JEs
4589                let company_revenue: Decimal = entries
4590                    .iter()
4591                    .filter(|e| e.header.company_code == *company_code)
4592                    .flat_map(|e| e.lines.iter())
4593                    .filter(|l| l.gl_account.starts_with('4'))
4594                    .map(|l| l.credit_amount - l.debit_amount)
4595                    .fold(Decimal::ZERO, |acc, v| acc + v);
4596
4597                if company_revenue <= Decimal::ZERO {
4598                    continue;
4599                }
4600
4601                // Use 0.5% of period revenue per accrual item as a proxy
4602                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4603                if accrual_base <= Decimal::ZERO {
4604                    continue;
4605                }
4606
4607                for (description, expense_acct, liability_acct) in accrual_items {
4608                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4609                        company_code,
4610                        description,
4611                        accrual_base,
4612                        expense_acct,
4613                        liability_acct,
4614                        close_date,
4615                        None,
4616                    );
4617                    close_jes.push(accrual_je);
4618                    if let Some(rev_je) = reversal_je {
4619                        close_jes.push(rev_je);
4620                    }
4621                }
4622            }
4623
4624            debug!(
4625                "Generated accrual entries for {} companies",
4626                company_codes.len()
4627            );
4628        }
4629
4630        for company_code in &company_codes {
4631            // Calculate net income for this company from existing JEs:
4632            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4633            // Revenue (4xxx): credit-normal, so net = credits - debits
4634            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4635            let mut total_revenue = Decimal::ZERO;
4636            let mut total_expenses = Decimal::ZERO;
4637
4638            for entry in entries.iter() {
4639                if entry.header.company_code != *company_code {
4640                    continue;
4641                }
4642                for line in &entry.lines {
4643                    let category = AccountCategory::from_account(&line.gl_account);
4644                    match category {
4645                        AccountCategory::Revenue => {
4646                            // Revenue is credit-normal: net revenue = credits - debits
4647                            total_revenue += line.credit_amount - line.debit_amount;
4648                        }
4649                        AccountCategory::Cogs
4650                        | AccountCategory::OperatingExpense
4651                        | AccountCategory::OtherIncomeExpense
4652                        | AccountCategory::Tax => {
4653                            // Expenses are debit-normal: net expense = debits - credits
4654                            total_expenses += line.debit_amount - line.credit_amount;
4655                        }
4656                        _ => {}
4657                    }
4658                }
4659            }
4660
4661            let pre_tax_income = total_revenue - total_expenses;
4662
4663            // Skip if no income statement activity
4664            if pre_tax_income == Decimal::ZERO {
4665                debug!(
4666                    "Company {}: no pre-tax income, skipping period close",
4667                    company_code
4668                );
4669                continue;
4670            }
4671
4672            // --- Tax provision / DTA JE ---
4673            if pre_tax_income > Decimal::ZERO {
4674                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4675                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4676
4677                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4678                tax_header.document_type = "CL".to_string();
4679                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4680                tax_header.created_by = "CLOSE_ENGINE".to_string();
4681                tax_header.source = TransactionSource::Automated;
4682                tax_header.business_process = Some(BusinessProcess::R2R);
4683
4684                let doc_id = tax_header.document_id;
4685                let mut tax_je = JournalEntry::new(tax_header);
4686
4687                // DR Tax Expense (8000)
4688                tax_je.add_line(JournalEntryLine::debit(
4689                    doc_id,
4690                    1,
4691                    tax_accounts::TAX_EXPENSE.to_string(),
4692                    tax_amount,
4693                ));
4694                // CR Income Tax Payable (2130)
4695                tax_je.add_line(JournalEntryLine::credit(
4696                    doc_id,
4697                    2,
4698                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4699                    tax_amount,
4700                ));
4701
4702                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4703                close_jes.push(tax_je);
4704            } else {
4705                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4706                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4707                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4708                if dta_amount > Decimal::ZERO {
4709                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4710                    dta_header.document_type = "CL".to_string();
4711                    dta_header.header_text =
4712                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4713                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4714                    dta_header.source = TransactionSource::Automated;
4715                    dta_header.business_process = Some(BusinessProcess::R2R);
4716
4717                    let doc_id = dta_header.document_id;
4718                    let mut dta_je = JournalEntry::new(dta_header);
4719
4720                    // DR Deferred Tax Asset (1600)
4721                    dta_je.add_line(JournalEntryLine::debit(
4722                        doc_id,
4723                        1,
4724                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4725                        dta_amount,
4726                    ));
4727                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4728                    // reflecting the benefit of the future deductible temporary difference.
4729                    dta_je.add_line(JournalEntryLine::credit(
4730                        doc_id,
4731                        2,
4732                        tax_accounts::TAX_EXPENSE.to_string(),
4733                        dta_amount,
4734                    ));
4735
4736                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4737                    close_jes.push(dta_je);
4738                    debug!(
4739                        "Company {}: loss year — recognised DTA of {}",
4740                        company_code, dta_amount
4741                    );
4742                }
4743            }
4744
4745            // --- Dividend JEs (v2.4) ---
4746            // If the entity is profitable after tax, declare a 10% dividend payout.
4747            // This runs AFTER tax provision so the dividend is based on post-tax income
4748            // but BEFORE the retained earnings close so the RE transfer reflects the
4749            // reduced balance.
4750            let tax_provision = if pre_tax_income > Decimal::ZERO {
4751                (pre_tax_income * tax_rate).round_dp(2)
4752            } else {
4753                Decimal::ZERO
4754            };
4755            let net_income = pre_tax_income - tax_provision;
4756
4757            if net_income > Decimal::ZERO {
4758                use datasynth_generators::DividendGenerator;
4759                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4760                let mut div_gen = DividendGenerator::new(self.seed + 460);
4761                let currency_str = self
4762                    .config
4763                    .companies
4764                    .iter()
4765                    .find(|c| c.code == *company_code)
4766                    .map(|c| c.currency.as_str())
4767                    .unwrap_or("USD");
4768                let div_result = div_gen.generate(
4769                    company_code,
4770                    close_date,
4771                    Decimal::new(1, 0), // $1 per share placeholder
4772                    dividend_amount,
4773                    currency_str,
4774                );
4775                let div_je_count = div_result.journal_entries.len();
4776                close_jes.extend(div_result.journal_entries);
4777                debug!(
4778                    "Company {}: declared dividend of {} ({} JEs)",
4779                    company_code, dividend_amount, div_je_count
4780                );
4781            }
4782
4783            // --- Income statement closing JE ---
4784            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4785            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4786            // close the pre-tax loss into Retained Earnings as-is.
4787            if net_income != Decimal::ZERO {
4788                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4789                close_header.document_type = "CL".to_string();
4790                close_header.header_text =
4791                    Some(format!("Income statement close - {}", company_code));
4792                close_header.created_by = "CLOSE_ENGINE".to_string();
4793                close_header.source = TransactionSource::Automated;
4794                close_header.business_process = Some(BusinessProcess::R2R);
4795
4796                let doc_id = close_header.document_id;
4797                let mut close_je = JournalEntry::new(close_header);
4798
4799                let abs_net_income = net_income.abs();
4800
4801                if net_income > Decimal::ZERO {
4802                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4803                    close_je.add_line(JournalEntryLine::debit(
4804                        doc_id,
4805                        1,
4806                        equity_accounts::INCOME_SUMMARY.to_string(),
4807                        abs_net_income,
4808                    ));
4809                    close_je.add_line(JournalEntryLine::credit(
4810                        doc_id,
4811                        2,
4812                        equity_accounts::RETAINED_EARNINGS.to_string(),
4813                        abs_net_income,
4814                    ));
4815                } else {
4816                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4817                    close_je.add_line(JournalEntryLine::debit(
4818                        doc_id,
4819                        1,
4820                        equity_accounts::RETAINED_EARNINGS.to_string(),
4821                        abs_net_income,
4822                    ));
4823                    close_je.add_line(JournalEntryLine::credit(
4824                        doc_id,
4825                        2,
4826                        equity_accounts::INCOME_SUMMARY.to_string(),
4827                        abs_net_income,
4828                    ));
4829                }
4830
4831                debug_assert!(
4832                    close_je.is_balanced(),
4833                    "Income statement closing JE must be balanced"
4834                );
4835                close_jes.push(close_je);
4836            }
4837        }
4838
4839        let close_count = close_jes.len();
4840        if close_count > 0 {
4841            info!("Generated {} period-close journal entries", close_count);
4842            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4843            entries.extend(close_jes);
4844            stats.period_close_je_count = close_count;
4845
4846            // Update total entry/line-item stats
4847            stats.total_entries = entries.len() as u64;
4848            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4849        } else {
4850            debug!("No period-close entries generated (no income statement activity)");
4851        }
4852
4853        Ok(())
4854    }
4855
4856    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4857    fn phase_audit_data(
4858        &mut self,
4859        entries: &[JournalEntry],
4860        stats: &mut EnhancedGenerationStatistics,
4861    ) -> SynthResult<AuditSnapshot> {
4862        if self.phase_config.generate_audit {
4863            info!("Phase 8: Generating Audit Data");
4864            let audit_snapshot = self.generate_audit_data(entries)?;
4865            stats.audit_engagement_count = audit_snapshot.engagements.len();
4866            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4867            stats.audit_evidence_count = audit_snapshot.evidence.len();
4868            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4869            stats.audit_finding_count = audit_snapshot.findings.len();
4870            stats.audit_judgment_count = audit_snapshot.judgments.len();
4871            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4872            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4873            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4874            stats.audit_sample_count = audit_snapshot.samples.len();
4875            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4876            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4877            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4878            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4879            stats.audit_related_party_transaction_count =
4880                audit_snapshot.related_party_transactions.len();
4881            info!(
4882                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4883                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4884                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4885                 {} RP transactions",
4886                stats.audit_engagement_count,
4887                stats.audit_workpaper_count,
4888                stats.audit_evidence_count,
4889                stats.audit_risk_count,
4890                stats.audit_finding_count,
4891                stats.audit_judgment_count,
4892                stats.audit_confirmation_count,
4893                stats.audit_procedure_step_count,
4894                stats.audit_sample_count,
4895                stats.audit_analytical_result_count,
4896                stats.audit_ia_function_count,
4897                stats.audit_ia_report_count,
4898                stats.audit_related_party_count,
4899                stats.audit_related_party_transaction_count,
4900            );
4901            self.check_resources_with_log("post-audit")?;
4902            Ok(audit_snapshot)
4903        } else {
4904            debug!("Phase 8: Skipped (audit generation disabled)");
4905            Ok(AuditSnapshot::default())
4906        }
4907    }
4908
4909    /// Phase 9: Generate banking KYC/AML data.
4910    fn phase_banking_data(
4911        &mut self,
4912        stats: &mut EnhancedGenerationStatistics,
4913    ) -> SynthResult<BankingSnapshot> {
4914        if self.phase_config.generate_banking {
4915            info!("Phase 9: Generating Banking KYC/AML Data");
4916            let banking_snapshot = self.generate_banking_data()?;
4917            stats.banking_customer_count = banking_snapshot.customers.len();
4918            stats.banking_account_count = banking_snapshot.accounts.len();
4919            stats.banking_transaction_count = banking_snapshot.transactions.len();
4920            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4921            info!(
4922                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4923                stats.banking_customer_count, stats.banking_account_count,
4924                stats.banking_transaction_count, stats.banking_suspicious_count
4925            );
4926            self.check_resources_with_log("post-banking")?;
4927            Ok(banking_snapshot)
4928        } else {
4929            debug!("Phase 9: Skipped (banking generation disabled)");
4930            Ok(BankingSnapshot::default())
4931        }
4932    }
4933
4934    /// Phase 10: Export accounting network graphs for ML training.
4935    fn phase_graph_export(
4936        &mut self,
4937        entries: &[JournalEntry],
4938        coa: &Arc<ChartOfAccounts>,
4939        stats: &mut EnhancedGenerationStatistics,
4940    ) -> SynthResult<GraphExportSnapshot> {
4941        if self.phase_config.generate_graph_export && !entries.is_empty() {
4942            info!("Phase 10: Exporting Accounting Network Graphs");
4943            match self.export_graphs(entries, coa, stats) {
4944                Ok(snapshot) => {
4945                    info!(
4946                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4947                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4948                    );
4949                    Ok(snapshot)
4950                }
4951                Err(e) => {
4952                    warn!("Phase 10: Graph export failed: {}", e);
4953                    Ok(GraphExportSnapshot::default())
4954                }
4955            }
4956        } else {
4957            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4958            Ok(GraphExportSnapshot::default())
4959        }
4960    }
4961
4962    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4963    #[allow(clippy::too_many_arguments)]
4964    fn phase_hypergraph_export(
4965        &self,
4966        coa: &Arc<ChartOfAccounts>,
4967        entries: &[JournalEntry],
4968        document_flows: &DocumentFlowSnapshot,
4969        sourcing: &SourcingSnapshot,
4970        hr: &HrSnapshot,
4971        manufacturing: &ManufacturingSnapshot,
4972        banking: &BankingSnapshot,
4973        audit: &AuditSnapshot,
4974        financial_reporting: &FinancialReportingSnapshot,
4975        ocpm: &OcpmSnapshot,
4976        compliance: &ComplianceRegulationsSnapshot,
4977        stats: &mut EnhancedGenerationStatistics,
4978    ) -> SynthResult<()> {
4979        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4980            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4981            match self.export_hypergraph(
4982                coa,
4983                entries,
4984                document_flows,
4985                sourcing,
4986                hr,
4987                manufacturing,
4988                banking,
4989                audit,
4990                financial_reporting,
4991                ocpm,
4992                compliance,
4993                stats,
4994            ) {
4995                Ok(info) => {
4996                    info!(
4997                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4998                        info.node_count, info.edge_count, info.hyperedge_count
4999                    );
5000                }
5001                Err(e) => {
5002                    warn!("Phase 10b: Hypergraph export failed: {}", e);
5003                }
5004            }
5005        } else {
5006            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5007        }
5008        Ok(())
5009    }
5010
5011    /// Phase 11: LLM Enrichment.
5012    ///
5013    /// Uses an LLM provider (mock by default) to enrich vendor names with
5014    /// realistic, context-aware names. This phase is non-blocking: failures
5015    /// log a warning but do not stop the generation pipeline.
5016    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5017        if !self.config.llm.enabled {
5018            debug!("Phase 11: Skipped (LLM enrichment disabled)");
5019            return;
5020        }
5021
5022        info!("Phase 11: Starting LLM Enrichment");
5023        let start = std::time::Instant::now();
5024
5025        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5026            // Select provider: use HttpLlmProvider when a non-mock provider is configured
5027            // and the corresponding API key environment variable is present.
5028            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5029                let schema_provider = &self.config.llm.provider;
5030                let api_key_env = match schema_provider.as_str() {
5031                    "openai" => Some("OPENAI_API_KEY"),
5032                    "anthropic" => Some("ANTHROPIC_API_KEY"),
5033                    "custom" => Some("LLM_API_KEY"),
5034                    _ => None,
5035                };
5036                if let Some(key_env) = api_key_env {
5037                    if std::env::var(key_env).is_ok() {
5038                        let llm_config = datasynth_core::llm::LlmConfig {
5039                            model: self.config.llm.model.clone(),
5040                            api_key_env: key_env.to_string(),
5041                            ..datasynth_core::llm::LlmConfig::default()
5042                        };
5043                        match HttpLlmProvider::new(llm_config) {
5044                            Ok(p) => Arc::new(p),
5045                            Err(e) => {
5046                                warn!(
5047                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
5048                                    e
5049                                );
5050                                Arc::new(MockLlmProvider::new(self.seed))
5051                            }
5052                        }
5053                    } else {
5054                        Arc::new(MockLlmProvider::new(self.seed))
5055                    }
5056                } else {
5057                    Arc::new(MockLlmProvider::new(self.seed))
5058                }
5059            };
5060            // v4.1.1+: multi-category enrichment. Vendors remain the
5061            // default path; customers and materials opt in via
5062            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5063            let industry = format!("{:?}", self.config.global.industry);
5064
5065            let vendor_enricher =
5066                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5067            let max_vendors = self
5068                .config
5069                .llm
5070                .max_vendor_enrichments
5071                .min(self.master_data.vendors.len());
5072            let mut vendors_enriched = 0usize;
5073            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5074                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5075                    Ok(name) => {
5076                        vendor.name = name;
5077                        vendors_enriched += 1;
5078                    }
5079                    Err(e) => warn!(
5080                        "LLM vendor enrichment failed for {}: {}",
5081                        vendor.vendor_id, e
5082                    ),
5083                }
5084            }
5085
5086            let mut customers_enriched = 0usize;
5087            if self.config.llm.enrich_customers {
5088                let customer_enricher =
5089                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5090                        &provider,
5091                    ));
5092                let max_customers = self
5093                    .config
5094                    .llm
5095                    .max_customer_enrichments
5096                    .min(self.master_data.customers.len());
5097                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5098                    match customer_enricher.enrich_customer_name(
5099                        &industry,
5100                        "general",
5101                        &customer.country,
5102                    ) {
5103                        Ok(name) => {
5104                            customer.name = name;
5105                            customers_enriched += 1;
5106                        }
5107                        Err(e) => warn!(
5108                            "LLM customer enrichment failed for {}: {}",
5109                            customer.customer_id, e
5110                        ),
5111                    }
5112                }
5113            }
5114
5115            let mut materials_enriched = 0usize;
5116            if self.config.llm.enrich_materials {
5117                let material_enricher =
5118                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5119                        &provider,
5120                    ));
5121                let max_materials = self
5122                    .config
5123                    .llm
5124                    .max_material_enrichments
5125                    .min(self.master_data.materials.len());
5126                for material in self.master_data.materials.iter_mut().take(max_materials) {
5127                    let material_type = format!("{:?}", material.material_type);
5128                    match material_enricher.enrich_material_description(&material_type, &industry) {
5129                        Ok(desc) => {
5130                            material.description = desc;
5131                            materials_enriched += 1;
5132                        }
5133                        Err(e) => warn!(
5134                            "LLM material enrichment failed for {}: {}",
5135                            material.material_id, e
5136                        ),
5137                    }
5138                }
5139            }
5140
5141            (vendors_enriched, customers_enriched, materials_enriched)
5142        }));
5143
5144        match result {
5145            Ok((v, c, m)) => {
5146                stats.llm_vendors_enriched = v;
5147                stats.llm_customers_enriched = c;
5148                stats.llm_materials_enriched = m;
5149                let elapsed = start.elapsed();
5150                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5151                info!(
5152                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5153                    v, c, m, stats.llm_enrichment_ms
5154                );
5155            }
5156            Err(_) => {
5157                let elapsed = start.elapsed();
5158                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5159                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5160            }
5161        }
5162    }
5163
5164    /// Phase 12: Diffusion Enhancement.
5165    ///
5166    /// Generates a sample set matching distribution properties from the
5167    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5168    /// - `"statistical"` (default) — moment-matching backend, always fast.
5169    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5170    ///   the `neural` Cargo feature; falls back to statistical when the
5171    ///   feature isn't compiled in, with a loud warning.
5172    ///
5173    /// This phase is non-blocking: failures log a warning but do not
5174    /// stop the pipeline.
5175    fn phase_diffusion_enhancement(
5176        &self,
5177        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5178        stats: &mut EnhancedGenerationStatistics,
5179    ) {
5180        if !self.config.diffusion.enabled {
5181            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5182            return;
5183        }
5184
5185        info!("Phase 12: Starting Diffusion Enhancement");
5186        let start = std::time::Instant::now();
5187
5188        let backend_choice = self.config.diffusion.backend.as_str();
5189        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5190
5191        if use_neural {
5192            #[cfg(feature = "neural")]
5193            {
5194                match self.run_neural_diffusion_phase(entries) {
5195                    Ok(sample_count) => {
5196                        stats.diffusion_samples_generated = sample_count;
5197                        let elapsed = start.elapsed();
5198                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5199                        info!(
5200                            "Phase 12 complete ({}): {} samples in {}ms",
5201                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5202                        );
5203                        return;
5204                    }
5205                    Err(e) => {
5206                        warn!(
5207                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5208                        );
5209                        // Fall through to statistical path below.
5210                    }
5211                }
5212            }
5213            #[cfg(not(feature = "neural"))]
5214            {
5215                warn!(
5216                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5217                     not compiled in — falling back to statistical. Rebuild with \
5218                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5219                    backend_choice
5220                );
5221            }
5222        } else if !matches!(backend_choice, "statistical" | "") {
5223            warn!(
5224                "Phase 12: unknown backend '{}', falling back to statistical",
5225                backend_choice
5226            );
5227        }
5228
5229        // Statistical path (default + fallback).
5230        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5231            let means = vec![5000.0, 3.0, 2.0];
5232            let stds = vec![2000.0, 1.5, 1.0];
5233
5234            let diffusion_config = DiffusionConfig {
5235                n_steps: self.config.diffusion.n_steps,
5236                seed: self.seed,
5237                ..Default::default()
5238            };
5239
5240            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5241            let n_samples = self.config.diffusion.sample_size;
5242            let n_features = 3;
5243            backend.generate(n_samples, n_features, self.seed).len()
5244        }));
5245
5246        match result {
5247            Ok(sample_count) => {
5248                stats.diffusion_samples_generated = sample_count;
5249                let elapsed = start.elapsed();
5250                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5251                info!(
5252                    "Phase 12 complete (statistical): {} samples in {}ms",
5253                    sample_count, stats.diffusion_enhancement_ms
5254                );
5255            }
5256            Err(_) => {
5257                let elapsed = start.elapsed();
5258                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5259                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5260            }
5261        }
5262    }
5263
5264    /// Neural-backend execution — either load a pre-trained checkpoint
5265    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5266    /// from the first batch of JE amounts. Returns the sample count
5267    /// produced; any error bubbles up to the statistical fallback.
5268    #[cfg(feature = "neural")]
5269    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5270        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5271
5272        if entries.is_empty() {
5273            return Err(SynthError::generation(
5274                "neural diffusion: no journal entries available as training data",
5275            ));
5276        }
5277
5278        let training_data: Vec<Vec<f64>> = entries
5279            .iter()
5280            .take(5000)
5281            .map(|je| {
5282                let total_amount: f64 = je
5283                    .lines
5284                    .iter()
5285                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5286                    .map(|l| {
5287                        use rust_decimal::prelude::ToPrimitive;
5288                        l.debit_amount.to_f64().unwrap_or(0.0)
5289                    })
5290                    .sum();
5291                let line_count = je.lines.len() as f64;
5292                // Use the approval-workflow depth as the third feature
5293                // (proxy for complexity / risk). `None` → 1.
5294                let approval_level = je
5295                    .header
5296                    .approval_workflow
5297                    .as_ref()
5298                    .map(|w| w.required_levels as f64)
5299                    .unwrap_or(1.0);
5300                vec![total_amount, line_count, approval_level]
5301            })
5302            .collect();
5303
5304        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5305
5306        let cfg = &self.config.diffusion;
5307        let neural_cfg = &cfg.neural;
5308
5309        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5310            neural_cfg.checkpoint_path.as_ref()
5311        {
5312            let path = std::path::Path::new(ckpt_path);
5313            info!(
5314                "  Neural diffusion: loading checkpoint from {}",
5315                path.display()
5316            );
5317            NeuralDiffusionBackend::load(path)
5318                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5319        } else {
5320            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5321            info!(
5322                "  Neural diffusion: training score network on {} rows × {} features, \
5323                     {} epochs, hidden_dims={:?}",
5324                training_data.len(),
5325                n_features,
5326                neural_cfg.training_epochs,
5327                neural_cfg.hidden_dims
5328            );
5329            let training_config = NeuralTrainingConfig {
5330                n_steps: cfg.n_steps,
5331                schedule: cfg.schedule.clone(),
5332                hidden_dims: neural_cfg.hidden_dims.clone(),
5333                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5334                learning_rate: neural_cfg.learning_rate,
5335                epochs: neural_cfg.training_epochs,
5336                batch_size: neural_cfg.batch_size,
5337            };
5338            let (backend, report) =
5339                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5340                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5341            info!(
5342                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5343                report.epochs_completed, report.final_loss
5344            );
5345            backend
5346        };
5347
5348        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5349        Ok(samples.len())
5350    }
5351
5352    /// Phase 13: Causal Overlay.
5353    ///
5354    /// Builds a structural causal model from a built-in template (e.g.,
5355    /// fraud_detection) and generates causal samples. Optionally validates
5356    /// that the output respects the causal structure. This phase is
5357    /// non-blocking: failures log a warning but do not stop the pipeline.
5358    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5359        if !self.config.causal.enabled {
5360            debug!("Phase 13: Skipped (causal generation disabled)");
5361            return;
5362        }
5363
5364        info!("Phase 13: Starting Causal Overlay");
5365        let start = std::time::Instant::now();
5366
5367        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5368            // Select template based on config
5369            let graph = match self.config.causal.template.as_str() {
5370                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5371                _ => CausalGraph::fraud_detection_template(),
5372            };
5373
5374            let scm = StructuralCausalModel::new(graph.clone())
5375                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5376
5377            let n_samples = self.config.causal.sample_size;
5378            let samples = scm
5379                .generate(n_samples, self.seed)
5380                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5381
5382            // Optionally validate causal structure
5383            let validation_passed = if self.config.causal.validate {
5384                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5385                if report.valid {
5386                    info!(
5387                        "Causal validation passed: all {} checks OK",
5388                        report.checks.len()
5389                    );
5390                } else {
5391                    warn!(
5392                        "Causal validation: {} violations detected: {:?}",
5393                        report.violations.len(),
5394                        report.violations
5395                    );
5396                }
5397                Some(report.valid)
5398            } else {
5399                None
5400            };
5401
5402            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5403        }));
5404
5405        match result {
5406            Ok(Ok((sample_count, validation_passed))) => {
5407                stats.causal_samples_generated = sample_count;
5408                stats.causal_validation_passed = validation_passed;
5409                let elapsed = start.elapsed();
5410                stats.causal_generation_ms = elapsed.as_millis() as u64;
5411                info!(
5412                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5413                    sample_count, stats.causal_generation_ms, validation_passed,
5414                );
5415            }
5416            Ok(Err(e)) => {
5417                let elapsed = start.elapsed();
5418                stats.causal_generation_ms = elapsed.as_millis() as u64;
5419                warn!("Phase 13: Causal generation failed: {}", e);
5420            }
5421            Err(_) => {
5422                let elapsed = start.elapsed();
5423                stats.causal_generation_ms = elapsed.as_millis() as u64;
5424                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5425            }
5426        }
5427    }
5428
5429    /// Phase 14: Generate S2C sourcing data.
5430    fn phase_sourcing_data(
5431        &mut self,
5432        stats: &mut EnhancedGenerationStatistics,
5433    ) -> SynthResult<SourcingSnapshot> {
5434        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5435            debug!("Phase 14: Skipped (sourcing generation disabled)");
5436            return Ok(SourcingSnapshot::default());
5437        }
5438        let degradation = self.check_resources()?;
5439        if degradation >= DegradationLevel::Reduced {
5440            debug!(
5441                "Phase skipped due to resource pressure (degradation: {:?})",
5442                degradation
5443            );
5444            return Ok(SourcingSnapshot::default());
5445        }
5446
5447        info!("Phase 14: Generating S2C Sourcing Data");
5448        let seed = self.seed;
5449
5450        // Gather vendor data from master data
5451        let vendor_ids: Vec<String> = self
5452            .master_data
5453            .vendors
5454            .iter()
5455            .map(|v| v.vendor_id.clone())
5456            .collect();
5457        if vendor_ids.is_empty() {
5458            debug!("Phase 14: Skipped (no vendors available)");
5459            return Ok(SourcingSnapshot::default());
5460        }
5461
5462        let categories: Vec<(String, String)> = vec![
5463            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5464            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5465            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5466            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5467            ("CAT-LOG".to_string(), "Logistics".to_string()),
5468        ];
5469        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5470            .iter()
5471            .map(|(id, name)| {
5472                (
5473                    id.clone(),
5474                    name.clone(),
5475                    rust_decimal::Decimal::from(100_000),
5476                )
5477            })
5478            .collect();
5479
5480        let company_code = self
5481            .config
5482            .companies
5483            .first()
5484            .map(|c| c.code.as_str())
5485            .unwrap_or("1000");
5486        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5487            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5488        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5489        let fiscal_year = start_date.year() as u16;
5490        let owner_ids: Vec<String> = self
5491            .master_data
5492            .employees
5493            .iter()
5494            .take(5)
5495            .map(|e| e.employee_id.clone())
5496            .collect();
5497        let owner_id = owner_ids
5498            .first()
5499            .map(std::string::String::as_str)
5500            .unwrap_or("BUYER-001");
5501
5502        // Step 1: Spend Analysis
5503        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5504        let spend_analyses =
5505            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5506
5507        // Step 2: Sourcing Projects
5508        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5509        let sourcing_projects = if owner_ids.is_empty() {
5510            Vec::new()
5511        } else {
5512            project_gen.generate(
5513                company_code,
5514                &categories_with_spend,
5515                &owner_ids,
5516                start_date,
5517                self.config.global.period_months,
5518            )
5519        };
5520        stats.sourcing_project_count = sourcing_projects.len();
5521
5522        // Step 3: Qualifications
5523        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5524        let mut qual_gen = QualificationGenerator::new(seed + 2);
5525        let qualifications = qual_gen.generate(
5526            company_code,
5527            &qual_vendor_ids,
5528            sourcing_projects.first().map(|p| p.project_id.as_str()),
5529            owner_id,
5530            start_date,
5531        );
5532
5533        // Step 4: RFx Events
5534        let mut rfx_gen = RfxGenerator::new(seed + 3);
5535        let rfx_events: Vec<RfxEvent> = sourcing_projects
5536            .iter()
5537            .map(|proj| {
5538                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5539                rfx_gen.generate(
5540                    company_code,
5541                    &proj.project_id,
5542                    &proj.category_id,
5543                    &qualified_vids,
5544                    owner_id,
5545                    start_date,
5546                    50000.0,
5547                )
5548            })
5549            .collect();
5550        stats.rfx_event_count = rfx_events.len();
5551
5552        // Step 5: Bids
5553        let mut bid_gen = BidGenerator::new(seed + 4);
5554        let mut all_bids = Vec::new();
5555        for rfx in &rfx_events {
5556            let bidder_count = vendor_ids.len().clamp(2, 5);
5557            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5558            let bids = bid_gen.generate(rfx, &responding, start_date);
5559            all_bids.extend(bids);
5560        }
5561        stats.bid_count = all_bids.len();
5562
5563        // Step 6: Bid Evaluations
5564        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5565        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5566            .iter()
5567            .map(|rfx| {
5568                let rfx_bids: Vec<SupplierBid> = all_bids
5569                    .iter()
5570                    .filter(|b| b.rfx_id == rfx.rfx_id)
5571                    .cloned()
5572                    .collect();
5573                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5574            })
5575            .collect();
5576
5577        // Step 7: Contracts from winning bids
5578        let mut contract_gen = ContractGenerator::new(seed + 6);
5579        let contracts: Vec<ProcurementContract> = bid_evaluations
5580            .iter()
5581            .zip(rfx_events.iter())
5582            .filter_map(|(eval, rfx)| {
5583                eval.ranked_bids.first().and_then(|winner| {
5584                    all_bids
5585                        .iter()
5586                        .find(|b| b.bid_id == winner.bid_id)
5587                        .map(|winning_bid| {
5588                            contract_gen.generate_from_bid(
5589                                winning_bid,
5590                                Some(&rfx.sourcing_project_id),
5591                                &rfx.category_id,
5592                                owner_id,
5593                                start_date,
5594                            )
5595                        })
5596                })
5597            })
5598            .collect();
5599        stats.contract_count = contracts.len();
5600
5601        // Step 8: Catalog Items
5602        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5603        let catalog_items = catalog_gen.generate(&contracts);
5604        stats.catalog_item_count = catalog_items.len();
5605
5606        // Step 9: Scorecards
5607        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5608        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5609            .iter()
5610            .fold(
5611                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5612                |mut acc, c| {
5613                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5614                    acc
5615                },
5616            )
5617            .into_iter()
5618            .collect();
5619        let scorecards = scorecard_gen.generate(
5620            company_code,
5621            &vendor_contracts,
5622            start_date,
5623            end_date,
5624            owner_id,
5625        );
5626        stats.scorecard_count = scorecards.len();
5627
5628        // Back-populate cross-references on sourcing projects (Task 35)
5629        // Link each project to its RFx events, contracts, and spend analyses
5630        let mut sourcing_projects = sourcing_projects;
5631        for project in &mut sourcing_projects {
5632            // Link RFx events generated for this project
5633            project.rfx_ids = rfx_events
5634                .iter()
5635                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5636                .map(|rfx| rfx.rfx_id.clone())
5637                .collect();
5638
5639            // Link contract awarded from this project's RFx
5640            project.contract_id = contracts
5641                .iter()
5642                .find(|c| {
5643                    c.sourcing_project_id
5644                        .as_deref()
5645                        .is_some_and(|sp| sp == project.project_id)
5646                })
5647                .map(|c| c.contract_id.clone());
5648
5649            // Link spend analysis for matching category (use category_id as the reference)
5650            project.spend_analysis_id = spend_analyses
5651                .iter()
5652                .find(|sa| sa.category_id == project.category_id)
5653                .map(|sa| sa.category_id.clone());
5654        }
5655
5656        info!(
5657            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5658            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5659            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5660        );
5661        self.check_resources_with_log("post-sourcing")?;
5662
5663        Ok(SourcingSnapshot {
5664            spend_analyses,
5665            sourcing_projects,
5666            qualifications,
5667            rfx_events,
5668            bids: all_bids,
5669            bid_evaluations,
5670            contracts,
5671            catalog_items,
5672            scorecards,
5673        })
5674    }
5675
5676    /// Build a [`GroupStructure`] from the current company configuration.
5677    ///
5678    /// The first company in the configuration is treated as the ultimate parent.
5679    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5680    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5681    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5682        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5683
5684        let parent_code = self
5685            .config
5686            .companies
5687            .first()
5688            .map(|c| c.code.clone())
5689            .unwrap_or_else(|| "PARENT".to_string());
5690
5691        let mut group = GroupStructure::new(parent_code);
5692
5693        for company in self.config.companies.iter().skip(1) {
5694            let sub =
5695                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5696            group.add_subsidiary(sub);
5697        }
5698
5699        group
5700    }
5701
5702    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5703    fn phase_intercompany(
5704        &mut self,
5705        journal_entries: &[JournalEntry],
5706        stats: &mut EnhancedGenerationStatistics,
5707    ) -> SynthResult<IntercompanySnapshot> {
5708        // Skip if intercompany is disabled in config
5709        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5710            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5711            return Ok(IntercompanySnapshot::default());
5712        }
5713
5714        // Intercompany requires at least 2 companies
5715        if self.config.companies.len() < 2 {
5716            debug!(
5717                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5718                self.config.companies.len()
5719            );
5720            return Ok(IntercompanySnapshot::default());
5721        }
5722
5723        info!("Phase 14b: Generating Intercompany Transactions");
5724
5725        // Build the group structure early — used by ISA 600 component auditor scope
5726        // and consolidated financial statement generators downstream.
5727        let group_structure = self.build_group_structure();
5728        debug!(
5729            "Group structure built: parent={}, subsidiaries={}",
5730            group_structure.parent_entity,
5731            group_structure.subsidiaries.len()
5732        );
5733
5734        let seed = self.seed;
5735        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5736            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5737        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5738
5739        // Build ownership structure from company configs
5740        // First company is treated as the parent, remaining are subsidiaries
5741        let parent_code = self.config.companies[0].code.clone();
5742        let mut ownership_structure =
5743            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5744
5745        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5746            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5747                format!("REL{:03}", i + 1),
5748                parent_code.clone(),
5749                company.code.clone(),
5750                rust_decimal::Decimal::from(100), // Default 100% ownership
5751                start_date,
5752            );
5753            ownership_structure.add_relationship(relationship);
5754        }
5755
5756        // Convert config transfer pricing method to core model enum
5757        let tp_method = match self.config.intercompany.transfer_pricing_method {
5758            datasynth_config::schema::TransferPricingMethod::CostPlus => {
5759                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5760            }
5761            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5762                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5763            }
5764            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5765                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5766            }
5767            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5768                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5769            }
5770            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5771                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5772            }
5773        };
5774
5775        // Build IC generator config from schema config
5776        let ic_currency = self
5777            .config
5778            .companies
5779            .first()
5780            .map(|c| c.currency.clone())
5781            .unwrap_or_else(|| "USD".to_string());
5782        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5783            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5784            transfer_pricing_method: tp_method,
5785            markup_percent: rust_decimal::Decimal::from_f64_retain(
5786                self.config.intercompany.markup_percent,
5787            )
5788            .unwrap_or(rust_decimal::Decimal::from(5)),
5789            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5790            default_currency: ic_currency,
5791            ..Default::default()
5792        };
5793
5794        // Create IC generator
5795        let mut ic_generator = datasynth_generators::ICGenerator::new(
5796            ic_gen_config,
5797            ownership_structure.clone(),
5798            seed + 50,
5799        );
5800
5801        // Generate IC transactions for the period
5802        // Use ~3 transactions per day as a reasonable default
5803        let transactions_per_day = 3;
5804        let matched_pairs = ic_generator.generate_transactions_for_period(
5805            start_date,
5806            end_date,
5807            transactions_per_day,
5808        );
5809
5810        // Generate IC source P2P/O2C documents
5811        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5812        debug!(
5813            "Generated {} IC seller invoices, {} IC buyer POs",
5814            ic_doc_chains.seller_invoices.len(),
5815            ic_doc_chains.buyer_orders.len()
5816        );
5817
5818        // Generate journal entries from matched pairs
5819        let mut seller_entries = Vec::new();
5820        let mut buyer_entries = Vec::new();
5821        let fiscal_year = start_date.year();
5822
5823        for pair in &matched_pairs {
5824            let fiscal_period = pair.posting_date.month();
5825            let (seller_je, buyer_je) =
5826                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5827            seller_entries.push(seller_je);
5828            buyer_entries.push(buyer_je);
5829        }
5830
5831        // Run matching engine
5832        let matching_config = datasynth_generators::ICMatchingConfig {
5833            base_currency: self
5834                .config
5835                .companies
5836                .first()
5837                .map(|c| c.currency.clone())
5838                .unwrap_or_else(|| "USD".to_string()),
5839            ..Default::default()
5840        };
5841        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5842        matching_engine.load_matched_pairs(&matched_pairs);
5843        let matching_result = matching_engine.run_matching(end_date);
5844
5845        // Generate elimination entries if configured
5846        let mut elimination_entries = Vec::new();
5847        if self.config.intercompany.generate_eliminations {
5848            let elim_config = datasynth_generators::EliminationConfig {
5849                consolidation_entity: "GROUP".to_string(),
5850                base_currency: self
5851                    .config
5852                    .companies
5853                    .first()
5854                    .map(|c| c.currency.clone())
5855                    .unwrap_or_else(|| "USD".to_string()),
5856                ..Default::default()
5857            };
5858
5859            let mut elim_generator =
5860                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5861
5862            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5863            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5864                matching_result
5865                    .matched_balances
5866                    .iter()
5867                    .chain(matching_result.unmatched_balances.iter())
5868                    .cloned()
5869                    .collect();
5870
5871            // Build investment and equity maps from the group structure so that the
5872            // elimination generator can produce equity-investment elimination entries
5873            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5874            //
5875            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5876            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5877            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5878            //
5879            // Net assets are derived from the journal entries using account-range heuristics:
5880            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5881            // no JE data is available (IC phase runs early in the generation pipeline).
5882            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5883                std::collections::HashMap::new();
5884            let mut equity_amounts: std::collections::HashMap<
5885                String,
5886                std::collections::HashMap<String, rust_decimal::Decimal>,
5887            > = std::collections::HashMap::new();
5888            {
5889                use rust_decimal::Decimal;
5890                let hundred = Decimal::from(100u32);
5891                let ten_pct = Decimal::new(10, 2); // 0.10
5892                let thirty_pct = Decimal::new(30, 2); // 0.30
5893                let sixty_pct = Decimal::new(60, 2); // 0.60
5894                let parent_code = &group_structure.parent_entity;
5895                for sub in &group_structure.subsidiaries {
5896                    let net_assets = {
5897                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5898                        if na > Decimal::ZERO {
5899                            na
5900                        } else {
5901                            Decimal::from(1_000_000u64)
5902                        }
5903                    };
5904                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5905                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5906                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5907
5908                    // Split subsidiary equity into conventional components:
5909                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5910                    let mut eq_map = std::collections::HashMap::new();
5911                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5912                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5913                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5914                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5915                }
5916            }
5917
5918            let journal = elim_generator.generate_eliminations(
5919                &fiscal_period,
5920                end_date,
5921                &all_balances,
5922                &matched_pairs,
5923                &investment_amounts,
5924                &equity_amounts,
5925            );
5926
5927            elimination_entries = journal.entries.clone();
5928        }
5929
5930        let matched_pair_count = matched_pairs.len();
5931        let elimination_entry_count = elimination_entries.len();
5932        let match_rate = matching_result.match_rate;
5933
5934        stats.ic_matched_pair_count = matched_pair_count;
5935        stats.ic_elimination_count = elimination_entry_count;
5936        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5937
5938        info!(
5939            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5940            matched_pair_count,
5941            stats.ic_transaction_count,
5942            seller_entries.len(),
5943            buyer_entries.len(),
5944            elimination_entry_count,
5945            match_rate * 100.0
5946        );
5947        self.check_resources_with_log("post-intercompany")?;
5948
5949        // ----------------------------------------------------------------
5950        // NCI measurements: derive from group structure ownership percentages
5951        // ----------------------------------------------------------------
5952        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5953            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5954            use rust_decimal::Decimal;
5955
5956            let eight_pct = Decimal::new(8, 2); // 0.08
5957
5958            group_structure
5959                .subsidiaries
5960                .iter()
5961                .filter(|sub| {
5962                    sub.nci_percentage > Decimal::ZERO
5963                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5964                })
5965                .map(|sub| {
5966                    // Compute net assets from actual journal entries for this subsidiary.
5967                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5968                    // IC phase runs before the main JE batch has been populated).
5969                    let net_assets_from_jes =
5970                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5971
5972                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5973                        net_assets_from_jes.round_dp(2)
5974                    } else {
5975                        // Fallback: use a plausible base amount
5976                        Decimal::from(1_000_000u64)
5977                    };
5978
5979                    // Net income approximated as 8% of net assets
5980                    let net_income = (net_assets * eight_pct).round_dp(2);
5981
5982                    NciMeasurement::compute(
5983                        sub.entity_code.clone(),
5984                        sub.nci_percentage,
5985                        net_assets,
5986                        net_income,
5987                    )
5988                })
5989                .collect()
5990        };
5991
5992        if !nci_measurements.is_empty() {
5993            info!(
5994                "NCI measurements: {} subsidiaries with non-controlling interests",
5995                nci_measurements.len()
5996            );
5997        }
5998
5999        Ok(IntercompanySnapshot {
6000            group_structure: Some(group_structure),
6001            matched_pairs,
6002            seller_journal_entries: seller_entries,
6003            buyer_journal_entries: buyer_entries,
6004            elimination_entries,
6005            nci_measurements,
6006            ic_document_chains: Some(ic_doc_chains),
6007            matched_pair_count,
6008            elimination_entry_count,
6009            match_rate,
6010        })
6011    }
6012
6013    /// Phase 15: Generate bank reconciliations and financial statements.
6014    fn phase_financial_reporting(
6015        &mut self,
6016        document_flows: &DocumentFlowSnapshot,
6017        journal_entries: &[JournalEntry],
6018        coa: &Arc<ChartOfAccounts>,
6019        _hr: &HrSnapshot,
6020        _audit: &AuditSnapshot,
6021        stats: &mut EnhancedGenerationStatistics,
6022    ) -> SynthResult<FinancialReportingSnapshot> {
6023        let fs_enabled = self.phase_config.generate_financial_statements
6024            || self.config.financial_reporting.enabled;
6025        let br_enabled = self.phase_config.generate_bank_reconciliation;
6026
6027        if !fs_enabled && !br_enabled {
6028            debug!("Phase 15: Skipped (financial reporting disabled)");
6029            return Ok(FinancialReportingSnapshot::default());
6030        }
6031
6032        info!("Phase 15: Generating Financial Reporting Data");
6033
6034        let seed = self.seed;
6035        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6036            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6037
6038        let mut financial_statements = Vec::new();
6039        let mut bank_reconciliations = Vec::new();
6040        let mut trial_balances = Vec::new();
6041        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6042        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6043            Vec::new();
6044        // Standalone statements keyed by entity code
6045        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6046            std::collections::HashMap::new();
6047        // Consolidated statements (one per period)
6048        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6049        // Consolidation schedules (one per period)
6050        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6051
6052        // Generate financial statements from JE-derived trial balances.
6053        //
6054        // When journal entries are available, we use cumulative trial balances for
6055        // balance sheet accounts and current-period trial balances for income
6056        // statement accounts. We also track prior-period trial balances so the
6057        // generator can produce comparative amounts, and we build a proper
6058        // cash flow statement from working capital changes rather than random data.
6059        if fs_enabled {
6060            let has_journal_entries = !journal_entries.is_empty();
6061
6062            // Use FinancialStatementGenerator for balance sheet and income statement,
6063            // but build cash flow ourselves from TB data when JEs are available.
6064            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6065            // Separate generator for consolidated statements (different seed offset)
6066            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6067
6068            // Collect elimination JEs once (reused across periods)
6069            let elimination_entries: Vec<&JournalEntry> = journal_entries
6070                .iter()
6071                .filter(|je| je.header.is_elimination)
6072                .collect();
6073
6074            // Generate one set of statements per period, per entity
6075            for period in 0..self.config.global.period_months {
6076                let period_start = start_date + chrono::Months::new(period);
6077                let period_end =
6078                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6079                let fiscal_year = period_end.year() as u16;
6080                let fiscal_period = period_end.month() as u8;
6081                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6082
6083                // Build per-entity trial balances for this period (non-elimination JEs)
6084                // We accumulate them for the consolidation step.
6085                let mut entity_tb_map: std::collections::HashMap<
6086                    String,
6087                    std::collections::HashMap<String, rust_decimal::Decimal>,
6088                > = std::collections::HashMap::new();
6089
6090                // --- Standalone: one set of statements per company ---
6091                for (company_idx, company) in self.config.companies.iter().enumerate() {
6092                    let company_code = company.code.as_str();
6093                    let currency = company.currency.as_str();
6094                    // Use a unique seed offset per company to keep statements deterministic
6095                    // and distinct across companies
6096                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6097                    let mut company_fs_gen =
6098                        FinancialStatementGenerator::new(seed + company_seed_offset);
6099
6100                    if has_journal_entries {
6101                        let tb_entries = Self::build_cumulative_trial_balance(
6102                            journal_entries,
6103                            coa,
6104                            company_code,
6105                            start_date,
6106                            period_end,
6107                            fiscal_year,
6108                            fiscal_period,
6109                        );
6110
6111                        // Accumulate per-entity category balances for consolidation
6112                        let entity_cat_map =
6113                            entity_tb_map.entry(company_code.to_string()).or_default();
6114                        for tb_entry in &tb_entries {
6115                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6116                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6117                        }
6118
6119                        let stmts = company_fs_gen.generate(
6120                            company_code,
6121                            currency,
6122                            &tb_entries,
6123                            period_start,
6124                            period_end,
6125                            fiscal_year,
6126                            fiscal_period,
6127                            None,
6128                            "SYS-AUTOCLOSE",
6129                        );
6130
6131                        let mut entity_stmts = Vec::new();
6132                        for stmt in stmts {
6133                            if stmt.statement_type == StatementType::CashFlowStatement {
6134                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6135                                let cf_items = Self::build_cash_flow_from_trial_balances(
6136                                    &tb_entries,
6137                                    None,
6138                                    net_income,
6139                                );
6140                                entity_stmts.push(FinancialStatement {
6141                                    cash_flow_items: cf_items,
6142                                    ..stmt
6143                                });
6144                            } else {
6145                                entity_stmts.push(stmt);
6146                            }
6147                        }
6148
6149                        // Add to the flat financial_statements list (used by KPI/budget)
6150                        financial_statements.extend(entity_stmts.clone());
6151
6152                        // Store standalone per-entity
6153                        standalone_statements
6154                            .entry(company_code.to_string())
6155                            .or_default()
6156                            .extend(entity_stmts);
6157
6158                        // Only store trial balance for the first company in the period
6159                        // to avoid duplicates in the trial_balances list
6160                        if company_idx == 0 {
6161                            trial_balances.push(PeriodTrialBalance {
6162                                fiscal_year,
6163                                fiscal_period,
6164                                period_start,
6165                                period_end,
6166                                entries: tb_entries,
6167                            });
6168                        }
6169                    } else {
6170                        // Fallback: no JEs available
6171                        let tb_entries = Self::build_trial_balance_from_entries(
6172                            journal_entries,
6173                            coa,
6174                            company_code,
6175                            fiscal_year,
6176                            fiscal_period,
6177                        );
6178
6179                        let stmts = company_fs_gen.generate(
6180                            company_code,
6181                            currency,
6182                            &tb_entries,
6183                            period_start,
6184                            period_end,
6185                            fiscal_year,
6186                            fiscal_period,
6187                            None,
6188                            "SYS-AUTOCLOSE",
6189                        );
6190                        financial_statements.extend(stmts.clone());
6191                        standalone_statements
6192                            .entry(company_code.to_string())
6193                            .or_default()
6194                            .extend(stmts);
6195
6196                        if company_idx == 0 && !tb_entries.is_empty() {
6197                            trial_balances.push(PeriodTrialBalance {
6198                                fiscal_year,
6199                                fiscal_period,
6200                                period_start,
6201                                period_end,
6202                                entries: tb_entries,
6203                            });
6204                        }
6205                    }
6206                }
6207
6208                // --- Consolidated: aggregate all entities + apply eliminations ---
6209                // Use the primary (first) company's currency for the consolidated statement
6210                let group_currency = self
6211                    .config
6212                    .companies
6213                    .first()
6214                    .map(|c| c.currency.as_str())
6215                    .unwrap_or("USD");
6216
6217                // Build owned elimination entries for this period
6218                let period_eliminations: Vec<JournalEntry> = elimination_entries
6219                    .iter()
6220                    .filter(|je| {
6221                        je.header.fiscal_year == fiscal_year
6222                            && je.header.fiscal_period == fiscal_period
6223                    })
6224                    .map(|je| (*je).clone())
6225                    .collect();
6226
6227                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6228                    &entity_tb_map,
6229                    &period_eliminations,
6230                    &period_label,
6231                );
6232
6233                // Build a pseudo trial balance from consolidated line items for the
6234                // FinancialStatementGenerator to use (only for cash flow direction).
6235                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6236                    .line_items
6237                    .iter()
6238                    .map(|li| {
6239                        let net = li.post_elimination_total;
6240                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6241                            (net, rust_decimal::Decimal::ZERO)
6242                        } else {
6243                            (rust_decimal::Decimal::ZERO, -net)
6244                        };
6245                        datasynth_generators::TrialBalanceEntry {
6246                            account_code: li.account_category.clone(),
6247                            account_name: li.account_category.clone(),
6248                            category: li.account_category.clone(),
6249                            debit_balance: debit,
6250                            credit_balance: credit,
6251                        }
6252                    })
6253                    .collect();
6254
6255                let mut cons_stmts = cons_gen.generate(
6256                    "GROUP",
6257                    group_currency,
6258                    &cons_tb,
6259                    period_start,
6260                    period_end,
6261                    fiscal_year,
6262                    fiscal_period,
6263                    None,
6264                    "SYS-AUTOCLOSE",
6265                );
6266
6267                // Split consolidated line items by statement type.
6268                // The consolidation generator returns BS items first, then IS items,
6269                // identified by their CONS- prefix and category.
6270                let bs_categories: &[&str] = &[
6271                    "CASH",
6272                    "RECEIVABLES",
6273                    "INVENTORY",
6274                    "FIXEDASSETS",
6275                    "PAYABLES",
6276                    "ACCRUEDLIABILITIES",
6277                    "LONGTERMDEBT",
6278                    "EQUITY",
6279                ];
6280                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6281                    cons_line_items.into_iter().partition(|li| {
6282                        let upper = li.label.to_uppercase();
6283                        bs_categories.iter().any(|c| upper == *c)
6284                    });
6285
6286                for stmt in &mut cons_stmts {
6287                    stmt.is_consolidated = true;
6288                    match stmt.statement_type {
6289                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6290                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6291                        _ => {} // CF and equity change statements keep generator output
6292                    }
6293                }
6294
6295                consolidated_statements.extend(cons_stmts);
6296                consolidation_schedules.push(schedule);
6297            }
6298
6299            // Backward compat: if only 1 company, use existing code path logic
6300            // (prior_cumulative_tb for comparative amounts). Already handled above;
6301            // the prior_ref is omitted to keep this change minimal.
6302            let _ = &mut fs_gen; // suppress unused warning
6303
6304            stats.financial_statement_count = financial_statements.len();
6305            info!(
6306                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6307                stats.financial_statement_count,
6308                consolidated_statements.len(),
6309                has_journal_entries
6310            );
6311
6312            // ----------------------------------------------------------------
6313            // IFRS 8 / ASC 280: Operating Segment Reporting
6314            // ----------------------------------------------------------------
6315            // Build entity seeds from the company configuration.
6316            let entity_seeds: Vec<SegmentSeed> = self
6317                .config
6318                .companies
6319                .iter()
6320                .map(|c| SegmentSeed {
6321                    code: c.code.clone(),
6322                    name: c.name.clone(),
6323                    currency: c.currency.clone(),
6324                })
6325                .collect();
6326
6327            let mut seg_gen = SegmentGenerator::new(seed + 30);
6328
6329            // Generate one set of segment reports per period.
6330            // We extract consolidated revenue / profit / assets from the consolidated
6331            // financial statements produced above, falling back to simple sums when
6332            // no consolidated statements were generated (single-entity path).
6333            for period in 0..self.config.global.period_months {
6334                let period_end =
6335                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6336                let fiscal_year = period_end.year() as u16;
6337                let fiscal_period = period_end.month() as u8;
6338                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6339
6340                use datasynth_core::models::StatementType;
6341
6342                // Try to find consolidated income statement for this period
6343                let cons_is = consolidated_statements.iter().find(|s| {
6344                    s.fiscal_year == fiscal_year
6345                        && s.fiscal_period == fiscal_period
6346                        && s.statement_type == StatementType::IncomeStatement
6347                });
6348                let cons_bs = consolidated_statements.iter().find(|s| {
6349                    s.fiscal_year == fiscal_year
6350                        && s.fiscal_period == fiscal_period
6351                        && s.statement_type == StatementType::BalanceSheet
6352                });
6353
6354                // If consolidated statements not available fall back to the flat list
6355                let is_stmt = cons_is.or_else(|| {
6356                    financial_statements.iter().find(|s| {
6357                        s.fiscal_year == fiscal_year
6358                            && s.fiscal_period == fiscal_period
6359                            && s.statement_type == StatementType::IncomeStatement
6360                    })
6361                });
6362                let bs_stmt = cons_bs.or_else(|| {
6363                    financial_statements.iter().find(|s| {
6364                        s.fiscal_year == fiscal_year
6365                            && s.fiscal_period == fiscal_period
6366                            && s.statement_type == StatementType::BalanceSheet
6367                    })
6368                });
6369
6370                let consolidated_revenue = is_stmt
6371                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6372                    .map(|li| -li.amount) // revenue is stored as negative in IS
6373                    .unwrap_or(rust_decimal::Decimal::ZERO);
6374
6375                let consolidated_profit = is_stmt
6376                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6377                    .map(|li| li.amount)
6378                    .unwrap_or(rust_decimal::Decimal::ZERO);
6379
6380                let consolidated_assets = bs_stmt
6381                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6382                    .map(|li| li.amount)
6383                    .unwrap_or(rust_decimal::Decimal::ZERO);
6384
6385                // Skip periods where we have no financial data
6386                if consolidated_revenue == rust_decimal::Decimal::ZERO
6387                    && consolidated_assets == rust_decimal::Decimal::ZERO
6388                {
6389                    continue;
6390                }
6391
6392                let group_code = self
6393                    .config
6394                    .companies
6395                    .first()
6396                    .map(|c| c.code.as_str())
6397                    .unwrap_or("GROUP");
6398
6399                // Compute period depreciation from JEs with document type "CL" hitting account
6400                // 6000 (depreciation expense).  These are generated by phase_period_close.
6401                let total_depr: rust_decimal::Decimal = journal_entries
6402                    .iter()
6403                    .filter(|je| je.header.document_type == "CL")
6404                    .flat_map(|je| je.lines.iter())
6405                    .filter(|l| l.gl_account.starts_with("6000"))
6406                    .map(|l| l.debit_amount)
6407                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6408                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6409                    Some(total_depr)
6410                } else {
6411                    None
6412                };
6413
6414                let (segs, recon) = seg_gen.generate(
6415                    group_code,
6416                    &period_label,
6417                    consolidated_revenue,
6418                    consolidated_profit,
6419                    consolidated_assets,
6420                    &entity_seeds,
6421                    depr_param,
6422                );
6423                segment_reports.extend(segs);
6424                segment_reconciliations.push(recon);
6425            }
6426
6427            info!(
6428                "Segment reports generated: {} segments, {} reconciliations",
6429                segment_reports.len(),
6430                segment_reconciliations.len()
6431            );
6432        }
6433
6434        // Generate bank reconciliations from payment data
6435        if br_enabled && !document_flows.payments.is_empty() {
6436            let employee_ids: Vec<String> = self
6437                .master_data
6438                .employees
6439                .iter()
6440                .map(|e| e.employee_id.clone())
6441                .collect();
6442            let mut br_gen =
6443                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6444
6445            // Group payments by company code and period
6446            for company in &self.config.companies {
6447                let company_payments: Vec<PaymentReference> = document_flows
6448                    .payments
6449                    .iter()
6450                    .filter(|p| p.header.company_code == company.code)
6451                    .map(|p| PaymentReference {
6452                        id: p.header.document_id.clone(),
6453                        amount: if p.is_vendor { p.amount } else { -p.amount },
6454                        date: p.header.document_date,
6455                        reference: p
6456                            .check_number
6457                            .clone()
6458                            .or_else(|| p.wire_reference.clone())
6459                            .unwrap_or_else(|| p.header.document_id.clone()),
6460                    })
6461                    .collect();
6462
6463                if company_payments.is_empty() {
6464                    continue;
6465                }
6466
6467                let bank_account_id = format!("{}-MAIN", company.code);
6468
6469                // Generate one reconciliation per period
6470                for period in 0..self.config.global.period_months {
6471                    let period_start = start_date + chrono::Months::new(period);
6472                    let period_end =
6473                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6474
6475                    let period_payments: Vec<PaymentReference> = company_payments
6476                        .iter()
6477                        .filter(|p| p.date >= period_start && p.date <= period_end)
6478                        .cloned()
6479                        .collect();
6480
6481                    let recon = br_gen.generate(
6482                        &company.code,
6483                        &bank_account_id,
6484                        period_start,
6485                        period_end,
6486                        &company.currency,
6487                        &period_payments,
6488                    );
6489                    bank_reconciliations.push(recon);
6490                }
6491            }
6492            info!(
6493                "Bank reconciliations generated: {} reconciliations",
6494                bank_reconciliations.len()
6495            );
6496        }
6497
6498        stats.bank_reconciliation_count = bank_reconciliations.len();
6499        self.check_resources_with_log("post-financial-reporting")?;
6500
6501        if !trial_balances.is_empty() {
6502            info!(
6503                "Period-close trial balances captured: {} periods",
6504                trial_balances.len()
6505            );
6506        }
6507
6508        // Notes to financial statements are generated in a separate post-processing step
6509        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6510        // phases have completed, so that deferred tax and provision data can be wired in.
6511        let notes_to_financial_statements = Vec::new();
6512
6513        Ok(FinancialReportingSnapshot {
6514            financial_statements,
6515            standalone_statements,
6516            consolidated_statements,
6517            consolidation_schedules,
6518            bank_reconciliations,
6519            trial_balances,
6520            segment_reports,
6521            segment_reconciliations,
6522            notes_to_financial_statements,
6523        })
6524    }
6525
6526    /// Populate notes to financial statements using fully-resolved snapshots.
6527    ///
6528    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6529    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6530    /// can be wired into the notes context.  The method mutates
6531    /// `financial_reporting.notes_to_financial_statements` in-place.
6532    fn generate_notes_to_financial_statements(
6533        &self,
6534        financial_reporting: &mut FinancialReportingSnapshot,
6535        accounting_standards: &AccountingStandardsSnapshot,
6536        tax: &TaxSnapshot,
6537        hr: &HrSnapshot,
6538        audit: &AuditSnapshot,
6539        treasury: &TreasurySnapshot,
6540    ) {
6541        use datasynth_config::schema::AccountingFrameworkConfig;
6542        use datasynth_core::models::StatementType;
6543        use datasynth_generators::period_close::notes_generator::{
6544            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6545        };
6546
6547        let seed = self.seed;
6548        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6549        {
6550            Ok(d) => d,
6551            Err(_) => return,
6552        };
6553
6554        let mut notes_gen = NotesGenerator::new(seed + 4235);
6555
6556        for company in &self.config.companies {
6557            let last_period_end = start_date
6558                + chrono::Months::new(self.config.global.period_months)
6559                - chrono::Days::new(1);
6560            let fiscal_year = last_period_end.year() as u16;
6561
6562            // Extract relevant amounts from the already-generated financial statements
6563            let entity_is = financial_reporting
6564                .standalone_statements
6565                .get(&company.code)
6566                .and_then(|stmts| {
6567                    stmts.iter().find(|s| {
6568                        s.fiscal_year == fiscal_year
6569                            && s.statement_type == StatementType::IncomeStatement
6570                    })
6571                });
6572            let entity_bs = financial_reporting
6573                .standalone_statements
6574                .get(&company.code)
6575                .and_then(|stmts| {
6576                    stmts.iter().find(|s| {
6577                        s.fiscal_year == fiscal_year
6578                            && s.statement_type == StatementType::BalanceSheet
6579                    })
6580                });
6581
6582            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6583            let revenue_amount = entity_is
6584                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6585                .map(|li| li.amount);
6586            let ppe_gross = entity_bs
6587                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6588                .map(|li| li.amount);
6589
6590            let framework = match self
6591                .config
6592                .accounting_standards
6593                .framework
6594                .unwrap_or_default()
6595            {
6596                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6597                    "IFRS".to_string()
6598                }
6599                _ => "US GAAP".to_string(),
6600            };
6601
6602            // ---- Deferred tax (IAS 12 / ASC 740) ----
6603            // Sum closing DTA and DTL from rollforward entries for this entity.
6604            let (entity_dta, entity_dtl) = {
6605                let mut dta = rust_decimal::Decimal::ZERO;
6606                let mut dtl = rust_decimal::Decimal::ZERO;
6607                for rf in &tax.deferred_tax.rollforwards {
6608                    if rf.entity_code == company.code {
6609                        dta += rf.closing_dta;
6610                        dtl += rf.closing_dtl;
6611                    }
6612                }
6613                (
6614                    if dta > rust_decimal::Decimal::ZERO {
6615                        Some(dta)
6616                    } else {
6617                        None
6618                    },
6619                    if dtl > rust_decimal::Decimal::ZERO {
6620                        Some(dtl)
6621                    } else {
6622                        None
6623                    },
6624                )
6625            };
6626
6627            // ---- Provisions (IAS 37 / ASC 450) ----
6628            // Filter provisions to this entity; sum best_estimate amounts.
6629            let entity_provisions: Vec<_> = accounting_standards
6630                .provisions
6631                .iter()
6632                .filter(|p| p.entity_code == company.code)
6633                .collect();
6634            let provision_count = entity_provisions.len();
6635            let total_provisions = if provision_count > 0 {
6636                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6637            } else {
6638                None
6639            };
6640
6641            // ---- Pension data from HR snapshot ----
6642            let entity_pension_plan_count = hr
6643                .pension_plans
6644                .iter()
6645                .filter(|p| p.entity_code == company.code)
6646                .count();
6647            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6648                let sum: rust_decimal::Decimal = hr
6649                    .pension_disclosures
6650                    .iter()
6651                    .filter(|d| {
6652                        hr.pension_plans
6653                            .iter()
6654                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6655                    })
6656                    .map(|d| d.net_pension_liability)
6657                    .sum();
6658                let plan_assets_sum: rust_decimal::Decimal = hr
6659                    .pension_plan_assets
6660                    .iter()
6661                    .filter(|a| {
6662                        hr.pension_plans
6663                            .iter()
6664                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6665                    })
6666                    .map(|a| a.fair_value_closing)
6667                    .sum();
6668                if entity_pension_plan_count > 0 {
6669                    Some(sum + plan_assets_sum)
6670                } else {
6671                    None
6672                }
6673            };
6674            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6675                let sum: rust_decimal::Decimal = hr
6676                    .pension_plan_assets
6677                    .iter()
6678                    .filter(|a| {
6679                        hr.pension_plans
6680                            .iter()
6681                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6682                    })
6683                    .map(|a| a.fair_value_closing)
6684                    .sum();
6685                if entity_pension_plan_count > 0 {
6686                    Some(sum)
6687                } else {
6688                    None
6689                }
6690            };
6691
6692            // ---- Audit data: related parties + subsequent events ----
6693            // Audit snapshot covers all entities; use total counts (common case = single entity).
6694            let rp_count = audit.related_party_transactions.len();
6695            let se_count = audit.subsequent_events.len();
6696            let adjusting_count = audit
6697                .subsequent_events
6698                .iter()
6699                .filter(|e| {
6700                    matches!(
6701                        e.classification,
6702                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6703                    )
6704                })
6705                .count();
6706
6707            let ctx = NotesGeneratorContext {
6708                entity_code: company.code.clone(),
6709                framework,
6710                period: format!("FY{}", fiscal_year),
6711                period_end: last_period_end,
6712                currency: company.currency.clone(),
6713                revenue_amount,
6714                total_ppe_gross: ppe_gross,
6715                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6716                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6717                deferred_tax_asset: entity_dta,
6718                deferred_tax_liability: entity_dtl,
6719                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6720                provision_count,
6721                total_provisions,
6722                // Pension data from HR snapshot
6723                pension_plan_count: entity_pension_plan_count,
6724                total_dbo: entity_total_dbo,
6725                total_plan_assets: entity_total_plan_assets,
6726                // Audit data
6727                related_party_transaction_count: rp_count,
6728                subsequent_event_count: se_count,
6729                adjusting_event_count: adjusting_count,
6730                ..NotesGeneratorContext::default()
6731            };
6732
6733            let entity_notes = notes_gen.generate(&ctx);
6734            let standard_note_count = entity_notes.len() as u32;
6735            info!(
6736                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6737                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6738            );
6739            financial_reporting
6740                .notes_to_financial_statements
6741                .extend(entity_notes);
6742
6743            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6744            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6745                .debt_instruments
6746                .iter()
6747                .filter(|d| d.entity_id == company.code)
6748                .map(|d| {
6749                    (
6750                        format!("{:?}", d.instrument_type),
6751                        d.principal,
6752                        d.maturity_date.to_string(),
6753                    )
6754                })
6755                .collect();
6756
6757            let hedge_count = treasury.hedge_relationships.len();
6758            let effective_hedges = treasury
6759                .hedge_relationships
6760                .iter()
6761                .filter(|h| h.is_effective)
6762                .count();
6763            let total_notional: rust_decimal::Decimal = treasury
6764                .hedging_instruments
6765                .iter()
6766                .map(|h| h.notional_amount)
6767                .sum();
6768            let total_fair_value: rust_decimal::Decimal = treasury
6769                .hedging_instruments
6770                .iter()
6771                .map(|h| h.fair_value)
6772                .sum();
6773
6774            // Join provision_movements with provisions to get entity/type info
6775            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6776                .provisions
6777                .iter()
6778                .filter(|p| p.entity_code == company.code)
6779                .map(|p| p.id.as_str())
6780                .collect();
6781            let provision_movements: Vec<(
6782                String,
6783                rust_decimal::Decimal,
6784                rust_decimal::Decimal,
6785                rust_decimal::Decimal,
6786            )> = accounting_standards
6787                .provision_movements
6788                .iter()
6789                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6790                .map(|m| {
6791                    let prov_type = accounting_standards
6792                        .provisions
6793                        .iter()
6794                        .find(|p| p.id == m.provision_id)
6795                        .map(|p| format!("{:?}", p.provision_type))
6796                        .unwrap_or_else(|| "Unknown".to_string());
6797                    (prov_type, m.opening, m.additions, m.closing)
6798                })
6799                .collect();
6800
6801            let enhanced_ctx = EnhancedNotesContext {
6802                entity_code: company.code.clone(),
6803                period: format!("FY{}", fiscal_year),
6804                currency: company.currency.clone(),
6805                // Inventory breakdown: best-effort using zero (would need balance tracker)
6806                finished_goods_value: rust_decimal::Decimal::ZERO,
6807                wip_value: rust_decimal::Decimal::ZERO,
6808                raw_materials_value: rust_decimal::Decimal::ZERO,
6809                debt_instruments,
6810                hedge_count,
6811                effective_hedges,
6812                total_notional,
6813                total_fair_value,
6814                provision_movements,
6815            };
6816
6817            let enhanced_notes =
6818                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6819            if !enhanced_notes.is_empty() {
6820                info!(
6821                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6822                    company.code,
6823                    enhanced_notes.len(),
6824                    enhanced_ctx.debt_instruments.len(),
6825                    hedge_count,
6826                    enhanced_ctx.provision_movements.len(),
6827                );
6828                financial_reporting
6829                    .notes_to_financial_statements
6830                    .extend(enhanced_notes);
6831            }
6832        }
6833    }
6834
6835    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6836    ///
6837    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6838    /// posted in the journal entries flows through to the trial balance, using the real
6839    /// GL account numbers from the CoA.
6840    fn build_trial_balance_from_entries(
6841        journal_entries: &[JournalEntry],
6842        coa: &ChartOfAccounts,
6843        company_code: &str,
6844        fiscal_year: u16,
6845        fiscal_period: u8,
6846    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6847        use rust_decimal::Decimal;
6848
6849        // Accumulate total debits and credits per GL account
6850        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6851        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6852
6853        for je in journal_entries {
6854            // Filter to matching company, fiscal year, and period
6855            if je.header.company_code != company_code
6856                || je.header.fiscal_year != fiscal_year
6857                || je.header.fiscal_period != fiscal_period
6858            {
6859                continue;
6860            }
6861
6862            for line in &je.lines {
6863                let acct = &line.gl_account;
6864                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6865                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6866            }
6867        }
6868
6869        // Build a TrialBalanceEntry for each account that had activity
6870        let mut all_accounts: Vec<&String> = account_debits
6871            .keys()
6872            .chain(account_credits.keys())
6873            .collect::<std::collections::HashSet<_>>()
6874            .into_iter()
6875            .collect();
6876        all_accounts.sort();
6877
6878        let mut entries = Vec::new();
6879
6880        for acct_number in all_accounts {
6881            let debit = account_debits
6882                .get(acct_number)
6883                .copied()
6884                .unwrap_or(Decimal::ZERO);
6885            let credit = account_credits
6886                .get(acct_number)
6887                .copied()
6888                .unwrap_or(Decimal::ZERO);
6889
6890            if debit.is_zero() && credit.is_zero() {
6891                continue;
6892            }
6893
6894            // Look up account name from CoA, fall back to "Account {code}"
6895            let account_name = coa
6896                .get_account(acct_number)
6897                .map(|gl| gl.short_description.clone())
6898                .unwrap_or_else(|| format!("Account {acct_number}"));
6899
6900            // Map account code prefix to the category strings expected by
6901            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6902            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6903            // OperatingExpenses).
6904            let category = Self::category_from_account_code(acct_number);
6905
6906            entries.push(datasynth_generators::TrialBalanceEntry {
6907                account_code: acct_number.clone(),
6908                account_name,
6909                category,
6910                debit_balance: debit,
6911                credit_balance: credit,
6912            });
6913        }
6914
6915        entries
6916    }
6917
6918    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6919    /// (and including) the given period end date.
6920    ///
6921    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6922    /// while income statement accounts (revenue, expenses) show only the current period.
6923    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6924    fn build_cumulative_trial_balance(
6925        journal_entries: &[JournalEntry],
6926        coa: &ChartOfAccounts,
6927        company_code: &str,
6928        start_date: NaiveDate,
6929        period_end: NaiveDate,
6930        fiscal_year: u16,
6931        fiscal_period: u8,
6932    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6933        use rust_decimal::Decimal;
6934
6935        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6936        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6937        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6938
6939        // Accumulate debits/credits for income statement accounts (current period only)
6940        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6941        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6942
6943        for je in journal_entries {
6944            if je.header.company_code != company_code {
6945                continue;
6946            }
6947
6948            for line in &je.lines {
6949                let acct = &line.gl_account;
6950                let category = Self::category_from_account_code(acct);
6951                let is_bs_account = matches!(
6952                    category.as_str(),
6953                    "Cash"
6954                        | "Receivables"
6955                        | "Inventory"
6956                        | "FixedAssets"
6957                        | "Payables"
6958                        | "AccruedLiabilities"
6959                        | "LongTermDebt"
6960                        | "Equity"
6961                );
6962
6963                if is_bs_account {
6964                    // Balance sheet: accumulate from start through period_end
6965                    if je.header.document_date <= period_end
6966                        && je.header.document_date >= start_date
6967                    {
6968                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6969                            line.debit_amount;
6970                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6971                            line.credit_amount;
6972                    }
6973                } else {
6974                    // Income statement: current period only
6975                    if je.header.fiscal_year == fiscal_year
6976                        && je.header.fiscal_period == fiscal_period
6977                    {
6978                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6979                            line.debit_amount;
6980                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6981                            line.credit_amount;
6982                    }
6983                }
6984            }
6985        }
6986
6987        // Merge all accounts
6988        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6989        all_accounts.extend(bs_debits.keys().cloned());
6990        all_accounts.extend(bs_credits.keys().cloned());
6991        all_accounts.extend(is_debits.keys().cloned());
6992        all_accounts.extend(is_credits.keys().cloned());
6993
6994        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6995        sorted_accounts.sort();
6996
6997        let mut entries = Vec::new();
6998
6999        for acct_number in &sorted_accounts {
7000            let category = Self::category_from_account_code(acct_number);
7001            let is_bs_account = matches!(
7002                category.as_str(),
7003                "Cash"
7004                    | "Receivables"
7005                    | "Inventory"
7006                    | "FixedAssets"
7007                    | "Payables"
7008                    | "AccruedLiabilities"
7009                    | "LongTermDebt"
7010                    | "Equity"
7011            );
7012
7013            let (debit, credit) = if is_bs_account {
7014                (
7015                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7016                    bs_credits
7017                        .get(acct_number)
7018                        .copied()
7019                        .unwrap_or(Decimal::ZERO),
7020                )
7021            } else {
7022                (
7023                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7024                    is_credits
7025                        .get(acct_number)
7026                        .copied()
7027                        .unwrap_or(Decimal::ZERO),
7028                )
7029            };
7030
7031            if debit.is_zero() && credit.is_zero() {
7032                continue;
7033            }
7034
7035            let account_name = coa
7036                .get_account(acct_number)
7037                .map(|gl| gl.short_description.clone())
7038                .unwrap_or_else(|| format!("Account {acct_number}"));
7039
7040            entries.push(datasynth_generators::TrialBalanceEntry {
7041                account_code: acct_number.clone(),
7042                account_name,
7043                category,
7044                debit_balance: debit,
7045                credit_balance: credit,
7046            });
7047        }
7048
7049        entries
7050    }
7051
7052    /// Build a JE-derived cash flow statement using the indirect method.
7053    ///
7054    /// Compares current and prior cumulative trial balances to derive working capital
7055    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7056    fn build_cash_flow_from_trial_balances(
7057        current_tb: &[datasynth_generators::TrialBalanceEntry],
7058        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7059        net_income: rust_decimal::Decimal,
7060    ) -> Vec<CashFlowItem> {
7061        use rust_decimal::Decimal;
7062
7063        // Helper: aggregate a TB by category and return net (debit - credit)
7064        let aggregate =
7065            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7066                let mut map: HashMap<String, Decimal> = HashMap::new();
7067                for entry in tb {
7068                    let net = entry.debit_balance - entry.credit_balance;
7069                    *map.entry(entry.category.clone()).or_default() += net;
7070                }
7071                map
7072            };
7073
7074        let current = aggregate(current_tb);
7075        let prior = prior_tb.map(aggregate);
7076
7077        // Get balance for a category, defaulting to zero
7078        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7079            *map.get(key).unwrap_or(&Decimal::ZERO)
7080        };
7081
7082        // Compute change: current - prior (or current if no prior)
7083        let change = |key: &str| -> Decimal {
7084            let curr = get(&current, key);
7085            match &prior {
7086                Some(p) => curr - get(p, key),
7087                None => curr,
7088            }
7089        };
7090
7091        // Operating activities (indirect method)
7092        // Depreciation add-back: approximate from FixedAssets decrease
7093        let fixed_asset_change = change("FixedAssets");
7094        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7095            -fixed_asset_change
7096        } else {
7097            Decimal::ZERO
7098        };
7099
7100        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7101        let ar_change = change("Receivables");
7102        let inventory_change = change("Inventory");
7103        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7104        let ap_change = change("Payables");
7105        let accrued_change = change("AccruedLiabilities");
7106
7107        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7108            + (-ap_change)
7109            + (-accrued_change);
7110
7111        // Investing activities
7112        let capex = if fixed_asset_change > Decimal::ZERO {
7113            -fixed_asset_change
7114        } else {
7115            Decimal::ZERO
7116        };
7117        let investing_cf = capex;
7118
7119        // Financing activities
7120        let debt_change = -change("LongTermDebt");
7121        let equity_change = -change("Equity");
7122        let financing_cf = debt_change + equity_change;
7123
7124        let net_change = operating_cf + investing_cf + financing_cf;
7125
7126        vec![
7127            CashFlowItem {
7128                item_code: "CF-NI".to_string(),
7129                label: "Net Income".to_string(),
7130                category: CashFlowCategory::Operating,
7131                amount: net_income,
7132                amount_prior: None,
7133                sort_order: 1,
7134                is_total: false,
7135            },
7136            CashFlowItem {
7137                item_code: "CF-DEP".to_string(),
7138                label: "Depreciation & Amortization".to_string(),
7139                category: CashFlowCategory::Operating,
7140                amount: depreciation_addback,
7141                amount_prior: None,
7142                sort_order: 2,
7143                is_total: false,
7144            },
7145            CashFlowItem {
7146                item_code: "CF-AR".to_string(),
7147                label: "Change in Accounts Receivable".to_string(),
7148                category: CashFlowCategory::Operating,
7149                amount: -ar_change,
7150                amount_prior: None,
7151                sort_order: 3,
7152                is_total: false,
7153            },
7154            CashFlowItem {
7155                item_code: "CF-AP".to_string(),
7156                label: "Change in Accounts Payable".to_string(),
7157                category: CashFlowCategory::Operating,
7158                amount: -ap_change,
7159                amount_prior: None,
7160                sort_order: 4,
7161                is_total: false,
7162            },
7163            CashFlowItem {
7164                item_code: "CF-INV".to_string(),
7165                label: "Change in Inventory".to_string(),
7166                category: CashFlowCategory::Operating,
7167                amount: -inventory_change,
7168                amount_prior: None,
7169                sort_order: 5,
7170                is_total: false,
7171            },
7172            CashFlowItem {
7173                item_code: "CF-OP".to_string(),
7174                label: "Net Cash from Operating Activities".to_string(),
7175                category: CashFlowCategory::Operating,
7176                amount: operating_cf,
7177                amount_prior: None,
7178                sort_order: 6,
7179                is_total: true,
7180            },
7181            CashFlowItem {
7182                item_code: "CF-CAPEX".to_string(),
7183                label: "Capital Expenditures".to_string(),
7184                category: CashFlowCategory::Investing,
7185                amount: capex,
7186                amount_prior: None,
7187                sort_order: 7,
7188                is_total: false,
7189            },
7190            CashFlowItem {
7191                item_code: "CF-INV-T".to_string(),
7192                label: "Net Cash from Investing Activities".to_string(),
7193                category: CashFlowCategory::Investing,
7194                amount: investing_cf,
7195                amount_prior: None,
7196                sort_order: 8,
7197                is_total: true,
7198            },
7199            CashFlowItem {
7200                item_code: "CF-DEBT".to_string(),
7201                label: "Net Borrowings / (Repayments)".to_string(),
7202                category: CashFlowCategory::Financing,
7203                amount: debt_change,
7204                amount_prior: None,
7205                sort_order: 9,
7206                is_total: false,
7207            },
7208            CashFlowItem {
7209                item_code: "CF-EQ".to_string(),
7210                label: "Equity Changes".to_string(),
7211                category: CashFlowCategory::Financing,
7212                amount: equity_change,
7213                amount_prior: None,
7214                sort_order: 10,
7215                is_total: false,
7216            },
7217            CashFlowItem {
7218                item_code: "CF-FIN-T".to_string(),
7219                label: "Net Cash from Financing Activities".to_string(),
7220                category: CashFlowCategory::Financing,
7221                amount: financing_cf,
7222                amount_prior: None,
7223                sort_order: 11,
7224                is_total: true,
7225            },
7226            CashFlowItem {
7227                item_code: "CF-NET".to_string(),
7228                label: "Net Change in Cash".to_string(),
7229                category: CashFlowCategory::Operating,
7230                amount: net_change,
7231                amount_prior: None,
7232                sort_order: 12,
7233                is_total: true,
7234            },
7235        ]
7236    }
7237
7238    /// Calculate net income from a set of trial balance entries.
7239    ///
7240    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7241    fn calculate_net_income_from_tb(
7242        tb: &[datasynth_generators::TrialBalanceEntry],
7243    ) -> rust_decimal::Decimal {
7244        use rust_decimal::Decimal;
7245
7246        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7247        for entry in tb {
7248            let net = entry.debit_balance - entry.credit_balance;
7249            *aggregated.entry(entry.category.clone()).or_default() += net;
7250        }
7251
7252        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7253        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7254        let opex = *aggregated
7255            .get("OperatingExpenses")
7256            .unwrap_or(&Decimal::ZERO);
7257        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7258        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7259
7260        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7261        // other_income is typically negative (credit), other_expenses is typically positive
7262        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7263        let tax_rate = Decimal::new(25, 2); // 0.25
7264        let tax = operating_income * tax_rate;
7265        operating_income - tax
7266    }
7267
7268    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7269    ///
7270    /// Uses the first two digits of the account code to classify into the categories
7271    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7272    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7273    /// OperatingExpenses, OtherIncome, OtherExpenses.
7274    fn category_from_account_code(code: &str) -> String {
7275        let prefix: String = code.chars().take(2).collect();
7276        match prefix.as_str() {
7277            "10" => "Cash",
7278            "11" => "Receivables",
7279            "12" | "13" | "14" => "Inventory",
7280            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7281            "20" => "Payables",
7282            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7283            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7284            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7285            "40" | "41" | "42" | "43" | "44" => "Revenue",
7286            "50" | "51" | "52" => "CostOfSales",
7287            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7288                "OperatingExpenses"
7289            }
7290            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7291            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7292            _ => "OperatingExpenses",
7293        }
7294        .to_string()
7295    }
7296
7297    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7298    fn phase_hr_data(
7299        &mut self,
7300        stats: &mut EnhancedGenerationStatistics,
7301    ) -> SynthResult<HrSnapshot> {
7302        if !self.phase_config.generate_hr {
7303            debug!("Phase 16: Skipped (HR generation disabled)");
7304            return Ok(HrSnapshot::default());
7305        }
7306
7307        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7308
7309        let seed = self.seed;
7310        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7311            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7312        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7313        let company_code = self
7314            .config
7315            .companies
7316            .first()
7317            .map(|c| c.code.as_str())
7318            .unwrap_or("1000");
7319        let currency = self
7320            .config
7321            .companies
7322            .first()
7323            .map(|c| c.currency.as_str())
7324            .unwrap_or("USD");
7325
7326        let employee_ids: Vec<String> = self
7327            .master_data
7328            .employees
7329            .iter()
7330            .map(|e| e.employee_id.clone())
7331            .collect();
7332
7333        if employee_ids.is_empty() {
7334            debug!("Phase 16: Skipped (no employees available)");
7335            return Ok(HrSnapshot::default());
7336        }
7337
7338        // Extract cost-center pool from master data employees for cross-reference
7339        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7340        let cost_center_ids: Vec<String> = self
7341            .master_data
7342            .employees
7343            .iter()
7344            .filter_map(|e| e.cost_center.clone())
7345            .collect::<std::collections::HashSet<_>>()
7346            .into_iter()
7347            .collect();
7348
7349        let mut snapshot = HrSnapshot::default();
7350
7351        // Generate payroll runs (one per month)
7352        if self.config.hr.payroll.enabled {
7353            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7354                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7355
7356            // Look up country pack for payroll deductions and labels
7357            let payroll_pack = self.primary_pack();
7358
7359            // Store the pack on the generator so generate() resolves
7360            // localized deduction rates and labels from it.
7361            payroll_gen.set_country_pack(payroll_pack.clone());
7362
7363            let employees_with_salary: Vec<(
7364                String,
7365                rust_decimal::Decimal,
7366                Option<String>,
7367                Option<String>,
7368            )> = self
7369                .master_data
7370                .employees
7371                .iter()
7372                .map(|e| {
7373                    // Use the employee's actual annual base salary.
7374                    // Fall back to $60,000 / yr if somehow zero.
7375                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7376                        e.base_salary
7377                    } else {
7378                        rust_decimal::Decimal::from(60_000)
7379                    };
7380                    (
7381                        e.employee_id.clone(),
7382                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7383                        e.cost_center.clone(),
7384                        e.department_id.clone(),
7385                    )
7386                })
7387                .collect();
7388
7389            // Use generate_with_changes when employee change history is available
7390            // so that salary adjustments, transfers, etc. are reflected in payroll.
7391            let change_history = &self.master_data.employee_change_history;
7392            let has_changes = !change_history.is_empty();
7393            if has_changes {
7394                debug!(
7395                    "Payroll will incorporate {} employee change events",
7396                    change_history.len()
7397                );
7398            }
7399
7400            for month in 0..self.config.global.period_months {
7401                let period_start = start_date + chrono::Months::new(month);
7402                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7403                let (run, items) = if has_changes {
7404                    payroll_gen.generate_with_changes(
7405                        company_code,
7406                        &employees_with_salary,
7407                        period_start,
7408                        period_end,
7409                        currency,
7410                        change_history,
7411                    )
7412                } else {
7413                    payroll_gen.generate(
7414                        company_code,
7415                        &employees_with_salary,
7416                        period_start,
7417                        period_end,
7418                        currency,
7419                    )
7420                };
7421                snapshot.payroll_runs.push(run);
7422                snapshot.payroll_run_count += 1;
7423                snapshot.payroll_line_item_count += items.len();
7424                snapshot.payroll_line_items.extend(items);
7425            }
7426        }
7427
7428        // Generate time entries
7429        if self.config.hr.time_attendance.enabled {
7430            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7431                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7432            // v3.4.2: when a temporal context is configured, time entries
7433            // respect holidays (not just weekends) and submitted_at lag
7434            // snaps to business days.
7435            if let Some(ctx) = &self.temporal_context {
7436                time_gen.set_temporal_context(Arc::clone(ctx));
7437            }
7438            let entries = time_gen.generate(
7439                &employee_ids,
7440                start_date,
7441                end_date,
7442                &self.config.hr.time_attendance,
7443            );
7444            snapshot.time_entry_count = entries.len();
7445            snapshot.time_entries = entries;
7446        }
7447
7448        // Generate expense reports
7449        if self.config.hr.expenses.enabled {
7450            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7451                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7452            expense_gen.set_country_pack(self.primary_pack().clone());
7453            // v3.4.2: snap submission / approval / paid / line-item dates
7454            // to business days when temporal_context is present.
7455            if let Some(ctx) = &self.temporal_context {
7456                expense_gen.set_temporal_context(Arc::clone(ctx));
7457            }
7458            let company_currency = self
7459                .config
7460                .companies
7461                .first()
7462                .map(|c| c.currency.as_str())
7463                .unwrap_or("USD");
7464            let reports = expense_gen.generate_with_currency(
7465                &employee_ids,
7466                start_date,
7467                end_date,
7468                &self.config.hr.expenses,
7469                company_currency,
7470            );
7471            snapshot.expense_report_count = reports.len();
7472            snapshot.expense_reports = reports;
7473        }
7474
7475        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7476        if self.config.hr.payroll.enabled {
7477            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7478            let employee_pairs: Vec<(String, String)> = self
7479                .master_data
7480                .employees
7481                .iter()
7482                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7483                .collect();
7484            let enrollments =
7485                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7486            snapshot.benefit_enrollment_count = enrollments.len();
7487            snapshot.benefit_enrollments = enrollments;
7488        }
7489
7490        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7491        if self.phase_config.generate_hr {
7492            let entity_name = self
7493                .config
7494                .companies
7495                .first()
7496                .map(|c| c.name.as_str())
7497                .unwrap_or("Entity");
7498            let period_months = self.config.global.period_months;
7499            let period_label = {
7500                let y = start_date.year();
7501                let m = start_date.month();
7502                if period_months >= 12 {
7503                    format!("FY{y}")
7504                } else {
7505                    format!("{y}-{m:02}")
7506                }
7507            };
7508            let reporting_date =
7509                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7510
7511            // Compute average annual salary from actual payroll data when available.
7512            // PayrollRun.total_gross covers all employees for one pay period; we sum
7513            // across all runs and divide by employee_count to get per-employee total,
7514            // then annualise for sub-annual periods.
7515            let avg_salary: Option<rust_decimal::Decimal> = {
7516                let employee_count = employee_ids.len();
7517                if self.config.hr.payroll.enabled
7518                    && employee_count > 0
7519                    && !snapshot.payroll_runs.is_empty()
7520                {
7521                    // Sum total gross pay across all payroll runs for this company
7522                    let total_gross: rust_decimal::Decimal = snapshot
7523                        .payroll_runs
7524                        .iter()
7525                        .filter(|r| r.company_code == company_code)
7526                        .map(|r| r.total_gross)
7527                        .sum();
7528                    if total_gross > rust_decimal::Decimal::ZERO {
7529                        // Annualise: total_gross covers `period_months` months of pay
7530                        let annual_total = if period_months > 0 && period_months < 12 {
7531                            total_gross * rust_decimal::Decimal::from(12u32)
7532                                / rust_decimal::Decimal::from(period_months)
7533                        } else {
7534                            total_gross
7535                        };
7536                        Some(
7537                            (annual_total / rust_decimal::Decimal::from(employee_count))
7538                                .round_dp(2),
7539                        )
7540                    } else {
7541                        None
7542                    }
7543                } else {
7544                    None
7545                }
7546            };
7547
7548            let mut pension_gen =
7549                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7550            let pension_snap = pension_gen.generate(
7551                company_code,
7552                entity_name,
7553                &period_label,
7554                reporting_date,
7555                employee_ids.len(),
7556                currency,
7557                avg_salary,
7558                period_months,
7559            );
7560            snapshot.pension_plan_count = pension_snap.plans.len();
7561            snapshot.pension_plans = pension_snap.plans;
7562            snapshot.pension_obligations = pension_snap.obligations;
7563            snapshot.pension_plan_assets = pension_snap.plan_assets;
7564            snapshot.pension_disclosures = pension_snap.disclosures;
7565            // Pension JEs are returned here so they can be added to entries
7566            // in the caller (stored temporarily on snapshot for transfer).
7567            // We embed them in the hr snapshot for simplicity; the orchestrator
7568            // will extract and extend `entries`.
7569            snapshot.pension_journal_entries = pension_snap.journal_entries;
7570        }
7571
7572        // Generate stock-based compensation (ASC 718 / IFRS 2)
7573        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7574            let period_months = self.config.global.period_months;
7575            let period_label = {
7576                let y = start_date.year();
7577                let m = start_date.month();
7578                if period_months >= 12 {
7579                    format!("FY{y}")
7580                } else {
7581                    format!("{y}-{m:02}")
7582                }
7583            };
7584            let reporting_date =
7585                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7586
7587            let mut stock_comp_gen =
7588                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7589            let stock_snap = stock_comp_gen.generate(
7590                company_code,
7591                &employee_ids,
7592                start_date,
7593                &period_label,
7594                reporting_date,
7595                currency,
7596            );
7597            snapshot.stock_grant_count = stock_snap.grants.len();
7598            snapshot.stock_grants = stock_snap.grants;
7599            snapshot.stock_comp_expenses = stock_snap.expenses;
7600            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7601        }
7602
7603        stats.payroll_run_count = snapshot.payroll_run_count;
7604        stats.time_entry_count = snapshot.time_entry_count;
7605        stats.expense_report_count = snapshot.expense_report_count;
7606        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7607        stats.pension_plan_count = snapshot.pension_plan_count;
7608        stats.stock_grant_count = snapshot.stock_grant_count;
7609
7610        info!(
7611            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7612            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7613            snapshot.time_entry_count, snapshot.expense_report_count,
7614            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7615            snapshot.stock_grant_count
7616        );
7617        self.check_resources_with_log("post-hr")?;
7618
7619        Ok(snapshot)
7620    }
7621
7622    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7623    fn phase_accounting_standards(
7624        &mut self,
7625        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7626        journal_entries: &[JournalEntry],
7627        stats: &mut EnhancedGenerationStatistics,
7628    ) -> SynthResult<AccountingStandardsSnapshot> {
7629        if !self.phase_config.generate_accounting_standards {
7630            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7631            return Ok(AccountingStandardsSnapshot::default());
7632        }
7633        info!("Phase 17: Generating Accounting Standards Data");
7634
7635        let seed = self.seed;
7636        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7637            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7638        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7639        let company_code = self
7640            .config
7641            .companies
7642            .first()
7643            .map(|c| c.code.as_str())
7644            .unwrap_or("1000");
7645        let currency = self
7646            .config
7647            .companies
7648            .first()
7649            .map(|c| c.currency.as_str())
7650            .unwrap_or("USD");
7651
7652        // Convert config framework to standards framework.
7653        // If the user explicitly set a framework in the YAML config, use that.
7654        // Otherwise, fall back to the country pack's accounting.framework field,
7655        // and if that is also absent or unrecognised, default to US GAAP.
7656        let framework = match self.config.accounting_standards.framework {
7657            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7658                datasynth_standards::framework::AccountingFramework::UsGaap
7659            }
7660            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7661                datasynth_standards::framework::AccountingFramework::Ifrs
7662            }
7663            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7664                datasynth_standards::framework::AccountingFramework::DualReporting
7665            }
7666            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7667                datasynth_standards::framework::AccountingFramework::FrenchGaap
7668            }
7669            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7670                datasynth_standards::framework::AccountingFramework::GermanGaap
7671            }
7672            None => {
7673                // Derive framework from the primary company's country pack
7674                let pack = self.primary_pack();
7675                let pack_fw = pack.accounting.framework.as_str();
7676                match pack_fw {
7677                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7678                    "dual_reporting" => {
7679                        datasynth_standards::framework::AccountingFramework::DualReporting
7680                    }
7681                    "french_gaap" => {
7682                        datasynth_standards::framework::AccountingFramework::FrenchGaap
7683                    }
7684                    "german_gaap" | "hgb" => {
7685                        datasynth_standards::framework::AccountingFramework::GermanGaap
7686                    }
7687                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
7688                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7689                }
7690            }
7691        };
7692
7693        let mut snapshot = AccountingStandardsSnapshot::default();
7694
7695        // Revenue recognition
7696        if self.config.accounting_standards.revenue_recognition.enabled {
7697            let customer_ids: Vec<String> = self
7698                .master_data
7699                .customers
7700                .iter()
7701                .map(|c| c.customer_id.clone())
7702                .collect();
7703
7704            if !customer_ids.is_empty() {
7705                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7706                let contracts = rev_gen.generate(
7707                    company_code,
7708                    &customer_ids,
7709                    start_date,
7710                    end_date,
7711                    currency,
7712                    &self.config.accounting_standards.revenue_recognition,
7713                    framework,
7714                );
7715                snapshot.revenue_contract_count = contracts.len();
7716                snapshot.contracts = contracts;
7717            }
7718        }
7719
7720        // Impairment testing
7721        if self.config.accounting_standards.impairment.enabled {
7722            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7723                .master_data
7724                .assets
7725                .iter()
7726                .map(|a| {
7727                    (
7728                        a.asset_id.clone(),
7729                        a.description.clone(),
7730                        a.acquisition_cost,
7731                    )
7732                })
7733                .collect();
7734
7735            if !asset_data.is_empty() {
7736                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7737                let tests = imp_gen.generate(
7738                    company_code,
7739                    &asset_data,
7740                    end_date,
7741                    &self.config.accounting_standards.impairment,
7742                    framework,
7743                );
7744                snapshot.impairment_test_count = tests.len();
7745                snapshot.impairment_tests = tests;
7746            }
7747        }
7748
7749        // Business combinations (IFRS 3 / ASC 805)
7750        if self
7751            .config
7752            .accounting_standards
7753            .business_combinations
7754            .enabled
7755        {
7756            let bc_config = &self.config.accounting_standards.business_combinations;
7757            let framework_str = match framework {
7758                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7759                _ => "US_GAAP",
7760            };
7761            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7762            let bc_snap = bc_gen.generate(
7763                company_code,
7764                currency,
7765                start_date,
7766                end_date,
7767                bc_config.acquisition_count,
7768                framework_str,
7769            );
7770            snapshot.business_combination_count = bc_snap.combinations.len();
7771            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7772            snapshot.business_combinations = bc_snap.combinations;
7773        }
7774
7775        // Expected Credit Loss (IFRS 9 / ASC 326)
7776        if self
7777            .config
7778            .accounting_standards
7779            .expected_credit_loss
7780            .enabled
7781        {
7782            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7783            let framework_str = match framework {
7784                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7785                _ => "ASC_326",
7786            };
7787
7788            // Use AR aging data from the subledger snapshot if available;
7789            // otherwise generate synthetic bucket exposures.
7790            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7791
7792            let mut ecl_gen = EclGenerator::new(seed + 43);
7793
7794            // Collect combined bucket totals across all company AR aging reports.
7795            let bucket_exposures: Vec<(
7796                datasynth_core::models::subledger::ar::AgingBucket,
7797                rust_decimal::Decimal,
7798            )> = if ar_aging_reports.is_empty() {
7799                // No AR aging data — synthesise plausible bucket exposures.
7800                use datasynth_core::models::subledger::ar::AgingBucket;
7801                vec![
7802                    (
7803                        AgingBucket::Current,
7804                        rust_decimal::Decimal::from(500_000_u32),
7805                    ),
7806                    (
7807                        AgingBucket::Days1To30,
7808                        rust_decimal::Decimal::from(120_000_u32),
7809                    ),
7810                    (
7811                        AgingBucket::Days31To60,
7812                        rust_decimal::Decimal::from(45_000_u32),
7813                    ),
7814                    (
7815                        AgingBucket::Days61To90,
7816                        rust_decimal::Decimal::from(15_000_u32),
7817                    ),
7818                    (
7819                        AgingBucket::Over90Days,
7820                        rust_decimal::Decimal::from(8_000_u32),
7821                    ),
7822                ]
7823            } else {
7824                use datasynth_core::models::subledger::ar::AgingBucket;
7825                // Sum bucket totals from all reports.
7826                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7827                    std::collections::HashMap::new();
7828                for report in ar_aging_reports {
7829                    for (bucket, amount) in &report.bucket_totals {
7830                        *totals.entry(*bucket).or_default() += amount;
7831                    }
7832                }
7833                AgingBucket::all()
7834                    .into_iter()
7835                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7836                    .collect()
7837            };
7838
7839            let ecl_snap = ecl_gen.generate(
7840                company_code,
7841                end_date,
7842                &bucket_exposures,
7843                ecl_config,
7844                &period_label,
7845                framework_str,
7846            );
7847
7848            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7849            snapshot.ecl_models = ecl_snap.ecl_models;
7850            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7851            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7852        }
7853
7854        // Provisions and contingencies (IAS 37 / ASC 450)
7855        {
7856            let framework_str = match framework {
7857                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7858                _ => "US_GAAP",
7859            };
7860
7861            // Compute actual revenue from the journal entries generated so far.
7862            // The `journal_entries` slice passed to this phase contains all GL entries
7863            // up to and including Period Close. Fall back to a minimum of 100_000 to
7864            // avoid degenerate zero-based provision amounts on first-period datasets.
7865            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7866                .max(rust_decimal::Decimal::from(100_000_u32));
7867
7868            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7869
7870            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7871            let prov_snap = prov_gen.generate(
7872                company_code,
7873                currency,
7874                revenue_proxy,
7875                end_date,
7876                &period_label,
7877                framework_str,
7878                None, // prior_opening: no carry-forward data in single-period runs
7879            );
7880
7881            snapshot.provision_count = prov_snap.provisions.len();
7882            snapshot.provisions = prov_snap.provisions;
7883            snapshot.provision_movements = prov_snap.movements;
7884            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7885            snapshot.provision_journal_entries = prov_snap.journal_entries;
7886        }
7887
7888        // IAS 21 Functional Currency Translation
7889        // For each company whose functional currency differs from the presentation
7890        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7891        {
7892            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7893
7894            let presentation_currency = self
7895                .config
7896                .global
7897                .presentation_currency
7898                .clone()
7899                .unwrap_or_else(|| self.config.global.group_currency.clone());
7900
7901            // Build a minimal rate table populated with approximate rates from
7902            // the FX model base rates (USD-based) so we can do the translation.
7903            let mut rate_table = FxRateTable::new(&presentation_currency);
7904
7905            // Populate with base rates against USD; if presentation_currency is
7906            // not USD we do a best-effort two-step conversion using the table's
7907            // triangulation support.
7908            let base_rates = base_rates_usd();
7909            for (ccy, rate) in &base_rates {
7910                rate_table.add_rate(FxRate::new(
7911                    ccy,
7912                    "USD",
7913                    RateType::Closing,
7914                    end_date,
7915                    *rate,
7916                    "SYNTHETIC",
7917                ));
7918                // Average rate = 98% of closing (approximation).
7919                // 0.98 = 98/100 = Decimal::new(98, 2)
7920                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7921                rate_table.add_rate(FxRate::new(
7922                    ccy,
7923                    "USD",
7924                    RateType::Average,
7925                    end_date,
7926                    avg,
7927                    "SYNTHETIC",
7928                ));
7929            }
7930
7931            let mut translation_results = Vec::new();
7932            for company in &self.config.companies {
7933                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7934                // to ensure the translation produces non-trivial CTA amounts.
7935                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7936                    .max(rust_decimal::Decimal::from(100_000_u32));
7937
7938                let func_ccy = company
7939                    .functional_currency
7940                    .clone()
7941                    .unwrap_or_else(|| company.currency.clone());
7942
7943                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7944                    &company.code,
7945                    &func_ccy,
7946                    &presentation_currency,
7947                    &ias21_period_label,
7948                    end_date,
7949                    company_revenue,
7950                    &rate_table,
7951                );
7952                translation_results.push(result);
7953            }
7954
7955            snapshot.currency_translation_count = translation_results.len();
7956            snapshot.currency_translation_results = translation_results;
7957        }
7958
7959        stats.revenue_contract_count = snapshot.revenue_contract_count;
7960        stats.impairment_test_count = snapshot.impairment_test_count;
7961        stats.business_combination_count = snapshot.business_combination_count;
7962        stats.ecl_model_count = snapshot.ecl_model_count;
7963        stats.provision_count = snapshot.provision_count;
7964
7965        // ------------------------------------------------------------
7966        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
7967        // ------------------------------------------------------------
7968        if self.config.accounting_standards.leases.enabled {
7969            use datasynth_generators::standards::LeaseGenerator;
7970            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7971                .unwrap_or_else(|_| {
7972                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7973                });
7974            let framework =
7975                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7976            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7977            for company in &self.config.companies {
7978                let leases = lease_gen.generate(
7979                    &company.code,
7980                    start_date,
7981                    &self.config.accounting_standards.leases,
7982                    framework,
7983                );
7984                snapshot.lease_count += leases.len();
7985                snapshot.leases.extend(leases);
7986            }
7987            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7988        }
7989
7990        // ------------------------------------------------------------
7991        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
7992        // ------------------------------------------------------------
7993        if self.config.accounting_standards.fair_value.enabled {
7994            use datasynth_generators::standards::FairValueGenerator;
7995            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7996                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7997                + chrono::Months::new(self.config.global.period_months);
7998            let framework =
7999                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8000            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8001            for company in &self.config.companies {
8002                let measurements = fv_gen.generate(
8003                    &company.code,
8004                    end_date,
8005                    &company.currency,
8006                    &self.config.accounting_standards.fair_value,
8007                    framework,
8008                );
8009                snapshot.fair_value_measurement_count += measurements.len();
8010                snapshot.fair_value_measurements.extend(measurements);
8011            }
8012            info!(
8013                "v3.3.1 fair value measurements: {}",
8014                snapshot.fair_value_measurement_count
8015            );
8016        }
8017
8018        // ------------------------------------------------------------
8019        // v3.3.1: Framework reconciliation (dual reporting only)
8020        // ------------------------------------------------------------
8021        if self.config.accounting_standards.generate_differences
8022            && matches!(
8023                self.config.accounting_standards.framework,
8024                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8025            )
8026        {
8027            use datasynth_generators::standards::FrameworkReconciliationGenerator;
8028            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8029                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8030                + chrono::Months::new(self.config.global.period_months);
8031            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8032            for company in &self.config.companies {
8033                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8034                snapshot.framework_difference_count += records.len();
8035                snapshot.framework_differences.extend(records);
8036                snapshot.framework_reconciliations.push(reconciliation);
8037            }
8038            info!(
8039                "v3.3.1 framework reconciliation: {} differences across {} entities",
8040                snapshot.framework_difference_count,
8041                snapshot.framework_reconciliations.len()
8042            );
8043        }
8044
8045        info!(
8046            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8047            snapshot.revenue_contract_count,
8048            snapshot.impairment_test_count,
8049            snapshot.business_combination_count,
8050            snapshot.ecl_model_count,
8051            snapshot.provision_count,
8052            snapshot.currency_translation_count,
8053            snapshot.lease_count,
8054            snapshot.fair_value_measurement_count,
8055            snapshot.framework_difference_count,
8056        );
8057        self.check_resources_with_log("post-accounting-standards")?;
8058
8059        Ok(snapshot)
8060    }
8061
8062    /// v3.3.1: helper to resolve the accounting-standards framework enum
8063    /// from config into the `datasynth_standards::framework::AccountingFramework`
8064    /// type expected by standards generators. Falls back to US GAAP.
8065    fn resolve_accounting_framework(
8066        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8067    ) -> datasynth_standards::framework::AccountingFramework {
8068        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8069        use datasynth_standards::framework::AccountingFramework as Fw;
8070        match cfg {
8071            Some(Cfg::Ifrs) => Fw::Ifrs,
8072            Some(Cfg::DualReporting) => Fw::DualReporting,
8073            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8074            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8075            _ => Fw::UsGaap,
8076        }
8077    }
8078
8079    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8080    fn phase_manufacturing(
8081        &mut self,
8082        stats: &mut EnhancedGenerationStatistics,
8083    ) -> SynthResult<ManufacturingSnapshot> {
8084        if !self.phase_config.generate_manufacturing {
8085            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8086            return Ok(ManufacturingSnapshot::default());
8087        }
8088        info!("Phase 18: Generating Manufacturing Data");
8089
8090        let seed = self.seed;
8091        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8092            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8093        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8094        let company_code = self
8095            .config
8096            .companies
8097            .first()
8098            .map(|c| c.code.as_str())
8099            .unwrap_or("1000");
8100
8101        let material_data: Vec<(String, String)> = self
8102            .master_data
8103            .materials
8104            .iter()
8105            .map(|m| (m.material_id.clone(), m.description.clone()))
8106            .collect();
8107
8108        if material_data.is_empty() {
8109            debug!("Phase 18: Skipped (no materials available)");
8110            return Ok(ManufacturingSnapshot::default());
8111        }
8112
8113        let mut snapshot = ManufacturingSnapshot::default();
8114
8115        // Generate production orders
8116        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8117        // v3.4.3: snap planned / actual / operation dates to business days.
8118        if let Some(ctx) = &self.temporal_context {
8119            prod_gen.set_temporal_context(Arc::clone(ctx));
8120        }
8121        let production_orders = prod_gen.generate(
8122            company_code,
8123            &material_data,
8124            start_date,
8125            end_date,
8126            &self.config.manufacturing.production_orders,
8127            &self.config.manufacturing.costing,
8128            &self.config.manufacturing.routing,
8129        );
8130        snapshot.production_order_count = production_orders.len();
8131
8132        // Generate quality inspections from production orders
8133        let inspection_data: Vec<(String, String, String)> = production_orders
8134            .iter()
8135            .map(|po| {
8136                (
8137                    po.order_id.clone(),
8138                    po.material_id.clone(),
8139                    po.material_description.clone(),
8140                )
8141            })
8142            .collect();
8143
8144        snapshot.production_orders = production_orders;
8145
8146        if !inspection_data.is_empty() {
8147            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8148            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8149            snapshot.quality_inspection_count = inspections.len();
8150            snapshot.quality_inspections = inspections;
8151        }
8152
8153        // Generate cycle counts (one per month)
8154        let storage_locations: Vec<(String, String)> = material_data
8155            .iter()
8156            .enumerate()
8157            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8158            .collect();
8159
8160        let employee_ids: Vec<String> = self
8161            .master_data
8162            .employees
8163            .iter()
8164            .map(|e| e.employee_id.clone())
8165            .collect();
8166        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8167            .with_employee_pool(employee_ids);
8168        let mut cycle_count_total = 0usize;
8169        for month in 0..self.config.global.period_months {
8170            let count_date = start_date + chrono::Months::new(month);
8171            let items_per_count = storage_locations.len().clamp(10, 50);
8172            let cc = cc_gen.generate(
8173                company_code,
8174                &storage_locations,
8175                count_date,
8176                items_per_count,
8177            );
8178            snapshot.cycle_counts.push(cc);
8179            cycle_count_total += 1;
8180        }
8181        snapshot.cycle_count_count = cycle_count_total;
8182
8183        // Generate BOM components
8184        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8185        let bom_components = bom_gen.generate(company_code, &material_data);
8186        snapshot.bom_component_count = bom_components.len();
8187        snapshot.bom_components = bom_components;
8188
8189        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8190        let currency = self
8191            .config
8192            .companies
8193            .first()
8194            .map(|c| c.currency.as_str())
8195            .unwrap_or("USD");
8196        let production_order_ids: Vec<String> = snapshot
8197            .production_orders
8198            .iter()
8199            .map(|po| po.order_id.clone())
8200            .collect();
8201        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8202        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8203            company_code,
8204            &material_data,
8205            start_date,
8206            end_date,
8207            2,
8208            currency,
8209            &production_order_ids,
8210        );
8211        snapshot.inventory_movement_count = inventory_movements.len();
8212        snapshot.inventory_movements = inventory_movements;
8213
8214        stats.production_order_count = snapshot.production_order_count;
8215        stats.quality_inspection_count = snapshot.quality_inspection_count;
8216        stats.cycle_count_count = snapshot.cycle_count_count;
8217        stats.bom_component_count = snapshot.bom_component_count;
8218        stats.inventory_movement_count = snapshot.inventory_movement_count;
8219
8220        info!(
8221            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8222            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8223            snapshot.bom_component_count, snapshot.inventory_movement_count
8224        );
8225        self.check_resources_with_log("post-manufacturing")?;
8226
8227        Ok(snapshot)
8228    }
8229
8230    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8231    fn phase_sales_kpi_budgets(
8232        &mut self,
8233        coa: &Arc<ChartOfAccounts>,
8234        financial_reporting: &FinancialReportingSnapshot,
8235        stats: &mut EnhancedGenerationStatistics,
8236    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8237        if !self.phase_config.generate_sales_kpi_budgets {
8238            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8239            return Ok(SalesKpiBudgetsSnapshot::default());
8240        }
8241        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8242
8243        let seed = self.seed;
8244        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8245            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8246        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8247        let company_code = self
8248            .config
8249            .companies
8250            .first()
8251            .map(|c| c.code.as_str())
8252            .unwrap_or("1000");
8253
8254        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8255
8256        // Sales Quotes
8257        if self.config.sales_quotes.enabled {
8258            let customer_data: Vec<(String, String)> = self
8259                .master_data
8260                .customers
8261                .iter()
8262                .map(|c| (c.customer_id.clone(), c.name.clone()))
8263                .collect();
8264            let material_data: Vec<(String, String)> = self
8265                .master_data
8266                .materials
8267                .iter()
8268                .map(|m| (m.material_id.clone(), m.description.clone()))
8269                .collect();
8270
8271            if !customer_data.is_empty() && !material_data.is_empty() {
8272                let employee_ids: Vec<String> = self
8273                    .master_data
8274                    .employees
8275                    .iter()
8276                    .map(|e| e.employee_id.clone())
8277                    .collect();
8278                let customer_ids: Vec<String> = self
8279                    .master_data
8280                    .customers
8281                    .iter()
8282                    .map(|c| c.customer_id.clone())
8283                    .collect();
8284                let company_currency = self
8285                    .config
8286                    .companies
8287                    .first()
8288                    .map(|c| c.currency.as_str())
8289                    .unwrap_or("USD");
8290
8291                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8292                    .with_pools(employee_ids, customer_ids);
8293                let quotes = quote_gen.generate_with_currency(
8294                    company_code,
8295                    &customer_data,
8296                    &material_data,
8297                    start_date,
8298                    end_date,
8299                    &self.config.sales_quotes,
8300                    company_currency,
8301                );
8302                snapshot.sales_quote_count = quotes.len();
8303                snapshot.sales_quotes = quotes;
8304            }
8305        }
8306
8307        // Management KPIs
8308        if self.config.financial_reporting.management_kpis.enabled {
8309            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8310            let mut kpis = kpi_gen.generate(
8311                company_code,
8312                start_date,
8313                end_date,
8314                &self.config.financial_reporting.management_kpis,
8315            );
8316
8317            // Override financial KPIs with actual data from financial statements
8318            {
8319                use rust_decimal::Decimal;
8320
8321                if let Some(income_stmt) =
8322                    financial_reporting.financial_statements.iter().find(|fs| {
8323                        fs.statement_type == StatementType::IncomeStatement
8324                            && fs.company_code == company_code
8325                    })
8326                {
8327                    // Extract revenue and COGS from income statement line items
8328                    let total_revenue: Decimal = income_stmt
8329                        .line_items
8330                        .iter()
8331                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8332                        .map(|li| li.amount)
8333                        .sum();
8334                    let total_cogs: Decimal = income_stmt
8335                        .line_items
8336                        .iter()
8337                        .filter(|li| {
8338                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8339                                && !li.is_total
8340                        })
8341                        .map(|li| li.amount.abs())
8342                        .sum();
8343                    let total_opex: Decimal = income_stmt
8344                        .line_items
8345                        .iter()
8346                        .filter(|li| {
8347                            li.section.contains("Expense")
8348                                && !li.is_total
8349                                && !li.section.contains("Cost")
8350                        })
8351                        .map(|li| li.amount.abs())
8352                        .sum();
8353
8354                    if total_revenue > Decimal::ZERO {
8355                        let hundred = Decimal::from(100);
8356                        let gross_margin_pct =
8357                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8358                        let operating_income = total_revenue - total_cogs - total_opex;
8359                        let op_margin_pct =
8360                            (operating_income * hundred / total_revenue).round_dp(2);
8361
8362                        // Override gross margin and operating margin KPIs
8363                        for kpi in &mut kpis {
8364                            if kpi.name == "Gross Margin" {
8365                                kpi.value = gross_margin_pct;
8366                            } else if kpi.name == "Operating Margin" {
8367                                kpi.value = op_margin_pct;
8368                            }
8369                        }
8370                    }
8371                }
8372
8373                // Override Current Ratio from balance sheet
8374                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8375                    fs.statement_type == StatementType::BalanceSheet
8376                        && fs.company_code == company_code
8377                }) {
8378                    let current_assets: Decimal = bs
8379                        .line_items
8380                        .iter()
8381                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8382                        .map(|li| li.amount)
8383                        .sum();
8384                    let current_liabilities: Decimal = bs
8385                        .line_items
8386                        .iter()
8387                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8388                        .map(|li| li.amount.abs())
8389                        .sum();
8390
8391                    if current_liabilities > Decimal::ZERO {
8392                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8393                        for kpi in &mut kpis {
8394                            if kpi.name == "Current Ratio" {
8395                                kpi.value = current_ratio;
8396                            }
8397                        }
8398                    }
8399                }
8400            }
8401
8402            snapshot.kpi_count = kpis.len();
8403            snapshot.kpis = kpis;
8404        }
8405
8406        // Budgets
8407        if self.config.financial_reporting.budgets.enabled {
8408            let account_data: Vec<(String, String)> = coa
8409                .accounts
8410                .iter()
8411                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8412                .collect();
8413
8414            if !account_data.is_empty() {
8415                let fiscal_year = start_date.year() as u32;
8416                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8417                let budget = budget_gen.generate(
8418                    company_code,
8419                    fiscal_year,
8420                    &account_data,
8421                    &self.config.financial_reporting.budgets,
8422                );
8423                snapshot.budget_line_count = budget.line_items.len();
8424                snapshot.budgets.push(budget);
8425            }
8426        }
8427
8428        stats.sales_quote_count = snapshot.sales_quote_count;
8429        stats.kpi_count = snapshot.kpi_count;
8430        stats.budget_line_count = snapshot.budget_line_count;
8431
8432        info!(
8433            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8434            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8435        );
8436        self.check_resources_with_log("post-sales-kpi-budgets")?;
8437
8438        Ok(snapshot)
8439    }
8440
8441    /// Compute pre-tax income for a single company from actual journal entries.
8442    ///
8443    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8444    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8445    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8446    /// and the period-close engine so that all three use a consistent definition.
8447    fn compute_pre_tax_income(
8448        company_code: &str,
8449        journal_entries: &[JournalEntry],
8450    ) -> rust_decimal::Decimal {
8451        use datasynth_core::accounts::AccountCategory;
8452        use rust_decimal::Decimal;
8453
8454        let mut total_revenue = Decimal::ZERO;
8455        let mut total_expenses = Decimal::ZERO;
8456
8457        for je in journal_entries {
8458            if je.header.company_code != company_code {
8459                continue;
8460            }
8461            for line in &je.lines {
8462                let cat = AccountCategory::from_account(&line.gl_account);
8463                match cat {
8464                    AccountCategory::Revenue => {
8465                        total_revenue += line.credit_amount - line.debit_amount;
8466                    }
8467                    AccountCategory::Cogs
8468                    | AccountCategory::OperatingExpense
8469                    | AccountCategory::OtherIncomeExpense => {
8470                        total_expenses += line.debit_amount - line.credit_amount;
8471                    }
8472                    _ => {}
8473                }
8474            }
8475        }
8476
8477        let pti = (total_revenue - total_expenses).round_dp(2);
8478        if pti == rust_decimal::Decimal::ZERO {
8479            // No income statement activity yet — fall back to a synthetic value so the
8480            // tax provision generator can still produce meaningful output.
8481            rust_decimal::Decimal::from(1_000_000u32)
8482        } else {
8483            pti
8484        }
8485    }
8486
8487    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8488    fn phase_tax_generation(
8489        &mut self,
8490        document_flows: &DocumentFlowSnapshot,
8491        journal_entries: &[JournalEntry],
8492        stats: &mut EnhancedGenerationStatistics,
8493    ) -> SynthResult<TaxSnapshot> {
8494        if !self.phase_config.generate_tax {
8495            debug!("Phase 20: Skipped (tax generation disabled)");
8496            return Ok(TaxSnapshot::default());
8497        }
8498        info!("Phase 20: Generating Tax Data");
8499
8500        let seed = self.seed;
8501        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8502            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8503        let fiscal_year = start_date.year();
8504        let company_code = self
8505            .config
8506            .companies
8507            .first()
8508            .map(|c| c.code.as_str())
8509            .unwrap_or("1000");
8510
8511        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8512            seed + 370,
8513            self.config.tax.clone(),
8514        );
8515
8516        let pack = self.primary_pack().clone();
8517        let (jurisdictions, codes) =
8518            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8519
8520        // Generate tax provisions for each company
8521        let mut provisions = Vec::new();
8522        if self.config.tax.provisions.enabled {
8523            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8524            for company in &self.config.companies {
8525                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8526                let statutory_rate = rust_decimal::Decimal::new(
8527                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8528                    2,
8529                );
8530                let provision = provision_gen.generate(
8531                    &company.code,
8532                    start_date,
8533                    pre_tax_income,
8534                    statutory_rate,
8535                );
8536                provisions.push(provision);
8537            }
8538        }
8539
8540        // Generate tax lines from document invoices
8541        let mut tax_lines = Vec::new();
8542        if !codes.is_empty() {
8543            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8544                datasynth_generators::TaxLineGeneratorConfig::default(),
8545                codes.clone(),
8546                seed + 372,
8547            );
8548
8549            // Tax lines from vendor invoices (input tax)
8550            // Use the first company's country as buyer country
8551            let buyer_country = self
8552                .config
8553                .companies
8554                .first()
8555                .map(|c| c.country.as_str())
8556                .unwrap_or("US");
8557            for vi in &document_flows.vendor_invoices {
8558                let lines = tax_line_gen.generate_for_document(
8559                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8560                    &vi.header.document_id,
8561                    buyer_country, // seller approx same country
8562                    buyer_country,
8563                    vi.payable_amount,
8564                    vi.header.document_date,
8565                    None,
8566                );
8567                tax_lines.extend(lines);
8568            }
8569
8570            // Tax lines from customer invoices (output tax)
8571            for ci in &document_flows.customer_invoices {
8572                let lines = tax_line_gen.generate_for_document(
8573                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8574                    &ci.header.document_id,
8575                    buyer_country, // seller is the company
8576                    buyer_country,
8577                    ci.total_gross_amount,
8578                    ci.header.document_date,
8579                    None,
8580                );
8581                tax_lines.extend(lines);
8582            }
8583        }
8584
8585        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8586        let deferred_tax = {
8587            let companies: Vec<(&str, &str)> = self
8588                .config
8589                .companies
8590                .iter()
8591                .map(|c| (c.code.as_str(), c.country.as_str()))
8592                .collect();
8593            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8594            deferred_gen.generate(&companies, start_date, journal_entries)
8595        };
8596
8597        // Build a document_id → posting_date map so each tax JE uses its
8598        // source document's date rather than a blanket period-end date.
8599        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8600            std::collections::HashMap::new();
8601        for vi in &document_flows.vendor_invoices {
8602            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8603        }
8604        for ci in &document_flows.customer_invoices {
8605            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8606        }
8607
8608        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8609        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8611            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8612                &tax_lines,
8613                company_code,
8614                &doc_dates,
8615                end_date,
8616            );
8617            debug!("Generated {} tax posting JEs", jes.len());
8618            jes
8619        } else {
8620            Vec::new()
8621        };
8622
8623        let snapshot = TaxSnapshot {
8624            jurisdiction_count: jurisdictions.len(),
8625            code_count: codes.len(),
8626            jurisdictions,
8627            codes,
8628            tax_provisions: provisions,
8629            tax_lines,
8630            tax_returns: Vec::new(),
8631            withholding_records: Vec::new(),
8632            tax_anomaly_labels: Vec::new(),
8633            deferred_tax,
8634            tax_posting_journal_entries,
8635        };
8636
8637        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8638        stats.tax_code_count = snapshot.code_count;
8639        stats.tax_provision_count = snapshot.tax_provisions.len();
8640        stats.tax_line_count = snapshot.tax_lines.len();
8641
8642        info!(
8643            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8644            snapshot.jurisdiction_count,
8645            snapshot.code_count,
8646            snapshot.tax_provisions.len(),
8647            snapshot.deferred_tax.temporary_differences.len(),
8648            snapshot.deferred_tax.journal_entries.len(),
8649            snapshot.tax_posting_journal_entries.len(),
8650        );
8651        self.check_resources_with_log("post-tax")?;
8652
8653        Ok(snapshot)
8654    }
8655
8656    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
8657    fn phase_esg_generation(
8658        &mut self,
8659        document_flows: &DocumentFlowSnapshot,
8660        manufacturing: &ManufacturingSnapshot,
8661        stats: &mut EnhancedGenerationStatistics,
8662    ) -> SynthResult<EsgSnapshot> {
8663        if !self.phase_config.generate_esg {
8664            debug!("Phase 21: Skipped (ESG generation disabled)");
8665            return Ok(EsgSnapshot::default());
8666        }
8667        let degradation = self.check_resources()?;
8668        if degradation >= DegradationLevel::Reduced {
8669            debug!(
8670                "Phase skipped due to resource pressure (degradation: {:?})",
8671                degradation
8672            );
8673            return Ok(EsgSnapshot::default());
8674        }
8675        info!("Phase 21: Generating ESG Data");
8676
8677        let seed = self.seed;
8678        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8679            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8680        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8681        let entity_id = self
8682            .config
8683            .companies
8684            .first()
8685            .map(|c| c.code.as_str())
8686            .unwrap_or("1000");
8687
8688        let esg_cfg = &self.config.esg;
8689        let mut snapshot = EsgSnapshot::default();
8690
8691        // Energy consumption (feeds into scope 1 & 2 emissions)
8692        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8693            esg_cfg.environmental.energy.clone(),
8694            seed + 80,
8695        );
8696        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8697
8698        // Water usage
8699        let facility_count = esg_cfg.environmental.energy.facility_count;
8700        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8701        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8702
8703        // Waste
8704        let mut waste_gen = datasynth_generators::WasteGenerator::new(
8705            seed + 82,
8706            esg_cfg.environmental.waste.diversion_target,
8707            facility_count,
8708        );
8709        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8710
8711        // Emissions (scope 1, 2, 3)
8712        let mut emission_gen =
8713            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8714
8715        // Build EnergyInput from energy_records
8716        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8717            .iter()
8718            .map(|e| datasynth_generators::EnergyInput {
8719                facility_id: e.facility_id.clone(),
8720                energy_type: match e.energy_source {
8721                    EnergySourceType::NaturalGas => {
8722                        datasynth_generators::EnergyInputType::NaturalGas
8723                    }
8724                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8725                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8726                    _ => datasynth_generators::EnergyInputType::Electricity,
8727                },
8728                consumption_kwh: e.consumption_kwh,
8729                period: e.period,
8730            })
8731            .collect();
8732
8733        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
8734        if !manufacturing.production_orders.is_empty() {
8735            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8736                &manufacturing.production_orders,
8737                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
8738                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
8739            );
8740            if !mfg_energy.is_empty() {
8741                info!(
8742                    "ESG: {} energy inputs derived from {} production orders",
8743                    mfg_energy.len(),
8744                    manufacturing.production_orders.len(),
8745                );
8746                energy_inputs.extend(mfg_energy);
8747            }
8748        }
8749
8750        let mut emissions = Vec::new();
8751        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8752        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8753
8754        // Scope 3: use vendor spend data from actual payments
8755        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8756            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8757            for payment in &document_flows.payments {
8758                if payment.is_vendor {
8759                    *totals
8760                        .entry(payment.business_partner_id.clone())
8761                        .or_default() += payment.amount;
8762                }
8763            }
8764            totals
8765        };
8766        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8767            .master_data
8768            .vendors
8769            .iter()
8770            .map(|v| {
8771                let spend = vendor_payment_totals
8772                    .get(&v.vendor_id)
8773                    .copied()
8774                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8775                datasynth_generators::VendorSpendInput {
8776                    vendor_id: v.vendor_id.clone(),
8777                    category: format!("{:?}", v.vendor_type).to_lowercase(),
8778                    spend,
8779                    country: v.country.clone(),
8780                }
8781            })
8782            .collect();
8783        if !vendor_spend.is_empty() {
8784            emissions.extend(emission_gen.generate_scope3_purchased_goods(
8785                entity_id,
8786                &vendor_spend,
8787                start_date,
8788                end_date,
8789            ));
8790        }
8791
8792        // Business travel & commuting (scope 3)
8793        let headcount = self.master_data.employees.len() as u32;
8794        if headcount > 0 {
8795            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8796            emissions.extend(emission_gen.generate_scope3_business_travel(
8797                entity_id,
8798                travel_spend,
8799                start_date,
8800            ));
8801            emissions
8802                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8803        }
8804
8805        snapshot.emission_count = emissions.len();
8806        snapshot.emissions = emissions;
8807        snapshot.energy = energy_records;
8808
8809        // Social: Workforce diversity, pay equity, safety
8810        let mut workforce_gen =
8811            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8812        let total_headcount = headcount.max(100);
8813        snapshot.diversity =
8814            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8815        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8816
8817        // v2.4: Derive additional workforce diversity metrics from actual employee data
8818        if !self.master_data.employees.is_empty() {
8819            let hr_diversity = workforce_gen.generate_diversity_from_employees(
8820                entity_id,
8821                &self.master_data.employees,
8822                end_date,
8823            );
8824            if !hr_diversity.is_empty() {
8825                info!(
8826                    "ESG: {} diversity metrics derived from {} actual employees",
8827                    hr_diversity.len(),
8828                    self.master_data.employees.len(),
8829                );
8830                snapshot.diversity.extend(hr_diversity);
8831            }
8832        }
8833
8834        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8835            entity_id,
8836            facility_count,
8837            start_date,
8838            end_date,
8839        );
8840
8841        // Compute safety metrics
8842        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
8843        let safety_metric = workforce_gen.compute_safety_metrics(
8844            entity_id,
8845            &snapshot.safety_incidents,
8846            total_hours,
8847            start_date,
8848        );
8849        snapshot.safety_metrics = vec![safety_metric];
8850
8851        // Governance
8852        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8853            seed + 85,
8854            esg_cfg.governance.board_size,
8855            esg_cfg.governance.independence_target,
8856        );
8857        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8858
8859        // Supplier ESG assessments
8860        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8861            esg_cfg.supply_chain_esg.clone(),
8862            seed + 86,
8863        );
8864        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8865            .master_data
8866            .vendors
8867            .iter()
8868            .map(|v| datasynth_generators::VendorInput {
8869                vendor_id: v.vendor_id.clone(),
8870                country: v.country.clone(),
8871                industry: format!("{:?}", v.vendor_type).to_lowercase(),
8872                quality_score: None,
8873            })
8874            .collect();
8875        snapshot.supplier_assessments =
8876            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8877
8878        // Disclosures
8879        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8880            seed + 87,
8881            esg_cfg.reporting.clone(),
8882            esg_cfg.climate_scenarios.clone(),
8883        );
8884        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8885        snapshot.disclosures = disclosure_gen.generate_disclosures(
8886            entity_id,
8887            &snapshot.materiality,
8888            start_date,
8889            end_date,
8890        );
8891        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8892        snapshot.disclosure_count = snapshot.disclosures.len();
8893
8894        // Anomaly injection
8895        if esg_cfg.anomaly_rate > 0.0 {
8896            let mut anomaly_injector =
8897                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8898            let mut labels = Vec::new();
8899            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8900            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8901            labels.extend(
8902                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8903            );
8904            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8905            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8906            snapshot.anomaly_labels = labels;
8907        }
8908
8909        stats.esg_emission_count = snapshot.emission_count;
8910        stats.esg_disclosure_count = snapshot.disclosure_count;
8911
8912        info!(
8913            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8914            snapshot.emission_count,
8915            snapshot.disclosure_count,
8916            snapshot.supplier_assessments.len()
8917        );
8918        self.check_resources_with_log("post-esg")?;
8919
8920        Ok(snapshot)
8921    }
8922
8923    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
8924    fn phase_treasury_data(
8925        &mut self,
8926        document_flows: &DocumentFlowSnapshot,
8927        subledger: &SubledgerSnapshot,
8928        intercompany: &IntercompanySnapshot,
8929        stats: &mut EnhancedGenerationStatistics,
8930    ) -> SynthResult<TreasurySnapshot> {
8931        if !self.phase_config.generate_treasury {
8932            debug!("Phase 22: Skipped (treasury generation disabled)");
8933            return Ok(TreasurySnapshot::default());
8934        }
8935        let degradation = self.check_resources()?;
8936        if degradation >= DegradationLevel::Reduced {
8937            debug!(
8938                "Phase skipped due to resource pressure (degradation: {:?})",
8939                degradation
8940            );
8941            return Ok(TreasurySnapshot::default());
8942        }
8943        info!("Phase 22: Generating Treasury Data");
8944
8945        let seed = self.seed;
8946        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8947            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8948        let currency = self
8949            .config
8950            .companies
8951            .first()
8952            .map(|c| c.currency.as_str())
8953            .unwrap_or("USD");
8954        let entity_id = self
8955            .config
8956            .companies
8957            .first()
8958            .map(|c| c.code.as_str())
8959            .unwrap_or("1000");
8960
8961        let mut snapshot = TreasurySnapshot::default();
8962
8963        // Generate debt instruments
8964        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8965            self.config.treasury.debt.clone(),
8966            seed + 90,
8967        );
8968        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8969
8970        // Generate hedging instruments (IR swaps for floating-rate debt)
8971        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8972            self.config.treasury.hedging.clone(),
8973            seed + 91,
8974        );
8975        for debt in &snapshot.debt_instruments {
8976            if debt.rate_type == InterestRateType::Variable {
8977                let swap = hedge_gen.generate_ir_swap(
8978                    currency,
8979                    debt.principal,
8980                    debt.origination_date,
8981                    debt.maturity_date,
8982                );
8983                snapshot.hedging_instruments.push(swap);
8984            }
8985        }
8986
8987        // Build FX exposures from foreign-currency payments and generate
8988        // FX forwards + hedge relationship designations via generate() API.
8989        {
8990            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8991            for payment in &document_flows.payments {
8992                if payment.currency != currency {
8993                    let entry = fx_map
8994                        .entry(payment.currency.clone())
8995                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8996                    entry.0 += payment.amount;
8997                    // Use the latest settlement date among grouped payments
8998                    if payment.header.document_date > entry.1 {
8999                        entry.1 = payment.header.document_date;
9000                    }
9001                }
9002            }
9003            if !fx_map.is_empty() {
9004                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9005                    .into_iter()
9006                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
9007                        datasynth_generators::treasury::FxExposure {
9008                            currency_pair: format!("{foreign_ccy}/{currency}"),
9009                            foreign_currency: foreign_ccy,
9010                            net_amount,
9011                            settlement_date,
9012                            description: "AP payment FX exposure".to_string(),
9013                        }
9014                    })
9015                    .collect();
9016                let (fx_instruments, fx_relationships) =
9017                    hedge_gen.generate(start_date, &fx_exposures);
9018                snapshot.hedging_instruments.extend(fx_instruments);
9019                snapshot.hedge_relationships.extend(fx_relationships);
9020            }
9021        }
9022
9023        // Inject anomalies if configured
9024        if self.config.treasury.anomaly_rate > 0.0 {
9025            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9026                seed + 92,
9027                self.config.treasury.anomaly_rate,
9028            );
9029            let mut labels = Vec::new();
9030            labels.extend(
9031                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9032            );
9033            snapshot.treasury_anomaly_labels = labels;
9034        }
9035
9036        // Generate cash positions from payment flows
9037        if self.config.treasury.cash_positioning.enabled {
9038            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9039
9040            // AP payments as outflows
9041            for payment in &document_flows.payments {
9042                cash_flows.push(datasynth_generators::treasury::CashFlow {
9043                    date: payment.header.document_date,
9044                    account_id: format!("{entity_id}-MAIN"),
9045                    amount: payment.amount,
9046                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9047                });
9048            }
9049
9050            // Customer receipts (from O2C chains) as inflows
9051            for chain in &document_flows.o2c_chains {
9052                if let Some(ref receipt) = chain.customer_receipt {
9053                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9054                        date: receipt.header.document_date,
9055                        account_id: format!("{entity_id}-MAIN"),
9056                        amount: receipt.amount,
9057                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9058                    });
9059                }
9060                // Remainder receipts (follow-up to partial payments)
9061                for receipt in &chain.remainder_receipts {
9062                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9063                        date: receipt.header.document_date,
9064                        account_id: format!("{entity_id}-MAIN"),
9065                        amount: receipt.amount,
9066                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9067                    });
9068                }
9069            }
9070
9071            if !cash_flows.is_empty() {
9072                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9073                    self.config.treasury.cash_positioning.clone(),
9074                    seed + 93,
9075                );
9076                let account_id = format!("{entity_id}-MAIN");
9077                snapshot.cash_positions = cash_gen.generate(
9078                    entity_id,
9079                    &account_id,
9080                    currency,
9081                    &cash_flows,
9082                    start_date,
9083                    start_date + chrono::Months::new(self.config.global.period_months),
9084                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9085                );
9086            }
9087        }
9088
9089        // Generate cash forecasts from AR/AP aging
9090        if self.config.treasury.cash_forecasting.enabled {
9091            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9092
9093            // Build AR aging items from subledger AR invoices
9094            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9095                .ar_invoices
9096                .iter()
9097                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9098                .map(|inv| {
9099                    let days_past_due = if inv.due_date < end_date {
9100                        (end_date - inv.due_date).num_days().max(0) as u32
9101                    } else {
9102                        0
9103                    };
9104                    datasynth_generators::treasury::ArAgingItem {
9105                        expected_date: inv.due_date,
9106                        amount: inv.amount_remaining,
9107                        days_past_due,
9108                        document_id: inv.invoice_number.clone(),
9109                    }
9110                })
9111                .collect();
9112
9113            // Build AP aging items from subledger AP invoices
9114            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9115                .ap_invoices
9116                .iter()
9117                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9118                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9119                    payment_date: inv.due_date,
9120                    amount: inv.amount_remaining,
9121                    document_id: inv.invoice_number.clone(),
9122                })
9123                .collect();
9124
9125            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9126                self.config.treasury.cash_forecasting.clone(),
9127                seed + 94,
9128            );
9129            let forecast = forecast_gen.generate(
9130                entity_id,
9131                currency,
9132                end_date,
9133                &ar_items,
9134                &ap_items,
9135                &[], // scheduled disbursements - empty for now
9136            );
9137            snapshot.cash_forecasts.push(forecast);
9138        }
9139
9140        // Generate cash pools and sweeps
9141        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9142            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9143            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9144                self.config.treasury.cash_pooling.clone(),
9145                seed + 95,
9146            );
9147
9148            // Create a pool from available accounts
9149            let account_ids: Vec<String> = snapshot
9150                .cash_positions
9151                .iter()
9152                .map(|cp| cp.bank_account_id.clone())
9153                .collect::<std::collections::HashSet<_>>()
9154                .into_iter()
9155                .collect();
9156
9157            if let Some(pool) =
9158                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9159            {
9160                // Generate sweeps - build participant balances from last cash position per account
9161                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9162                for cp in &snapshot.cash_positions {
9163                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9164                }
9165
9166                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9167                    latest_balances
9168                        .into_iter()
9169                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9170                        .map(
9171                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9172                                account_id: id,
9173                                balance,
9174                            },
9175                        )
9176                        .collect();
9177
9178                let sweeps =
9179                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9180                snapshot.cash_pool_sweeps = sweeps;
9181                snapshot.cash_pools.push(pool);
9182            }
9183        }
9184
9185        // Generate bank guarantees
9186        if self.config.treasury.bank_guarantees.enabled {
9187            let vendor_names: Vec<String> = self
9188                .master_data
9189                .vendors
9190                .iter()
9191                .map(|v| v.name.clone())
9192                .collect();
9193            if !vendor_names.is_empty() {
9194                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9195                    self.config.treasury.bank_guarantees.clone(),
9196                    seed + 96,
9197                );
9198                snapshot.bank_guarantees =
9199                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9200            }
9201        }
9202
9203        // Generate netting runs from intercompany matched pairs
9204        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9205            let entity_ids: Vec<String> = self
9206                .config
9207                .companies
9208                .iter()
9209                .map(|c| c.code.clone())
9210                .collect();
9211            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9212                .matched_pairs
9213                .iter()
9214                .map(|mp| {
9215                    (
9216                        mp.seller_company.clone(),
9217                        mp.buyer_company.clone(),
9218                        mp.amount,
9219                    )
9220                })
9221                .collect();
9222            if entity_ids.len() >= 2 {
9223                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9224                    self.config.treasury.netting.clone(),
9225                    seed + 97,
9226                );
9227                snapshot.netting_runs = netting_gen.generate(
9228                    &entity_ids,
9229                    currency,
9230                    start_date,
9231                    self.config.global.period_months,
9232                    &ic_amounts,
9233                );
9234            }
9235        }
9236
9237        // Generate treasury journal entries from the instruments we just created.
9238        {
9239            use datasynth_generators::treasury::TreasuryAccounting;
9240
9241            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9242            let mut treasury_jes = Vec::new();
9243
9244            // Debt interest accrual JEs
9245            if !snapshot.debt_instruments.is_empty() {
9246                let debt_jes =
9247                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9248                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9249                treasury_jes.extend(debt_jes);
9250            }
9251
9252            // Hedge mark-to-market JEs
9253            if !snapshot.hedging_instruments.is_empty() {
9254                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9255                    &snapshot.hedging_instruments,
9256                    &snapshot.hedge_relationships,
9257                    end_date,
9258                    entity_id,
9259                );
9260                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9261                treasury_jes.extend(hedge_jes);
9262            }
9263
9264            // Cash pool sweep JEs
9265            if !snapshot.cash_pool_sweeps.is_empty() {
9266                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9267                    &snapshot.cash_pool_sweeps,
9268                    entity_id,
9269                );
9270                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9271                treasury_jes.extend(sweep_jes);
9272            }
9273
9274            if !treasury_jes.is_empty() {
9275                debug!("Total treasury journal entries: {}", treasury_jes.len());
9276            }
9277            snapshot.journal_entries = treasury_jes;
9278        }
9279
9280        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9281        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9282        stats.cash_position_count = snapshot.cash_positions.len();
9283        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9284        stats.cash_pool_count = snapshot.cash_pools.len();
9285
9286        info!(
9287            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9288            snapshot.debt_instruments.len(),
9289            snapshot.hedging_instruments.len(),
9290            snapshot.cash_positions.len(),
9291            snapshot.cash_forecasts.len(),
9292            snapshot.cash_pools.len(),
9293            snapshot.bank_guarantees.len(),
9294            snapshot.netting_runs.len(),
9295            snapshot.journal_entries.len(),
9296        );
9297        self.check_resources_with_log("post-treasury")?;
9298
9299        Ok(snapshot)
9300    }
9301
9302    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9303    fn phase_project_accounting(
9304        &mut self,
9305        document_flows: &DocumentFlowSnapshot,
9306        hr: &HrSnapshot,
9307        stats: &mut EnhancedGenerationStatistics,
9308    ) -> SynthResult<ProjectAccountingSnapshot> {
9309        if !self.phase_config.generate_project_accounting {
9310            debug!("Phase 23: Skipped (project accounting disabled)");
9311            return Ok(ProjectAccountingSnapshot::default());
9312        }
9313        let degradation = self.check_resources()?;
9314        if degradation >= DegradationLevel::Reduced {
9315            debug!(
9316                "Phase skipped due to resource pressure (degradation: {:?})",
9317                degradation
9318            );
9319            return Ok(ProjectAccountingSnapshot::default());
9320        }
9321        info!("Phase 23: Generating Project Accounting Data");
9322
9323        let seed = self.seed;
9324        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9325            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9326        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9327        let company_code = self
9328            .config
9329            .companies
9330            .first()
9331            .map(|c| c.code.as_str())
9332            .unwrap_or("1000");
9333
9334        let mut snapshot = ProjectAccountingSnapshot::default();
9335
9336        // Generate projects with WBS hierarchies
9337        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9338            self.config.project_accounting.clone(),
9339            seed + 95,
9340        );
9341        let pool = project_gen.generate(company_code, start_date, end_date);
9342        snapshot.projects = pool.projects.clone();
9343
9344        // Link source documents to projects for cost allocation
9345        {
9346            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9347                Vec::new();
9348
9349            // Time entries
9350            for te in &hr.time_entries {
9351                let total_hours = te.hours_regular + te.hours_overtime;
9352                if total_hours > 0.0 {
9353                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9354                        id: te.entry_id.clone(),
9355                        entity_id: company_code.to_string(),
9356                        date: te.date,
9357                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9358                            .unwrap_or(rust_decimal::Decimal::ZERO),
9359                        source_type: CostSourceType::TimeEntry,
9360                        hours: Some(
9361                            rust_decimal::Decimal::from_f64_retain(total_hours)
9362                                .unwrap_or(rust_decimal::Decimal::ZERO),
9363                        ),
9364                    });
9365                }
9366            }
9367
9368            // Expense reports
9369            for er in &hr.expense_reports {
9370                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9371                    id: er.report_id.clone(),
9372                    entity_id: company_code.to_string(),
9373                    date: er.submission_date,
9374                    amount: er.total_amount,
9375                    source_type: CostSourceType::ExpenseReport,
9376                    hours: None,
9377                });
9378            }
9379
9380            // Purchase orders
9381            for po in &document_flows.purchase_orders {
9382                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9383                    id: po.header.document_id.clone(),
9384                    entity_id: company_code.to_string(),
9385                    date: po.header.document_date,
9386                    amount: po.total_net_amount,
9387                    source_type: CostSourceType::PurchaseOrder,
9388                    hours: None,
9389                });
9390            }
9391
9392            // Vendor invoices
9393            for vi in &document_flows.vendor_invoices {
9394                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9395                    id: vi.header.document_id.clone(),
9396                    entity_id: company_code.to_string(),
9397                    date: vi.header.document_date,
9398                    amount: vi.payable_amount,
9399                    source_type: CostSourceType::VendorInvoice,
9400                    hours: None,
9401                });
9402            }
9403
9404            if !source_docs.is_empty() && !pool.projects.is_empty() {
9405                let mut cost_gen =
9406                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9407                        self.config.project_accounting.cost_allocation.clone(),
9408                        seed + 99,
9409                    );
9410                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9411            }
9412        }
9413
9414        // Generate change orders
9415        if self.config.project_accounting.change_orders.enabled {
9416            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9417                self.config.project_accounting.change_orders.clone(),
9418                seed + 96,
9419            );
9420            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9421        }
9422
9423        // Generate milestones
9424        if self.config.project_accounting.milestones.enabled {
9425            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9426                self.config.project_accounting.milestones.clone(),
9427                seed + 97,
9428            );
9429            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9430        }
9431
9432        // Generate earned value metrics (needs cost lines, so only if we have projects)
9433        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9434            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9435                self.config.project_accounting.earned_value.clone(),
9436                seed + 98,
9437            );
9438            snapshot.earned_value_metrics =
9439                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9440        }
9441
9442        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9443        if self.config.project_accounting.revenue_recognition.enabled
9444            && !snapshot.projects.is_empty()
9445            && !snapshot.cost_lines.is_empty()
9446        {
9447            use datasynth_generators::project_accounting::RevenueGenerator;
9448            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9449            let avg_contract_value =
9450                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9451                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9452
9453            // Build contract value tuples: only customer-type projects get revenue recognition.
9454            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9455            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9456                snapshot
9457                    .projects
9458                    .iter()
9459                    .filter(|p| {
9460                        matches!(
9461                            p.project_type,
9462                            datasynth_core::models::ProjectType::Customer
9463                        )
9464                    })
9465                    .map(|p| {
9466                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9467                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9468                        // budget × 1.25 → contract value
9469                        } else {
9470                            avg_contract_value
9471                        };
9472                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9473                        (p.project_id.clone(), cv, etc)
9474                    })
9475                    .collect();
9476
9477            if !contract_values.is_empty() {
9478                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9479                snapshot.revenue_records = rev_gen.generate(
9480                    &snapshot.projects,
9481                    &snapshot.cost_lines,
9482                    &contract_values,
9483                    start_date,
9484                    end_date,
9485                );
9486                debug!(
9487                    "Generated {} revenue recognition records for {} customer projects",
9488                    snapshot.revenue_records.len(),
9489                    contract_values.len()
9490                );
9491            }
9492        }
9493
9494        stats.project_count = snapshot.projects.len();
9495        stats.project_change_order_count = snapshot.change_orders.len();
9496        stats.project_cost_line_count = snapshot.cost_lines.len();
9497
9498        info!(
9499            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9500            snapshot.projects.len(),
9501            snapshot.change_orders.len(),
9502            snapshot.milestones.len(),
9503            snapshot.earned_value_metrics.len()
9504        );
9505        self.check_resources_with_log("post-project-accounting")?;
9506
9507        Ok(snapshot)
9508    }
9509
9510    /// Phase 24: Generate process evolution and organizational events.
9511    fn phase_evolution_events(
9512        &mut self,
9513        stats: &mut EnhancedGenerationStatistics,
9514    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9515        if !self.phase_config.generate_evolution_events {
9516            debug!("Phase 24: Skipped (evolution events disabled)");
9517            return Ok((Vec::new(), Vec::new()));
9518        }
9519        info!("Phase 24: Generating Process Evolution + Organizational Events");
9520
9521        let seed = self.seed;
9522        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9523            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9524        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9525
9526        // Process evolution events
9527        let mut proc_gen =
9528            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9529                seed + 100,
9530            );
9531        let process_events = proc_gen.generate_events(start_date, end_date);
9532
9533        // Organizational events
9534        let company_codes: Vec<String> = self
9535            .config
9536            .companies
9537            .iter()
9538            .map(|c| c.code.clone())
9539            .collect();
9540        let mut org_gen =
9541            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9542                seed + 101,
9543            );
9544        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9545
9546        stats.process_evolution_event_count = process_events.len();
9547        stats.organizational_event_count = org_events.len();
9548
9549        info!(
9550            "Evolution events generated: {} process evolution, {} organizational",
9551            process_events.len(),
9552            org_events.len()
9553        );
9554        self.check_resources_with_log("post-evolution-events")?;
9555
9556        Ok((process_events, org_events))
9557    }
9558
9559    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9560    /// data recovery, and regulatory changes).
9561    fn phase_disruption_events(
9562        &self,
9563        stats: &mut EnhancedGenerationStatistics,
9564    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9565        if !self.config.organizational_events.enabled {
9566            debug!("Phase 24b: Skipped (organizational events disabled)");
9567            return Ok(Vec::new());
9568        }
9569        info!("Phase 24b: Generating Disruption Events");
9570
9571        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9572            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9573        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9574
9575        let company_codes: Vec<String> = self
9576            .config
9577            .companies
9578            .iter()
9579            .map(|c| c.code.clone())
9580            .collect();
9581
9582        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9583        let events = gen.generate(start_date, end_date, &company_codes);
9584
9585        stats.disruption_event_count = events.len();
9586        info!("Disruption events generated: {} events", events.len());
9587        self.check_resources_with_log("post-disruption-events")?;
9588
9589        Ok(events)
9590    }
9591
9592    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9593    ///
9594    /// Produces paired examples where each pair contains the original clean JE
9595    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9596    /// split transaction). Useful for training anomaly detection models with
9597    /// known ground truth.
9598    fn phase_counterfactuals(
9599        &self,
9600        journal_entries: &[JournalEntry],
9601        stats: &mut EnhancedGenerationStatistics,
9602    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9603        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9604            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9605            return Ok(Vec::new());
9606        }
9607        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9608
9609        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9610
9611        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9612
9613        // Rotating set of specs to produce diverse mutation types
9614        let specs = [
9615            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9616            CounterfactualSpec::ShiftDate { days: -14 },
9617            CounterfactualSpec::SelfApprove,
9618            CounterfactualSpec::SplitTransaction { split_count: 3 },
9619        ];
9620
9621        let pairs: Vec<_> = journal_entries
9622            .iter()
9623            .enumerate()
9624            .map(|(i, je)| {
9625                let spec = &specs[i % specs.len()];
9626                gen.generate(je, spec)
9627            })
9628            .collect();
9629
9630        stats.counterfactual_pair_count = pairs.len();
9631        info!(
9632            "Counterfactual pairs generated: {} pairs from {} journal entries",
9633            pairs.len(),
9634            journal_entries.len()
9635        );
9636        self.check_resources_with_log("post-counterfactuals")?;
9637
9638        Ok(pairs)
9639    }
9640
9641    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9642    ///
9643    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9644    /// fraudulent, then generates probabilistic red flags on all chain documents.
9645    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9646    /// to produce realistic ML training data.
9647    fn phase_red_flags(
9648        &self,
9649        anomaly_labels: &AnomalyLabels,
9650        document_flows: &DocumentFlowSnapshot,
9651        stats: &mut EnhancedGenerationStatistics,
9652    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9653        if !self.config.fraud.enabled {
9654            debug!("Phase 26: Skipped (fraud generation disabled)");
9655            return Ok(Vec::new());
9656        }
9657        info!("Phase 26: Generating Fraud Red-Flag Indicators");
9658
9659        use datasynth_generators::fraud::RedFlagGenerator;
9660
9661        let generator = RedFlagGenerator::new();
9662        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9663
9664        // Build a set of document IDs that are known-fraudulent from anomaly labels.
9665        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9666            .labels
9667            .iter()
9668            .filter(|label| label.anomaly_type.is_intentional())
9669            .map(|label| label.document_id.as_str())
9670            .collect();
9671
9672        let mut flags = Vec::new();
9673
9674        // Iterate P2P chains: use the purchase order document ID as the chain key.
9675        for chain in &document_flows.p2p_chains {
9676            let doc_id = &chain.purchase_order.header.document_id;
9677            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9678            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9679        }
9680
9681        // Iterate O2C chains: use the sales order document ID as the chain key.
9682        for chain in &document_flows.o2c_chains {
9683            let doc_id = &chain.sales_order.header.document_id;
9684            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9685            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9686        }
9687
9688        stats.red_flag_count = flags.len();
9689        info!(
9690            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9691            flags.len(),
9692            document_flows.p2p_chains.len(),
9693            document_flows.o2c_chains.len(),
9694            fraud_doc_ids.len()
9695        );
9696        self.check_resources_with_log("post-red-flags")?;
9697
9698        Ok(flags)
9699    }
9700
9701    /// Phase 26b: Generate collusion rings from employee/vendor pools.
9702    ///
9703    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
9704    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
9705    /// advance them over the simulation period.
9706    fn phase_collusion_rings(
9707        &mut self,
9708        stats: &mut EnhancedGenerationStatistics,
9709    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9710        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9711            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9712            return Ok(Vec::new());
9713        }
9714        info!("Phase 26b: Generating Collusion Rings");
9715
9716        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9717            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9718        let months = self.config.global.period_months;
9719
9720        let employee_ids: Vec<String> = self
9721            .master_data
9722            .employees
9723            .iter()
9724            .map(|e| e.employee_id.clone())
9725            .collect();
9726        let vendor_ids: Vec<String> = self
9727            .master_data
9728            .vendors
9729            .iter()
9730            .map(|v| v.vendor_id.clone())
9731            .collect();
9732
9733        let mut generator =
9734            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9735        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9736
9737        stats.collusion_ring_count = rings.len();
9738        info!(
9739            "Collusion rings generated: {} rings, total members: {}",
9740            rings.len(),
9741            rings
9742                .iter()
9743                .map(datasynth_generators::fraud::CollusionRing::size)
9744                .sum::<usize>()
9745        );
9746        self.check_resources_with_log("post-collusion-rings")?;
9747
9748        Ok(rings)
9749    }
9750
9751    /// Phase 27: Generate bi-temporal version chains for vendor entities.
9752    ///
9753    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
9754    /// master data changes over time, supporting bi-temporal audit queries.
9755    fn phase_temporal_attributes(
9756        &mut self,
9757        stats: &mut EnhancedGenerationStatistics,
9758    ) -> SynthResult<
9759        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9760    > {
9761        if !self.config.temporal_attributes.enabled {
9762            debug!("Phase 27: Skipped (temporal attributes disabled)");
9763            return Ok(Vec::new());
9764        }
9765        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9766
9767        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9768            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9769
9770        // Build a TemporalAttributeConfig from the user's config.
9771        // Since Phase 27 is already gated on temporal_attributes.enabled,
9772        // default to enabling version chains so users get actual mutations.
9773        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9774            || self.config.temporal_attributes.enabled;
9775        let temporal_config = {
9776            let ta = &self.config.temporal_attributes;
9777            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9778                .enabled(ta.enabled)
9779                .closed_probability(ta.valid_time.closed_probability)
9780                .avg_validity_days(ta.valid_time.avg_validity_days)
9781                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9782                .with_version_chains(if generate_version_chains {
9783                    ta.avg_versions_per_entity
9784                } else {
9785                    1.0
9786                })
9787                .build()
9788        };
9789        // Apply backdating settings if configured
9790        let temporal_config = if self
9791            .config
9792            .temporal_attributes
9793            .transaction_time
9794            .allow_backdating
9795        {
9796            let mut c = temporal_config;
9797            c.transaction_time.allow_backdating = true;
9798            c.transaction_time.backdating_probability = self
9799                .config
9800                .temporal_attributes
9801                .transaction_time
9802                .backdating_probability;
9803            c.transaction_time.max_backdate_days = self
9804                .config
9805                .temporal_attributes
9806                .transaction_time
9807                .max_backdate_days;
9808            c
9809        } else {
9810            temporal_config
9811        };
9812        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9813            temporal_config,
9814            self.seed + 130,
9815            start_date,
9816        );
9817
9818        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9819            self.seed + 130,
9820            datasynth_core::GeneratorType::Vendor,
9821        );
9822
9823        let chains: Vec<_> = self
9824            .master_data
9825            .vendors
9826            .iter()
9827            .map(|vendor| {
9828                let id = uuid_factory.next();
9829                gen.generate_version_chain(vendor.clone(), id)
9830            })
9831            .collect();
9832
9833        stats.temporal_version_chain_count = chains.len();
9834        info!("Temporal version chains generated: {} chains", chains.len());
9835        self.check_resources_with_log("post-temporal-attributes")?;
9836
9837        Ok(chains)
9838    }
9839
9840    /// Phase 28: Build entity relationship graph and cross-process links.
9841    ///
9842    /// Part 1 (gated on `relationship_strength.enabled`): builds an
9843    /// `EntityGraph` from master-data vendor/customer entities and
9844    /// journal-entry-derived transaction summaries.
9845    ///
9846    /// Part 2 (gated on `cross_process_links.enabled`): extracts
9847    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
9848    /// generates inventory-movement cross-process links.
9849    fn phase_entity_relationships(
9850        &self,
9851        journal_entries: &[JournalEntry],
9852        document_flows: &DocumentFlowSnapshot,
9853        stats: &mut EnhancedGenerationStatistics,
9854    ) -> SynthResult<(
9855        Option<datasynth_core::models::EntityGraph>,
9856        Vec<datasynth_core::models::CrossProcessLink>,
9857    )> {
9858        use datasynth_generators::relationships::{
9859            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9860            TransactionSummary,
9861        };
9862
9863        let rs_enabled = self.config.relationship_strength.enabled;
9864        let cpl_enabled = self.config.cross_process_links.enabled
9865            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9866
9867        if !rs_enabled && !cpl_enabled {
9868            debug!(
9869                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9870            );
9871            return Ok((None, Vec::new()));
9872        }
9873
9874        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9875
9876        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9877            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9878
9879        let company_code = self
9880            .config
9881            .companies
9882            .first()
9883            .map(|c| c.code.as_str())
9884            .unwrap_or("1000");
9885
9886        // Build the generator with matching config flags
9887        let gen_config = EntityGraphConfig {
9888            enabled: rs_enabled,
9889            cross_process: datasynth_generators::relationships::CrossProcessConfig {
9890                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9891                enable_return_flows: false,
9892                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9893                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9894                // Use higher link rate for small datasets to avoid probabilistic empty results
9895                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9896                    1.0
9897                } else {
9898                    0.30
9899                },
9900                ..Default::default()
9901            },
9902            strength_config: datasynth_generators::relationships::StrengthConfig {
9903                transaction_volume_weight: self
9904                    .config
9905                    .relationship_strength
9906                    .calculation
9907                    .transaction_volume_weight,
9908                transaction_count_weight: self
9909                    .config
9910                    .relationship_strength
9911                    .calculation
9912                    .transaction_count_weight,
9913                duration_weight: self
9914                    .config
9915                    .relationship_strength
9916                    .calculation
9917                    .relationship_duration_weight,
9918                recency_weight: self.config.relationship_strength.calculation.recency_weight,
9919                mutual_connections_weight: self
9920                    .config
9921                    .relationship_strength
9922                    .calculation
9923                    .mutual_connections_weight,
9924                recency_half_life_days: self
9925                    .config
9926                    .relationship_strength
9927                    .calculation
9928                    .recency_half_life_days,
9929            },
9930            ..Default::default()
9931        };
9932
9933        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9934
9935        // --- Part 1: Entity Relationship Graph ---
9936        let entity_graph = if rs_enabled {
9937            // Build EntitySummary lists from master data
9938            let vendor_summaries: Vec<EntitySummary> = self
9939                .master_data
9940                .vendors
9941                .iter()
9942                .map(|v| {
9943                    EntitySummary::new(
9944                        &v.vendor_id,
9945                        &v.name,
9946                        datasynth_core::models::GraphEntityType::Vendor,
9947                        start_date,
9948                    )
9949                })
9950                .collect();
9951
9952            let customer_summaries: Vec<EntitySummary> = self
9953                .master_data
9954                .customers
9955                .iter()
9956                .map(|c| {
9957                    EntitySummary::new(
9958                        &c.customer_id,
9959                        &c.name,
9960                        datasynth_core::models::GraphEntityType::Customer,
9961                        start_date,
9962                    )
9963                })
9964                .collect();
9965
9966            // Build transaction summaries from journal entries.
9967            // Key = (company_code, trading_partner) for entries that have a
9968            // trading partner.  This captures intercompany flows and any JE
9969            // whose line items carry a trading_partner reference.
9970            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9971                std::collections::HashMap::new();
9972
9973            for je in journal_entries {
9974                let cc = je.header.company_code.clone();
9975                let posting_date = je.header.posting_date;
9976                for line in &je.lines {
9977                    if let Some(ref tp) = line.trading_partner {
9978                        let amount = if line.debit_amount > line.credit_amount {
9979                            line.debit_amount
9980                        } else {
9981                            line.credit_amount
9982                        };
9983                        let entry = txn_summaries
9984                            .entry((cc.clone(), tp.clone()))
9985                            .or_insert_with(|| TransactionSummary {
9986                                total_volume: rust_decimal::Decimal::ZERO,
9987                                transaction_count: 0,
9988                                first_transaction_date: posting_date,
9989                                last_transaction_date: posting_date,
9990                                related_entities: std::collections::HashSet::new(),
9991                            });
9992                        entry.total_volume += amount;
9993                        entry.transaction_count += 1;
9994                        if posting_date < entry.first_transaction_date {
9995                            entry.first_transaction_date = posting_date;
9996                        }
9997                        if posting_date > entry.last_transaction_date {
9998                            entry.last_transaction_date = posting_date;
9999                        }
10000                        entry.related_entities.insert(cc.clone());
10001                    }
10002                }
10003            }
10004
10005            // Also extract transaction relationships from document flow chains.
10006            // P2P chains: Company → Vendor relationships
10007            for chain in &document_flows.p2p_chains {
10008                let cc = chain.purchase_order.header.company_code.clone();
10009                let vendor_id = chain.purchase_order.vendor_id.clone();
10010                let po_date = chain.purchase_order.header.document_date;
10011                let amount = chain.purchase_order.total_net_amount;
10012
10013                let entry = txn_summaries
10014                    .entry((cc.clone(), vendor_id))
10015                    .or_insert_with(|| TransactionSummary {
10016                        total_volume: rust_decimal::Decimal::ZERO,
10017                        transaction_count: 0,
10018                        first_transaction_date: po_date,
10019                        last_transaction_date: po_date,
10020                        related_entities: std::collections::HashSet::new(),
10021                    });
10022                entry.total_volume += amount;
10023                entry.transaction_count += 1;
10024                if po_date < entry.first_transaction_date {
10025                    entry.first_transaction_date = po_date;
10026                }
10027                if po_date > entry.last_transaction_date {
10028                    entry.last_transaction_date = po_date;
10029                }
10030                entry.related_entities.insert(cc);
10031            }
10032
10033            // O2C chains: Company → Customer relationships
10034            for chain in &document_flows.o2c_chains {
10035                let cc = chain.sales_order.header.company_code.clone();
10036                let customer_id = chain.sales_order.customer_id.clone();
10037                let so_date = chain.sales_order.header.document_date;
10038                let amount = chain.sales_order.total_net_amount;
10039
10040                let entry = txn_summaries
10041                    .entry((cc.clone(), customer_id))
10042                    .or_insert_with(|| TransactionSummary {
10043                        total_volume: rust_decimal::Decimal::ZERO,
10044                        transaction_count: 0,
10045                        first_transaction_date: so_date,
10046                        last_transaction_date: so_date,
10047                        related_entities: std::collections::HashSet::new(),
10048                    });
10049                entry.total_volume += amount;
10050                entry.transaction_count += 1;
10051                if so_date < entry.first_transaction_date {
10052                    entry.first_transaction_date = so_date;
10053                }
10054                if so_date > entry.last_transaction_date {
10055                    entry.last_transaction_date = so_date;
10056                }
10057                entry.related_entities.insert(cc);
10058            }
10059
10060            let as_of_date = journal_entries
10061                .last()
10062                .map(|je| je.header.posting_date)
10063                .unwrap_or(start_date);
10064
10065            let graph = gen.generate_entity_graph(
10066                company_code,
10067                as_of_date,
10068                &vendor_summaries,
10069                &customer_summaries,
10070                &txn_summaries,
10071            );
10072
10073            info!(
10074                "Entity relationship graph: {} nodes, {} edges",
10075                graph.nodes.len(),
10076                graph.edges.len()
10077            );
10078            stats.entity_relationship_node_count = graph.nodes.len();
10079            stats.entity_relationship_edge_count = graph.edges.len();
10080            Some(graph)
10081        } else {
10082            None
10083        };
10084
10085        // --- Part 2: Cross-Process Links ---
10086        let cross_process_links = if cpl_enabled {
10087            // Build GoodsReceiptRef from P2P chains
10088            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10089                .p2p_chains
10090                .iter()
10091                .flat_map(|chain| {
10092                    let vendor_id = chain.purchase_order.vendor_id.clone();
10093                    let cc = chain.purchase_order.header.company_code.clone();
10094                    chain.goods_receipts.iter().flat_map(move |gr| {
10095                        gr.items.iter().filter_map({
10096                            let doc_id = gr.header.document_id.clone();
10097                            let v_id = vendor_id.clone();
10098                            let company = cc.clone();
10099                            let receipt_date = gr.header.document_date;
10100                            move |item| {
10101                                item.base
10102                                    .material_id
10103                                    .as_ref()
10104                                    .map(|mat_id| GoodsReceiptRef {
10105                                        document_id: doc_id.clone(),
10106                                        material_id: mat_id.clone(),
10107                                        quantity: item.base.quantity,
10108                                        receipt_date,
10109                                        vendor_id: v_id.clone(),
10110                                        company_code: company.clone(),
10111                                    })
10112                            }
10113                        })
10114                    })
10115                })
10116                .collect();
10117
10118            // Build DeliveryRef from O2C chains
10119            let del_refs: Vec<DeliveryRef> = document_flows
10120                .o2c_chains
10121                .iter()
10122                .flat_map(|chain| {
10123                    let customer_id = chain.sales_order.customer_id.clone();
10124                    let cc = chain.sales_order.header.company_code.clone();
10125                    chain.deliveries.iter().flat_map(move |del| {
10126                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10127                        del.items.iter().filter_map({
10128                            let doc_id = del.header.document_id.clone();
10129                            let c_id = customer_id.clone();
10130                            let company = cc.clone();
10131                            move |item| {
10132                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10133                                    document_id: doc_id.clone(),
10134                                    material_id: mat_id.clone(),
10135                                    quantity: item.base.quantity,
10136                                    delivery_date,
10137                                    customer_id: c_id.clone(),
10138                                    company_code: company.clone(),
10139                                })
10140                            }
10141                        })
10142                    })
10143                })
10144                .collect();
10145
10146            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10147            info!("Cross-process links generated: {} links", links.len());
10148            stats.cross_process_link_count = links.len();
10149            links
10150        } else {
10151            Vec::new()
10152        };
10153
10154        self.check_resources_with_log("post-entity-relationships")?;
10155        Ok((entity_graph, cross_process_links))
10156    }
10157
10158    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10159    fn phase_industry_data(
10160        &self,
10161        stats: &mut EnhancedGenerationStatistics,
10162    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10163        if !self.config.industry_specific.enabled {
10164            return None;
10165        }
10166        info!("Phase 29: Generating industry-specific data");
10167        let output = datasynth_generators::industry::factory::generate_industry_output(
10168            self.config.global.industry,
10169        );
10170        stats.industry_gl_account_count = output.gl_accounts.len();
10171        info!(
10172            "Industry data generated: {} GL accounts for {:?}",
10173            output.gl_accounts.len(),
10174            self.config.global.industry
10175        );
10176        Some(output)
10177    }
10178
10179    /// Phase 3b: Generate opening balances for each company.
10180    fn phase_opening_balances(
10181        &mut self,
10182        coa: &Arc<ChartOfAccounts>,
10183        stats: &mut EnhancedGenerationStatistics,
10184    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10185        if !self.config.balance.generate_opening_balances {
10186            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10187            return Ok(Vec::new());
10188        }
10189        info!("Phase 3b: Generating Opening Balances");
10190
10191        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10192            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10193        let fiscal_year = start_date.year();
10194
10195        // **v5.3** — When the shard context supplies prior-period
10196        // opening-balance carryovers, use them directly instead of
10197        // calling `OpeningBalanceGenerator`.  This implements multi-
10198        // period continuity: period N+1 opens with period N's closing
10199        // BS positions exactly, rather than re-rolling the industry-
10200        // mix generator and losing the audit trail.
10201        //
10202        // Empty `opening_balances` (the v5.0–v5.2 default) falls
10203        // through to the generator path — byte-identical behaviour
10204        // for single-period engagements.
10205        if let Some(ctx) = &self.shard_context {
10206            if !ctx.opening_balances.is_empty() {
10207                debug!(
10208                    "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10209                    ctx.opening_balances.len()
10210                );
10211                let mut results = Vec::new();
10212                for company in &self.config.companies {
10213                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10214                        .opening_balances
10215                        .iter()
10216                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10217                        .collect();
10218                    let total_assets = ctx
10219                        .opening_balances
10220                        .iter()
10221                        .filter(|ob| {
10222                            matches!(
10223                                ob.account_type,
10224                                AccountType::Asset | AccountType::ContraAsset
10225                            )
10226                        })
10227                        .map(|ob| ob.net_balance())
10228                        .sum::<rust_decimal::Decimal>();
10229                    let total_liabilities = ctx
10230                        .opening_balances
10231                        .iter()
10232                        .filter(|ob| {
10233                            matches!(
10234                                ob.account_type,
10235                                AccountType::Liability | AccountType::ContraLiability
10236                            )
10237                        })
10238                        .map(|ob| ob.net_balance())
10239                        .sum::<rust_decimal::Decimal>();
10240                    let total_equity = ctx
10241                        .opening_balances
10242                        .iter()
10243                        .filter(|ob| {
10244                            matches!(
10245                                ob.account_type,
10246                                AccountType::Equity | AccountType::ContraEquity
10247                            )
10248                        })
10249                        .map(|ob| ob.net_balance())
10250                        .sum::<rust_decimal::Decimal>();
10251                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10252                        < rust_decimal::Decimal::ONE;
10253                    results.push(GeneratedOpeningBalance {
10254                        company_code: company.code.clone(),
10255                        as_of_date: start_date,
10256                        balances,
10257                        total_assets,
10258                        total_liabilities,
10259                        total_equity,
10260                        is_balanced,
10261                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10262                            current_ratio: None,
10263                            quick_ratio: None,
10264                            debt_to_equity: None,
10265                            working_capital: rust_decimal::Decimal::ZERO,
10266                        },
10267                    });
10268                }
10269                stats.opening_balance_count = results.len();
10270                info!(
10271                    "Phase 3b: opening-balance carryover applied ({} companies)",
10272                    results.len()
10273                );
10274                self.check_resources_with_log("post-opening-balances")?;
10275                return Ok(results);
10276            }
10277        }
10278
10279        let industry = match self.config.global.industry {
10280            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10281            IndustrySector::Retail => IndustryType::Retail,
10282            IndustrySector::FinancialServices => IndustryType::Financial,
10283            IndustrySector::Healthcare => IndustryType::Healthcare,
10284            IndustrySector::Technology => IndustryType::Technology,
10285            _ => IndustryType::Manufacturing,
10286        };
10287
10288        let config = datasynth_generators::OpeningBalanceConfig {
10289            industry,
10290            ..Default::default()
10291        };
10292        let mut gen =
10293            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10294
10295        let mut results = Vec::new();
10296        for company in &self.config.companies {
10297            let spec = OpeningBalanceSpec::new(
10298                company.code.clone(),
10299                start_date,
10300                fiscal_year,
10301                company.currency.clone(),
10302                rust_decimal::Decimal::new(10_000_000, 0),
10303                industry,
10304            );
10305            let ob = gen.generate(&spec, coa, start_date, &company.code);
10306            results.push(ob);
10307        }
10308
10309        stats.opening_balance_count = results.len();
10310        info!("Opening balances generated: {} companies", results.len());
10311        self.check_resources_with_log("post-opening-balances")?;
10312
10313        Ok(results)
10314    }
10315
10316    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10317    fn phase_subledger_reconciliation(
10318        &mut self,
10319        subledger: &SubledgerSnapshot,
10320        entries: &[JournalEntry],
10321        stats: &mut EnhancedGenerationStatistics,
10322    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10323        if !self.config.balance.reconcile_subledgers {
10324            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10325            return Ok(Vec::new());
10326        }
10327        info!("Phase 9b: Reconciling GL to subledger balances");
10328
10329        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10330            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10331            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10332
10333        // Build GL balance map from journal entries using a balance tracker
10334        let tracker_config = BalanceTrackerConfig {
10335            validate_on_each_entry: false,
10336            track_history: false,
10337            fail_on_validation_error: false,
10338            ..Default::default()
10339        };
10340        let recon_currency = self
10341            .config
10342            .companies
10343            .first()
10344            .map(|c| c.currency.clone())
10345            .unwrap_or_else(|| "USD".to_string());
10346        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10347        let validation_errors = tracker.apply_entries(entries);
10348        if !validation_errors.is_empty() {
10349            warn!(
10350                error_count = validation_errors.len(),
10351                "Balance tracker encountered validation errors during subledger reconciliation"
10352            );
10353            for err in &validation_errors {
10354                debug!("Balance validation error: {:?}", err);
10355            }
10356        }
10357
10358        let mut engine = datasynth_generators::ReconciliationEngine::new(
10359            datasynth_generators::ReconciliationConfig::default(),
10360        );
10361
10362        let mut results = Vec::new();
10363        let company_code = self
10364            .config
10365            .companies
10366            .first()
10367            .map(|c| c.code.as_str())
10368            .unwrap_or("1000");
10369
10370        // Reconcile AR
10371        if !subledger.ar_invoices.is_empty() {
10372            let gl_balance = tracker
10373                .get_account_balance(
10374                    company_code,
10375                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10376                )
10377                .map(|b| b.closing_balance)
10378                .unwrap_or_default();
10379            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10380            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10381        }
10382
10383        // Reconcile AP
10384        if !subledger.ap_invoices.is_empty() {
10385            let gl_balance = tracker
10386                .get_account_balance(
10387                    company_code,
10388                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10389                )
10390                .map(|b| b.closing_balance)
10391                .unwrap_or_default();
10392            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10393            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10394        }
10395
10396        // Reconcile FA
10397        if !subledger.fa_records.is_empty() {
10398            let gl_asset_balance = tracker
10399                .get_account_balance(
10400                    company_code,
10401                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10402                )
10403                .map(|b| b.closing_balance)
10404                .unwrap_or_default();
10405            let gl_accum_depr_balance = tracker
10406                .get_account_balance(
10407                    company_code,
10408                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10409                )
10410                .map(|b| b.closing_balance)
10411                .unwrap_or_default();
10412            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10413                subledger.fa_records.iter().collect();
10414            let (asset_recon, depr_recon) = engine.reconcile_fa(
10415                company_code,
10416                end_date,
10417                gl_asset_balance,
10418                gl_accum_depr_balance,
10419                &fa_refs,
10420            );
10421            results.push(asset_recon);
10422            results.push(depr_recon);
10423        }
10424
10425        // Reconcile Inventory
10426        if !subledger.inventory_positions.is_empty() {
10427            let gl_balance = tracker
10428                .get_account_balance(
10429                    company_code,
10430                    datasynth_core::accounts::control_accounts::INVENTORY,
10431                )
10432                .map(|b| b.closing_balance)
10433                .unwrap_or_default();
10434            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10435                subledger.inventory_positions.iter().collect();
10436            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10437        }
10438
10439        stats.subledger_reconciliation_count = results.len();
10440        let passed = results.iter().filter(|r| r.is_balanced()).count();
10441        let failed = results.len() - passed;
10442        info!(
10443            "Subledger reconciliation: {} checks, {} passed, {} failed",
10444            results.len(),
10445            passed,
10446            failed
10447        );
10448        self.check_resources_with_log("post-subledger-reconciliation")?;
10449
10450        Ok(results)
10451    }
10452
10453    /// Generate the chart of accounts.
10454    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10455        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10456
10457        let coa_framework = self.resolve_coa_framework();
10458
10459        let mut gen = ChartOfAccountsGenerator::new(
10460            self.config.chart_of_accounts.complexity,
10461            self.config.global.industry,
10462            self.seed,
10463        )
10464        .with_coa_framework(coa_framework)
10465        // v5.7.0 — honour the opt-in industry-pack expansion flag.
10466        .with_expand_industry_subaccounts(
10467            self.config.chart_of_accounts.expand_industry_subaccounts,
10468        );
10469
10470        let mut built = gen.generate();
10471        // v4.4.1: propagate the accounting framework label from config
10472        // onto the CoA struct so SDK consumers can read it without
10473        // cross-referencing the config (they previously saw null).
10474        if self.config.accounting_standards.enabled {
10475            use datasynth_config::schema::AccountingFrameworkConfig;
10476            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10477                match f {
10478                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10479                    AccountingFrameworkConfig::Ifrs => "ifrs",
10480                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10481                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10482                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10483                }
10484                .to_string()
10485            });
10486        }
10487        let coa = Arc::new(built);
10488        self.coa = Some(Arc::clone(&coa));
10489
10490        if let Some(pb) = pb {
10491            pb.finish_with_message("Chart of Accounts complete");
10492        }
10493
10494        Ok(coa)
10495    }
10496
10497    /// Generate master data entities.
10498    fn generate_master_data(&mut self) -> SynthResult<()> {
10499        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10500            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10501        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10502
10503        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10504        let pb = self.create_progress_bar(total, "Generating Master Data");
10505
10506        // Resolve country pack once for all companies (uses primary company's country)
10507        let pack = self.primary_pack().clone();
10508
10509        // Capture config values needed inside the parallel closure
10510        let vendors_per_company = self.phase_config.vendors_per_company;
10511        let customers_per_company = self.phase_config.customers_per_company;
10512        let materials_per_company = self.phase_config.materials_per_company;
10513        let assets_per_company = self.phase_config.assets_per_company;
10514        let coa_framework = self.resolve_coa_framework();
10515
10516        // Generate all master data in parallel across companies.
10517        // Each company's data is independent, making this embarrassingly parallel.
10518        let per_company_results: Vec<_> = self
10519            .config
10520            .companies
10521            .par_iter()
10522            .enumerate()
10523            .map(|(i, company)| {
10524                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10525                let pack = pack.clone();
10526
10527                // Generate vendors (offset counter so IDs are globally unique across companies)
10528                let mut vendor_gen = VendorGenerator::new(company_seed);
10529                vendor_gen.set_country_pack(pack.clone());
10530                vendor_gen.set_coa_framework(coa_framework);
10531                vendor_gen.set_counter_offset(i * vendors_per_company);
10532                // v3.2.0+: user-supplied bank names (and future template
10533                // strings) flow through the shared provider.
10534                vendor_gen.set_template_provider(self.template_provider.clone());
10535                // Wire vendor network config when enabled
10536                if self.config.vendor_network.enabled {
10537                    let vn = &self.config.vendor_network;
10538                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10539                        enabled: true,
10540                        depth: vn.depth,
10541                        tier1_count: datasynth_generators::TierCountConfig::new(
10542                            vn.tier1.min,
10543                            vn.tier1.max,
10544                        ),
10545                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10546                            vn.tier2_per_parent.min,
10547                            vn.tier2_per_parent.max,
10548                        ),
10549                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10550                            vn.tier3_per_parent.min,
10551                            vn.tier3_per_parent.max,
10552                        ),
10553                        cluster_distribution: datasynth_generators::ClusterDistribution {
10554                            reliable_strategic: vn.clusters.reliable_strategic,
10555                            standard_operational: vn.clusters.standard_operational,
10556                            transactional: vn.clusters.transactional,
10557                            problematic: vn.clusters.problematic,
10558                        },
10559                        concentration_limits: datasynth_generators::ConcentrationLimits {
10560                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10561                            max_top5: vn.dependencies.top_5_concentration,
10562                        },
10563                        ..datasynth_generators::VendorNetworkConfig::default()
10564                    });
10565                }
10566                let vendor_pool =
10567                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10568
10569                // Generate customers (offset counter so IDs are globally unique across companies)
10570                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10571                customer_gen.set_country_pack(pack.clone());
10572                customer_gen.set_coa_framework(coa_framework);
10573                customer_gen.set_counter_offset(i * customers_per_company);
10574                // v3.2.0+: user-supplied customer names flow through the shared provider.
10575                customer_gen.set_template_provider(self.template_provider.clone());
10576                // Wire customer segmentation config when enabled
10577                if self.config.customer_segmentation.enabled {
10578                    let cs = &self.config.customer_segmentation;
10579                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10580                        enabled: true,
10581                        segment_distribution: datasynth_generators::SegmentDistribution {
10582                            enterprise: cs.value_segments.enterprise.customer_share,
10583                            mid_market: cs.value_segments.mid_market.customer_share,
10584                            smb: cs.value_segments.smb.customer_share,
10585                            consumer: cs.value_segments.consumer.customer_share,
10586                        },
10587                        referral_config: datasynth_generators::ReferralConfig {
10588                            enabled: cs.networks.referrals.enabled,
10589                            referral_rate: cs.networks.referrals.referral_rate,
10590                            ..Default::default()
10591                        },
10592                        hierarchy_config: datasynth_generators::HierarchyConfig {
10593                            enabled: cs.networks.corporate_hierarchies.enabled,
10594                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10595                            ..Default::default()
10596                        },
10597                        ..Default::default()
10598                    };
10599                    customer_gen.set_segmentation_config(seg_cfg);
10600                }
10601                let customer_pool = customer_gen.generate_customer_pool(
10602                    customers_per_company,
10603                    &company.code,
10604                    start_date,
10605                );
10606
10607                // Generate materials (offset counter so IDs are globally unique across companies)
10608                let mut material_gen = MaterialGenerator::new(company_seed + 200);
10609                material_gen.set_country_pack(pack.clone());
10610                material_gen.set_counter_offset(i * materials_per_company);
10611                // v3.2.1+: user-supplied material descriptions flow through shared provider
10612                material_gen.set_template_provider(self.template_provider.clone());
10613                let material_pool = material_gen.generate_material_pool(
10614                    materials_per_company,
10615                    &company.code,
10616                    start_date,
10617                );
10618
10619                // Generate fixed assets
10620                let mut asset_gen = AssetGenerator::new(company_seed + 300);
10621                // v3.2.1+: user-supplied asset descriptions flow through shared provider
10622                asset_gen.set_template_provider(self.template_provider.clone());
10623                let asset_pool = asset_gen.generate_asset_pool(
10624                    assets_per_company,
10625                    &company.code,
10626                    (start_date, end_date),
10627                );
10628
10629                // Generate employees
10630                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10631                employee_gen.set_country_pack(pack);
10632                // v3.2.1+: user-supplied department names flow through shared provider
10633                employee_gen.set_template_provider(self.template_provider.clone());
10634                let employee_pool =
10635                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10636
10637                // Generate employee change history (2-5 events per employee)
10638                let employee_change_history =
10639                    employee_gen.generate_all_change_history(&employee_pool, end_date);
10640
10641                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
10642                let employee_ids: Vec<String> = employee_pool
10643                    .employees
10644                    .iter()
10645                    .map(|e| e.employee_id.clone())
10646                    .collect();
10647                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10648                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10649
10650                // v5.1: profit centre hierarchy (two-level: top-level
10651                // segment / region / product-group nodes + sub-units).
10652                let mut pc_gen =
10653                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10654                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10655
10656                (
10657                    vendor_pool.vendors,
10658                    customer_pool.customers,
10659                    material_pool.materials,
10660                    asset_pool.assets,
10661                    employee_pool.employees,
10662                    employee_change_history,
10663                    cost_centers,
10664                    profit_centers,
10665                )
10666            })
10667            .collect();
10668
10669        // Aggregate results from all companies
10670        for (
10671            vendors,
10672            customers,
10673            materials,
10674            assets,
10675            employees,
10676            change_history,
10677            cost_centers,
10678            profit_centers,
10679        ) in per_company_results
10680        {
10681            self.master_data.vendors.extend(vendors);
10682            self.master_data.customers.extend(customers);
10683            self.master_data.materials.extend(materials);
10684            self.master_data.assets.extend(assets);
10685            self.master_data.employees.extend(employees);
10686            self.master_data.cost_centers.extend(cost_centers);
10687            self.master_data.profit_centers.extend(profit_centers);
10688            self.master_data
10689                .employee_change_history
10690                .extend(change_history);
10691        }
10692
10693        // v3.3.0: one OrganizationalProfile per company. Cheap to
10694        // generate (derived from industry + company_code) so we
10695        // always emit when master data runs; no separate config flag.
10696        {
10697            use datasynth_core::models::IndustrySector;
10698            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10699            let industry = match self.config.global.industry {
10700                IndustrySector::Manufacturing => "manufacturing",
10701                IndustrySector::Retail => "retail",
10702                IndustrySector::FinancialServices => "financial_services",
10703                IndustrySector::Technology => "technology",
10704                IndustrySector::Healthcare => "healthcare",
10705                _ => "other",
10706            };
10707            for (i, company) in self.config.companies.iter().enumerate() {
10708                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10709                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10710                let profile = profile_gen.generate(&company.code, industry);
10711                self.master_data.organizational_profiles.push(profile);
10712            }
10713        }
10714
10715        if let Some(pb) = &pb {
10716            pb.inc(total);
10717        }
10718        if let Some(pb) = pb {
10719            pb.finish_with_message("Master data generation complete");
10720        }
10721
10722        Ok(())
10723    }
10724
10725    /// Generate document flows (P2P and O2C).
10726    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10727        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10728            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10729
10730        // Generate P2P chains
10731        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
10732        let months = (self.config.global.period_months as usize).max(1);
10733        let p2p_count = self
10734            .phase_config
10735            .p2p_chains
10736            .min(self.master_data.vendors.len() * 2 * months);
10737        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10738
10739        // Convert P2P config from schema to generator config
10740        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10741        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10742        p2p_gen.set_country_pack(self.primary_pack().clone());
10743        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
10744        // to business days. No-op when `temporal_patterns.business_days.
10745        // enabled = false`.
10746        if let Some(ctx) = &self.temporal_context {
10747            p2p_gen.set_temporal_context(Arc::clone(ctx));
10748        }
10749
10750        for i in 0..p2p_count {
10751            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10752            let materials: Vec<&Material> = self
10753                .master_data
10754                .materials
10755                .iter()
10756                .skip(i % self.master_data.materials.len().max(1))
10757                .take(2.min(self.master_data.materials.len()))
10758                .collect();
10759
10760            if materials.is_empty() {
10761                continue;
10762            }
10763
10764            let company = &self.config.companies[i % self.config.companies.len()];
10765            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10766            let fiscal_period = po_date.month() as u8;
10767            let created_by = if self.master_data.employees.is_empty() {
10768                "SYSTEM"
10769            } else {
10770                self.master_data.employees[i % self.master_data.employees.len()]
10771                    .user_id
10772                    .as_str()
10773            };
10774
10775            let chain = p2p_gen.generate_chain(
10776                &company.code,
10777                vendor,
10778                &materials,
10779                po_date,
10780                start_date.year() as u16,
10781                fiscal_period,
10782                created_by,
10783            );
10784
10785            // Flatten documents
10786            flows.purchase_orders.push(chain.purchase_order.clone());
10787            flows.goods_receipts.extend(chain.goods_receipts.clone());
10788            if let Some(vi) = &chain.vendor_invoice {
10789                flows.vendor_invoices.push(vi.clone());
10790            }
10791            if let Some(payment) = &chain.payment {
10792                flows.payments.push(payment.clone());
10793            }
10794            for remainder in &chain.remainder_payments {
10795                flows.payments.push(remainder.clone());
10796            }
10797            flows.p2p_chains.push(chain);
10798
10799            if let Some(pb) = &pb {
10800                pb.inc(1);
10801            }
10802        }
10803
10804        if let Some(pb) = pb {
10805            pb.finish_with_message("P2P document flows complete");
10806        }
10807
10808        // Generate O2C chains
10809        // Cap at ~2 SOs per customer per month to keep order volume realistic
10810        let o2c_count = self
10811            .phase_config
10812            .o2c_chains
10813            .min(self.master_data.customers.len() * 2 * months);
10814        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10815
10816        // Convert O2C config from schema to generator config
10817        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10818        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10819        o2c_gen.set_country_pack(self.primary_pack().clone());
10820        // v3.4.1: wire temporal context (no-op when business_days disabled).
10821        if let Some(ctx) = &self.temporal_context {
10822            o2c_gen.set_temporal_context(Arc::clone(ctx));
10823        }
10824
10825        for i in 0..o2c_count {
10826            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10827            let materials: Vec<&Material> = self
10828                .master_data
10829                .materials
10830                .iter()
10831                .skip(i % self.master_data.materials.len().max(1))
10832                .take(2.min(self.master_data.materials.len()))
10833                .collect();
10834
10835            if materials.is_empty() {
10836                continue;
10837            }
10838
10839            let company = &self.config.companies[i % self.config.companies.len()];
10840            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10841            let fiscal_period = so_date.month() as u8;
10842            let created_by = if self.master_data.employees.is_empty() {
10843                "SYSTEM"
10844            } else {
10845                self.master_data.employees[i % self.master_data.employees.len()]
10846                    .user_id
10847                    .as_str()
10848            };
10849
10850            let chain = o2c_gen.generate_chain(
10851                &company.code,
10852                customer,
10853                &materials,
10854                so_date,
10855                start_date.year() as u16,
10856                fiscal_period,
10857                created_by,
10858            );
10859
10860            // Flatten documents
10861            flows.sales_orders.push(chain.sales_order.clone());
10862            flows.deliveries.extend(chain.deliveries.clone());
10863            if let Some(ci) = &chain.customer_invoice {
10864                flows.customer_invoices.push(ci.clone());
10865            }
10866            if let Some(receipt) = &chain.customer_receipt {
10867                flows.payments.push(receipt.clone());
10868            }
10869            // Extract remainder receipts (follow-up to partial payments)
10870            for receipt in &chain.remainder_receipts {
10871                flows.payments.push(receipt.clone());
10872            }
10873            flows.o2c_chains.push(chain);
10874
10875            if let Some(pb) = &pb {
10876                pb.inc(1);
10877            }
10878        }
10879
10880        if let Some(pb) = pb {
10881            pb.finish_with_message("O2C document flows complete");
10882        }
10883
10884        // Collect all document cross-references from document headers.
10885        // Each document embeds references to its predecessor(s) via add_reference(); here we
10886        // denormalise them into a flat list for the document_references.json output file.
10887        {
10888            let mut refs = Vec::new();
10889            for doc in &flows.purchase_orders {
10890                refs.extend(doc.header.document_references.iter().cloned());
10891            }
10892            for doc in &flows.goods_receipts {
10893                refs.extend(doc.header.document_references.iter().cloned());
10894            }
10895            for doc in &flows.vendor_invoices {
10896                refs.extend(doc.header.document_references.iter().cloned());
10897            }
10898            for doc in &flows.sales_orders {
10899                refs.extend(doc.header.document_references.iter().cloned());
10900            }
10901            for doc in &flows.deliveries {
10902                refs.extend(doc.header.document_references.iter().cloned());
10903            }
10904            for doc in &flows.customer_invoices {
10905                refs.extend(doc.header.document_references.iter().cloned());
10906            }
10907            for doc in &flows.payments {
10908                refs.extend(doc.header.document_references.iter().cloned());
10909            }
10910            debug!(
10911                "Collected {} document cross-references from document headers",
10912                refs.len()
10913            );
10914            flows.document_references = refs;
10915        }
10916
10917        Ok(())
10918    }
10919
10920    /// Generate journal entries using parallel generation across multiple cores.
10921    fn generate_journal_entries(
10922        &mut self,
10923        coa: &Arc<ChartOfAccounts>,
10924    ) -> SynthResult<Vec<JournalEntry>> {
10925        use datasynth_core::traits::ParallelGenerator;
10926
10927        let total = self.calculate_total_transactions();
10928        let pb = self.create_progress_bar(total, "Generating Journal Entries");
10929
10930        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10931            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10932        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10933
10934        let company_codes: Vec<String> = self
10935            .config
10936            .companies
10937            .iter()
10938            .map(|c| c.code.clone())
10939            .collect();
10940
10941        let mut generator = JournalEntryGenerator::new_with_params(
10942            self.config.transactions.clone(),
10943            Arc::clone(coa),
10944            company_codes,
10945            start_date,
10946            end_date,
10947            self.seed,
10948        );
10949        // Wire the `business_processes.*_weight` config through (phantom knob
10950        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
10951        let bp = &self.config.business_processes;
10952        generator.set_business_process_weights(
10953            bp.o2c_weight,
10954            bp.p2p_weight,
10955            bp.r2r_weight,
10956            bp.h2r_weight,
10957            bp.a2r_weight,
10958        );
10959        // v3.4.0: wire advanced distributions (mixture models + industry
10960        // profiles). No-op when `distributions.enabled = false` or
10961        // `distributions.amounts.enabled = false`, preserving v3.3.2
10962        // byte-identical output on default configs.
10963        generator
10964            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10965            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10966        let generator = generator;
10967
10968        // Connect generated master data to ensure JEs reference real entities
10969        // Enable persona-based error injection for realistic human behavior
10970        // Pass fraud configuration for fraud injection
10971        let je_pack = self.primary_pack();
10972
10973        // Master-data CC / PC pools so JE.cost_center and
10974        // JE.profit_center join back to `cost_centers.id` and
10975        // `profit_centers.id` (closes the v5.9.0 linkage gap that
10976        // had `JE.cost_center = "CC1000"` while master used
10977        // `CC-1000-FIN` etc.).  Empty when no master is present —
10978        // the generator falls back to its hardcoded constants.
10979        let cc_pool: Vec<String> = self
10980            .master_data
10981            .cost_centers
10982            .iter()
10983            .map(|c| c.id.clone())
10984            .collect();
10985        let pc_pool: Vec<String> = self
10986            .master_data
10987            .profit_centers
10988            .iter()
10989            .map(|p| p.id.clone())
10990            .collect();
10991
10992        // Build a UserPool from the generated employee master so
10993        // JE.created_by lines join back to `employees.user_id`.  v5.9.0:
10994        // closes the third linkage gap (the previous behaviour had
10995        // JeGenerator generate its own UserPool internally with
10996        // ids disjoint from the employee master).
10997        let user_pool_from_employees =
10998            datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
10999
11000        let mut generator = generator
11001            .with_master_data(
11002                &self.master_data.vendors,
11003                &self.master_data.customers,
11004                &self.master_data.materials,
11005            )
11006            .with_cost_center_pool(cc_pool)
11007            .with_profit_center_pool(pc_pool)
11008            .with_country_pack_names(je_pack)
11009            .with_user_pool(user_pool_from_employees)
11010            .with_country_pack_temporal(
11011                self.config.temporal_patterns.clone(),
11012                self.seed + 200,
11013                je_pack,
11014            )
11015            .with_persona_errors(true)
11016            .with_fraud_config(self.config.fraud.clone());
11017
11018        // Apply temporal drift if configured. v3.5.2+: also merge
11019        // `distributions.regime_changes` (regime events, economic
11020        // cycles, parameter drifts) into the same DriftConfig so both
11021        // knobs flow through the shared DriftController.
11022        let temporal_enabled = self.config.temporal.enabled;
11023        let regimes_enabled = self.config.distributions.regime_changes.enabled;
11024        if temporal_enabled || regimes_enabled {
11025            let mut drift_config = if temporal_enabled {
11026                self.config.temporal.to_core_config()
11027            } else {
11028                // regime-changes only: start from default (drift OFF),
11029                // apply_to flips `enabled = true`.
11030                datasynth_core::distributions::DriftConfig::default()
11031            };
11032            if regimes_enabled {
11033                self.config
11034                    .distributions
11035                    .regime_changes
11036                    .apply_to(&mut drift_config, start_date);
11037            }
11038            generator = generator.with_drift_config(drift_config, self.seed + 100);
11039        }
11040
11041        // Check memory limit at start
11042        self.check_memory_limit()?;
11043
11044        // Determine parallelism: use available cores, but cap at total entries
11045        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11046
11047        // Use parallel generation for datasets with 10K+ entries.
11048        // Below this threshold, the statistical properties of a single-seeded
11049        // generator (e.g. Benford compliance) are better preserved.
11050        let entries = if total >= 10_000 && num_threads > 1 {
11051            // Parallel path: split the generator across cores and generate in parallel.
11052            // Each sub-generator gets a unique seed for deterministic, independent generation.
11053            let sub_generators = generator.split(num_threads);
11054            let entries_per_thread = total as usize / num_threads;
11055            let remainder = total as usize % num_threads;
11056
11057            let batches: Vec<Vec<JournalEntry>> = sub_generators
11058                .into_par_iter()
11059                .enumerate()
11060                .map(|(i, mut gen)| {
11061                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11062                    gen.generate_batch(count)
11063                })
11064                .collect();
11065
11066            // Merge all batches into a single Vec
11067            let entries = JournalEntryGenerator::merge_results(batches);
11068
11069            if let Some(pb) = &pb {
11070                pb.inc(total);
11071            }
11072            entries
11073        } else {
11074            // Sequential path for small datasets (< 1000 entries)
11075            let mut entries = Vec::with_capacity(total as usize);
11076            for _ in 0..total {
11077                let entry = generator.generate();
11078                entries.push(entry);
11079                if let Some(pb) = &pb {
11080                    pb.inc(1);
11081                }
11082            }
11083            entries
11084        };
11085
11086        if let Some(pb) = pb {
11087            pb.finish_with_message("Journal entries complete");
11088        }
11089
11090        Ok(entries)
11091    }
11092
11093    /// Generate journal entries from document flows.
11094    ///
11095    /// This creates proper GL entries for each document in the P2P and O2C flows,
11096    /// ensuring that document activity is reflected in the general ledger.
11097    fn generate_jes_from_document_flows(
11098        &mut self,
11099        flows: &DocumentFlowSnapshot,
11100    ) -> SynthResult<Vec<JournalEntry>> {
11101        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11102        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11103
11104        let je_config = match self.resolve_coa_framework() {
11105            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11106            CoAFramework::GermanSkr04 => {
11107                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11108                DocumentFlowJeConfig::from(&fa)
11109            }
11110            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11111        };
11112
11113        let populate_fec = je_config.populate_fec_fields;
11114        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11115
11116        // Master-data CC / PC pools so document-flow-derived JEs
11117        // (P2P / O2C postings) reference IDs that join back to the
11118        // cost-centers / profit-centers masters.  Same plumbing as
11119        // for `JeGenerator` above; falls back to hardcoded const
11120        // pools when masters are absent.
11121        let cc_pool: Vec<String> = self
11122            .master_data
11123            .cost_centers
11124            .iter()
11125            .map(|c| c.id.clone())
11126            .collect();
11127        let pc_pool: Vec<String> = self
11128            .master_data
11129            .profit_centers
11130            .iter()
11131            .map(|p| p.id.clone())
11132            .collect();
11133        if !cc_pool.is_empty() {
11134            generator.set_cost_center_pool(cc_pool);
11135        }
11136        if !pc_pool.is_empty() {
11137            generator.set_profit_center_pool(pc_pool);
11138        }
11139
11140        // Build auxiliary account lookup from vendor/customer master data so that
11141        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11142        // PCG "4010001") instead of raw partner IDs.
11143        if populate_fec {
11144            let mut aux_lookup = std::collections::HashMap::new();
11145            for vendor in &self.master_data.vendors {
11146                if let Some(ref aux) = vendor.auxiliary_gl_account {
11147                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11148                }
11149            }
11150            for customer in &self.master_data.customers {
11151                if let Some(ref aux) = customer.auxiliary_gl_account {
11152                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11153                }
11154            }
11155            if !aux_lookup.is_empty() {
11156                generator.set_auxiliary_account_lookup(aux_lookup);
11157            }
11158        }
11159
11160        let mut entries = Vec::new();
11161
11162        // Generate JEs from P2P chains
11163        for chain in &flows.p2p_chains {
11164            let chain_entries = generator.generate_from_p2p_chain(chain);
11165            entries.extend(chain_entries);
11166            if let Some(pb) = &pb {
11167                pb.inc(1);
11168            }
11169        }
11170
11171        // Generate JEs from O2C chains
11172        for chain in &flows.o2c_chains {
11173            let chain_entries = generator.generate_from_o2c_chain(chain);
11174            entries.extend(chain_entries);
11175            if let Some(pb) = &pb {
11176                pb.inc(1);
11177            }
11178        }
11179
11180        if let Some(pb) = pb {
11181            pb.finish_with_message(format!(
11182                "Generated {} JEs from document flows",
11183                entries.len()
11184            ));
11185        }
11186
11187        Ok(entries)
11188    }
11189
11190    /// Generate journal entries from payroll runs.
11191    ///
11192    /// Creates one JE per payroll run:
11193    /// - DR Salaries & Wages (6100) for gross pay
11194    /// - CR Payroll Clearing (9100) for gross pay
11195    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11196        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11197
11198        let mut jes = Vec::with_capacity(payroll_runs.len());
11199
11200        for run in payroll_runs {
11201            let mut je = JournalEntry::new_simple(
11202                format!("JE-PAYROLL-{}", run.payroll_id),
11203                run.company_code.clone(),
11204                run.run_date,
11205                format!("Payroll {}", run.payroll_id),
11206            );
11207
11208            // Debit Salaries & Wages for gross pay
11209            je.add_line(JournalEntryLine {
11210                line_number: 1,
11211                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11212                debit_amount: run.total_gross,
11213                reference: Some(run.payroll_id.clone()),
11214                text: Some(format!(
11215                    "Payroll {} ({} employees)",
11216                    run.payroll_id, run.employee_count
11217                )),
11218                ..Default::default()
11219            });
11220
11221            // Credit Payroll Clearing for gross pay
11222            je.add_line(JournalEntryLine {
11223                line_number: 2,
11224                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11225                credit_amount: run.total_gross,
11226                reference: Some(run.payroll_id.clone()),
11227                ..Default::default()
11228            });
11229
11230            jes.push(je);
11231        }
11232
11233        jes
11234    }
11235
11236    /// Link document flows to subledger records.
11237    ///
11238    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11239    /// ensuring subledger data is coherent with document flow data.
11240    fn link_document_flows_to_subledgers(
11241        &mut self,
11242        flows: &DocumentFlowSnapshot,
11243    ) -> SynthResult<SubledgerSnapshot> {
11244        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11245        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11246
11247        // Build vendor/customer name maps from master data for realistic subledger names
11248        let vendor_names: std::collections::HashMap<String, String> = self
11249            .master_data
11250            .vendors
11251            .iter()
11252            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11253            .collect();
11254        let customer_names: std::collections::HashMap<String, String> = self
11255            .master_data
11256            .customers
11257            .iter()
11258            .map(|c| (c.customer_id.clone(), c.name.clone()))
11259            .collect();
11260
11261        let mut linker = DocumentFlowLinker::new()
11262            .with_vendor_names(vendor_names)
11263            .with_customer_names(customer_names);
11264
11265        // Convert vendor invoices to AP invoices
11266        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11267        if let Some(pb) = &pb {
11268            pb.inc(flows.vendor_invoices.len() as u64);
11269        }
11270
11271        // Convert customer invoices to AR invoices
11272        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11273        if let Some(pb) = &pb {
11274            pb.inc(flows.customer_invoices.len() as u64);
11275        }
11276
11277        if let Some(pb) = pb {
11278            pb.finish_with_message(format!(
11279                "Linked {} AP and {} AR invoices",
11280                ap_invoices.len(),
11281                ar_invoices.len()
11282            ));
11283        }
11284
11285        Ok(SubledgerSnapshot {
11286            ap_invoices,
11287            ar_invoices,
11288            fa_records: Vec::new(),
11289            inventory_positions: Vec::new(),
11290            inventory_movements: Vec::new(),
11291            // Aging reports are computed after payment settlement in phase_document_flows.
11292            ar_aging_reports: Vec::new(),
11293            ap_aging_reports: Vec::new(),
11294            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11295            depreciation_runs: Vec::new(),
11296            inventory_valuations: Vec::new(),
11297            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11298            dunning_runs: Vec::new(),
11299            dunning_letters: Vec::new(),
11300        })
11301    }
11302
11303    /// Generate OCPM events from document flows.
11304    ///
11305    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11306    /// capturing the object-centric process perspective.
11307    #[allow(clippy::too_many_arguments)]
11308    fn generate_ocpm_events(
11309        &mut self,
11310        flows: &DocumentFlowSnapshot,
11311        sourcing: &SourcingSnapshot,
11312        hr: &HrSnapshot,
11313        manufacturing: &ManufacturingSnapshot,
11314        banking: &BankingSnapshot,
11315        audit: &AuditSnapshot,
11316        financial_reporting: &FinancialReportingSnapshot,
11317    ) -> SynthResult<OcpmSnapshot> {
11318        let total_chains = flows.p2p_chains.len()
11319            + flows.o2c_chains.len()
11320            + sourcing.sourcing_projects.len()
11321            + hr.payroll_runs.len()
11322            + manufacturing.production_orders.len()
11323            + banking.customers.len()
11324            + audit.engagements.len()
11325            + financial_reporting.bank_reconciliations.len();
11326        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11327
11328        // Create OCPM event log with standard types
11329        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11330        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11331
11332        // Configure the OCPM generator
11333        let ocpm_config = OcpmGeneratorConfig {
11334            generate_p2p: true,
11335            generate_o2c: true,
11336            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11337            generate_h2r: !hr.payroll_runs.is_empty(),
11338            generate_mfg: !manufacturing.production_orders.is_empty(),
11339            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11340            generate_bank: !banking.customers.is_empty(),
11341            generate_audit: !audit.engagements.is_empty(),
11342            happy_path_rate: 0.75,
11343            exception_path_rate: 0.20,
11344            error_path_rate: 0.05,
11345            add_duration_variability: true,
11346            duration_std_dev_factor: 0.3,
11347        };
11348        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11349        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11350
11351        // Get available users for resource assignment
11352        let available_users: Vec<String> = self
11353            .master_data
11354            .employees
11355            .iter()
11356            .take(20)
11357            .map(|e| e.user_id.clone())
11358            .collect();
11359
11360        // Deterministic base date from config (avoids Utc::now() non-determinism)
11361        let fallback_date =
11362            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11363        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11364            .unwrap_or(fallback_date);
11365        let base_midnight = base_date
11366            .and_hms_opt(0, 0, 0)
11367            .expect("midnight is always valid");
11368        let base_datetime =
11369            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11370
11371        // Helper closure to add case results to event log
11372        let add_result = |event_log: &mut OcpmEventLog,
11373                          result: datasynth_ocpm::CaseGenerationResult| {
11374            for event in result.events {
11375                event_log.add_event(event);
11376            }
11377            for object in result.objects {
11378                event_log.add_object(object);
11379            }
11380            for relationship in result.relationships {
11381                event_log.add_relationship(relationship);
11382            }
11383            for corr in result.correlation_events {
11384                event_log.add_correlation_event(corr);
11385            }
11386            event_log.add_case(result.case_trace);
11387        };
11388
11389        // Generate events from P2P chains
11390        for chain in &flows.p2p_chains {
11391            let po = &chain.purchase_order;
11392            let documents = P2pDocuments::new(
11393                &po.header.document_id,
11394                &po.vendor_id,
11395                &po.header.company_code,
11396                po.total_net_amount,
11397                &po.header.currency,
11398                &ocpm_uuid_factory,
11399            )
11400            .with_goods_receipt(
11401                chain
11402                    .goods_receipts
11403                    .first()
11404                    .map(|gr| gr.header.document_id.as_str())
11405                    .unwrap_or(""),
11406                &ocpm_uuid_factory,
11407            )
11408            .with_invoice(
11409                chain
11410                    .vendor_invoice
11411                    .as_ref()
11412                    .map(|vi| vi.header.document_id.as_str())
11413                    .unwrap_or(""),
11414                &ocpm_uuid_factory,
11415            )
11416            .with_payment(
11417                chain
11418                    .payment
11419                    .as_ref()
11420                    .map(|p| p.header.document_id.as_str())
11421                    .unwrap_or(""),
11422                &ocpm_uuid_factory,
11423            );
11424
11425            let start_time =
11426                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11427            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11428            add_result(&mut event_log, result);
11429
11430            if let Some(pb) = &pb {
11431                pb.inc(1);
11432            }
11433        }
11434
11435        // Generate events from O2C chains
11436        for chain in &flows.o2c_chains {
11437            let so = &chain.sales_order;
11438            let documents = O2cDocuments::new(
11439                &so.header.document_id,
11440                &so.customer_id,
11441                &so.header.company_code,
11442                so.total_net_amount,
11443                &so.header.currency,
11444                &ocpm_uuid_factory,
11445            )
11446            .with_delivery(
11447                chain
11448                    .deliveries
11449                    .first()
11450                    .map(|d| d.header.document_id.as_str())
11451                    .unwrap_or(""),
11452                &ocpm_uuid_factory,
11453            )
11454            .with_invoice(
11455                chain
11456                    .customer_invoice
11457                    .as_ref()
11458                    .map(|ci| ci.header.document_id.as_str())
11459                    .unwrap_or(""),
11460                &ocpm_uuid_factory,
11461            )
11462            .with_receipt(
11463                chain
11464                    .customer_receipt
11465                    .as_ref()
11466                    .map(|r| r.header.document_id.as_str())
11467                    .unwrap_or(""),
11468                &ocpm_uuid_factory,
11469            );
11470
11471            let start_time =
11472                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11473            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11474            add_result(&mut event_log, result);
11475
11476            if let Some(pb) = &pb {
11477                pb.inc(1);
11478            }
11479        }
11480
11481        // Generate events from S2C sourcing projects
11482        for project in &sourcing.sourcing_projects {
11483            // Find vendor from contracts or qualifications
11484            let vendor_id = sourcing
11485                .contracts
11486                .iter()
11487                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11488                .map(|c| c.vendor_id.clone())
11489                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11490                .or_else(|| {
11491                    self.master_data
11492                        .vendors
11493                        .first()
11494                        .map(|v| v.vendor_id.clone())
11495                })
11496                .unwrap_or_else(|| "V000".to_string());
11497            let mut docs = S2cDocuments::new(
11498                &project.project_id,
11499                &vendor_id,
11500                &project.company_code,
11501                project.estimated_annual_spend,
11502                &ocpm_uuid_factory,
11503            );
11504            // Link RFx if available
11505            if let Some(rfx) = sourcing
11506                .rfx_events
11507                .iter()
11508                .find(|r| r.sourcing_project_id == project.project_id)
11509            {
11510                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11511                // Link winning bid (status == Accepted)
11512                if let Some(bid) = sourcing.bids.iter().find(|b| {
11513                    b.rfx_id == rfx.rfx_id
11514                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11515                }) {
11516                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11517                }
11518            }
11519            // Link contract
11520            if let Some(contract) = sourcing
11521                .contracts
11522                .iter()
11523                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11524            {
11525                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11526            }
11527            let start_time = base_datetime - chrono::Duration::days(90);
11528            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11529            add_result(&mut event_log, result);
11530
11531            if let Some(pb) = &pb {
11532                pb.inc(1);
11533            }
11534        }
11535
11536        // Generate events from H2R payroll runs
11537        for run in &hr.payroll_runs {
11538            // Use first matching payroll line item's employee, or fallback
11539            let employee_id = hr
11540                .payroll_line_items
11541                .iter()
11542                .find(|li| li.payroll_id == run.payroll_id)
11543                .map(|li| li.employee_id.as_str())
11544                .unwrap_or("EMP000");
11545            let docs = H2rDocuments::new(
11546                &run.payroll_id,
11547                employee_id,
11548                &run.company_code,
11549                run.total_gross,
11550                &ocpm_uuid_factory,
11551            )
11552            .with_time_entries(
11553                hr.time_entries
11554                    .iter()
11555                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11556                    .take(5)
11557                    .map(|t| t.entry_id.as_str())
11558                    .collect(),
11559            );
11560            let start_time = base_datetime - chrono::Duration::days(30);
11561            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11562            add_result(&mut event_log, result);
11563
11564            if let Some(pb) = &pb {
11565                pb.inc(1);
11566            }
11567        }
11568
11569        // Generate events from MFG production orders
11570        for order in &manufacturing.production_orders {
11571            let mut docs = MfgDocuments::new(
11572                &order.order_id,
11573                &order.material_id,
11574                &order.company_code,
11575                order.planned_quantity,
11576                &ocpm_uuid_factory,
11577            )
11578            .with_operations(
11579                order
11580                    .operations
11581                    .iter()
11582                    .map(|o| format!("OP-{:04}", o.operation_number))
11583                    .collect::<Vec<_>>()
11584                    .iter()
11585                    .map(std::string::String::as_str)
11586                    .collect(),
11587            );
11588            // Link quality inspection if available (via reference_id matching order_id)
11589            if let Some(insp) = manufacturing
11590                .quality_inspections
11591                .iter()
11592                .find(|i| i.reference_id == order.order_id)
11593            {
11594                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11595            }
11596            // Link cycle count if available (match by material_id in items)
11597            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11598                cc.items
11599                    .iter()
11600                    .any(|item| item.material_id == order.material_id)
11601            }) {
11602                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11603            }
11604            let start_time = base_datetime - chrono::Duration::days(60);
11605            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11606            add_result(&mut event_log, result);
11607
11608            if let Some(pb) = &pb {
11609                pb.inc(1);
11610            }
11611        }
11612
11613        // Generate events from Banking customers
11614        for customer in &banking.customers {
11615            let customer_id_str = customer.customer_id.to_string();
11616            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11617            // Link accounts (primary_owner_id matches customer_id)
11618            if let Some(account) = banking
11619                .accounts
11620                .iter()
11621                .find(|a| a.primary_owner_id == customer.customer_id)
11622            {
11623                let account_id_str = account.account_id.to_string();
11624                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11625                // Link transactions for this account
11626                let txn_strs: Vec<String> = banking
11627                    .transactions
11628                    .iter()
11629                    .filter(|t| t.account_id == account.account_id)
11630                    .take(10)
11631                    .map(|t| t.transaction_id.to_string())
11632                    .collect();
11633                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11634                let txn_amounts: Vec<rust_decimal::Decimal> = banking
11635                    .transactions
11636                    .iter()
11637                    .filter(|t| t.account_id == account.account_id)
11638                    .take(10)
11639                    .map(|t| t.amount)
11640                    .collect();
11641                if !txn_ids.is_empty() {
11642                    docs = docs.with_transactions(txn_ids, txn_amounts);
11643                }
11644            }
11645            let start_time = base_datetime - chrono::Duration::days(180);
11646            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11647            add_result(&mut event_log, result);
11648
11649            if let Some(pb) = &pb {
11650                pb.inc(1);
11651            }
11652        }
11653
11654        // Generate events from Audit engagements
11655        for engagement in &audit.engagements {
11656            let engagement_id_str = engagement.engagement_id.to_string();
11657            let docs = AuditDocuments::new(
11658                &engagement_id_str,
11659                &engagement.client_entity_id,
11660                &ocpm_uuid_factory,
11661            )
11662            .with_workpapers(
11663                audit
11664                    .workpapers
11665                    .iter()
11666                    .filter(|w| w.engagement_id == engagement.engagement_id)
11667                    .take(10)
11668                    .map(|w| w.workpaper_id.to_string())
11669                    .collect::<Vec<_>>()
11670                    .iter()
11671                    .map(std::string::String::as_str)
11672                    .collect(),
11673            )
11674            .with_evidence(
11675                audit
11676                    .evidence
11677                    .iter()
11678                    .filter(|e| e.engagement_id == engagement.engagement_id)
11679                    .take(10)
11680                    .map(|e| e.evidence_id.to_string())
11681                    .collect::<Vec<_>>()
11682                    .iter()
11683                    .map(std::string::String::as_str)
11684                    .collect(),
11685            )
11686            .with_risks(
11687                audit
11688                    .risk_assessments
11689                    .iter()
11690                    .filter(|r| r.engagement_id == engagement.engagement_id)
11691                    .take(5)
11692                    .map(|r| r.risk_id.to_string())
11693                    .collect::<Vec<_>>()
11694                    .iter()
11695                    .map(std::string::String::as_str)
11696                    .collect(),
11697            )
11698            .with_findings(
11699                audit
11700                    .findings
11701                    .iter()
11702                    .filter(|f| f.engagement_id == engagement.engagement_id)
11703                    .take(5)
11704                    .map(|f| f.finding_id.to_string())
11705                    .collect::<Vec<_>>()
11706                    .iter()
11707                    .map(std::string::String::as_str)
11708                    .collect(),
11709            )
11710            .with_judgments(
11711                audit
11712                    .judgments
11713                    .iter()
11714                    .filter(|j| j.engagement_id == engagement.engagement_id)
11715                    .take(5)
11716                    .map(|j| j.judgment_id.to_string())
11717                    .collect::<Vec<_>>()
11718                    .iter()
11719                    .map(std::string::String::as_str)
11720                    .collect(),
11721            );
11722            let start_time = base_datetime - chrono::Duration::days(120);
11723            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11724            add_result(&mut event_log, result);
11725
11726            if let Some(pb) = &pb {
11727                pb.inc(1);
11728            }
11729        }
11730
11731        // Generate events from Bank Reconciliations
11732        for recon in &financial_reporting.bank_reconciliations {
11733            let docs = BankReconDocuments::new(
11734                &recon.reconciliation_id,
11735                &recon.bank_account_id,
11736                &recon.company_code,
11737                recon.bank_ending_balance,
11738                &ocpm_uuid_factory,
11739            )
11740            .with_statement_lines(
11741                recon
11742                    .statement_lines
11743                    .iter()
11744                    .take(20)
11745                    .map(|l| l.line_id.as_str())
11746                    .collect(),
11747            )
11748            .with_reconciling_items(
11749                recon
11750                    .reconciling_items
11751                    .iter()
11752                    .take(10)
11753                    .map(|i| i.item_id.as_str())
11754                    .collect(),
11755            );
11756            let start_time = base_datetime - chrono::Duration::days(30);
11757            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11758            add_result(&mut event_log, result);
11759
11760            if let Some(pb) = &pb {
11761                pb.inc(1);
11762            }
11763        }
11764
11765        // Compute process variants
11766        event_log.compute_variants();
11767
11768        let summary = event_log.summary();
11769
11770        if let Some(pb) = pb {
11771            pb.finish_with_message(format!(
11772                "Generated {} OCPM events, {} objects",
11773                summary.event_count, summary.object_count
11774            ));
11775        }
11776
11777        Ok(OcpmSnapshot {
11778            event_count: summary.event_count,
11779            object_count: summary.object_count,
11780            case_count: summary.case_count,
11781            event_log: Some(event_log),
11782        })
11783    }
11784
11785    /// Inject anomalies into journal entries.
11786    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11787        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11788
11789        // Read anomaly rates from config instead of using hardcoded values.
11790        // Priority: anomaly_injection config > fraud config > default 0.02
11791        let total_rate = if self.config.anomaly_injection.enabled {
11792            self.config.anomaly_injection.rates.total_rate
11793        } else if self.config.fraud.enabled {
11794            self.config.fraud.fraud_rate
11795        } else {
11796            0.02
11797        };
11798
11799        let fraud_rate = if self.config.anomaly_injection.enabled {
11800            self.config.anomaly_injection.rates.fraud_rate
11801        } else {
11802            AnomalyRateConfig::default().fraud_rate
11803        };
11804
11805        let error_rate = if self.config.anomaly_injection.enabled {
11806            self.config.anomaly_injection.rates.error_rate
11807        } else {
11808            AnomalyRateConfig::default().error_rate
11809        };
11810
11811        let process_issue_rate = if self.config.anomaly_injection.enabled {
11812            self.config.anomaly_injection.rates.process_rate
11813        } else {
11814            AnomalyRateConfig::default().process_issue_rate
11815        };
11816
11817        let anomaly_config = AnomalyInjectorConfig {
11818            rates: AnomalyRateConfig {
11819                total_rate,
11820                fraud_rate,
11821                error_rate,
11822                process_issue_rate,
11823                ..Default::default()
11824            },
11825            seed: self.seed + 5000,
11826            ..Default::default()
11827        };
11828
11829        let mut injector = AnomalyInjector::new(anomaly_config);
11830        let result = injector.process_entries(entries);
11831
11832        if let Some(pb) = &pb {
11833            pb.inc(entries.len() as u64);
11834            pb.finish_with_message("Anomaly injection complete");
11835        }
11836
11837        let mut by_type = HashMap::new();
11838        for label in &result.labels {
11839            *by_type
11840                .entry(format!("{:?}", label.anomaly_type))
11841                .or_insert(0) += 1;
11842        }
11843
11844        Ok(AnomalyLabels {
11845            labels: result.labels,
11846            summary: Some(result.summary),
11847            by_type,
11848        })
11849    }
11850
11851    /// Validate journal entries using running balance tracker.
11852    ///
11853    /// Applies all entries to the balance tracker and validates:
11854    /// - Each entry is internally balanced (debits = credits)
11855    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
11856    ///
11857    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
11858    /// excluded from balance validation as they may be intentionally unbalanced.
11859    fn validate_journal_entries(
11860        &mut self,
11861        entries: &[JournalEntry],
11862    ) -> SynthResult<BalanceValidationResult> {
11863        // Filter out entries with human errors as they may be intentionally unbalanced
11864        let clean_entries: Vec<&JournalEntry> = entries
11865            .iter()
11866            .filter(|e| {
11867                e.header
11868                    .header_text
11869                    .as_ref()
11870                    .map(|t| !t.contains("[HUMAN_ERROR:"))
11871                    .unwrap_or(true)
11872            })
11873            .collect();
11874
11875        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11876
11877        // Configure tracker to not fail on errors (collect them instead)
11878        let config = BalanceTrackerConfig {
11879            validate_on_each_entry: false,   // We'll validate at the end
11880            track_history: false,            // Skip history for performance
11881            fail_on_validation_error: false, // Collect errors, don't fail
11882            ..Default::default()
11883        };
11884        let validation_currency = self
11885            .config
11886            .companies
11887            .first()
11888            .map(|c| c.currency.clone())
11889            .unwrap_or_else(|| "USD".to_string());
11890
11891        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11892
11893        // Apply clean entries (without human errors)
11894        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11895        let errors = tracker.apply_entries(&clean_refs);
11896
11897        if let Some(pb) = &pb {
11898            pb.inc(entries.len() as u64);
11899        }
11900
11901        // Check if any entries were unbalanced
11902        // Note: When fail_on_validation_error is false, errors are stored in tracker
11903        let has_unbalanced = tracker
11904            .get_validation_errors()
11905            .iter()
11906            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11907
11908        // Validate balance sheet for each company
11909        // Include both returned errors and collected validation errors
11910        let mut all_errors = errors;
11911        all_errors.extend(tracker.get_validation_errors().iter().cloned());
11912        let company_codes: Vec<String> = self
11913            .config
11914            .companies
11915            .iter()
11916            .map(|c| c.code.clone())
11917            .collect();
11918
11919        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11920            .map(|d| d + chrono::Months::new(self.config.global.period_months))
11921            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11922
11923        for company_code in &company_codes {
11924            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11925                all_errors.push(e);
11926            }
11927        }
11928
11929        // Get statistics after all mutable operations are done
11930        let stats = tracker.get_statistics();
11931
11932        // Determine if balanced overall
11933        let is_balanced = all_errors.is_empty();
11934
11935        if let Some(pb) = pb {
11936            let msg = if is_balanced {
11937                "Balance validation passed"
11938            } else {
11939                "Balance validation completed with errors"
11940            };
11941            pb.finish_with_message(msg);
11942        }
11943
11944        Ok(BalanceValidationResult {
11945            validated: true,
11946            is_balanced,
11947            entries_processed: stats.entries_processed,
11948            total_debits: stats.total_debits,
11949            total_credits: stats.total_credits,
11950            accounts_tracked: stats.accounts_tracked,
11951            companies_tracked: stats.companies_tracked,
11952            validation_errors: all_errors,
11953            has_unbalanced_entries: has_unbalanced,
11954        })
11955    }
11956
11957    /// Inject data quality variations into journal entries.
11958    ///
11959    /// Applies typos, missing values, and format variations to make
11960    /// the synthetic data more realistic for testing data cleaning pipelines.
11961    fn inject_data_quality(
11962        &mut self,
11963        entries: &mut [JournalEntry],
11964    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11965        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11966
11967        // Build config from user-specified schema settings when data_quality is enabled;
11968        // otherwise fall back to the low-rate minimal() preset.
11969        let config = if self.config.data_quality.enabled {
11970            let dq = &self.config.data_quality;
11971            // Propagate per-field rates and protected fields from the schema
11972            // so users can dial in real-production NULL profiles per field
11973            // (e.g. CostCenter 96.5% NULL, Invoice_Reference 100% NULL).
11974            let field_rates = dq.missing_values.field_rates.clone();
11975            let mut required_fields: std::collections::HashSet<String> =
11976                dq.missing_values.protected_fields.iter().cloned().collect();
11977            // Always preserve audit-critical identifiers regardless of
11978            // user config — losing these breaks downstream joins.
11979            for f in [
11980                "document_id",
11981                "company_code",
11982                "posting_date",
11983                "fiscal_year",
11984                "fiscal_period",
11985                "gl_account",
11986                "line_number",
11987                "transaction_id",
11988            ] {
11989                required_fields.insert(f.to_string());
11990            }
11991            DataQualityConfig {
11992                enable_missing_values: dq.missing_values.enabled,
11993                missing_values: datasynth_generators::MissingValueConfig {
11994                    global_rate: dq.effective_missing_rate(),
11995                    field_rates,
11996                    required_fields,
11997                    ..Default::default()
11998                },
11999                enable_format_variations: dq.format_variations.enabled,
12000                format_variations: datasynth_generators::FormatVariationConfig {
12001                    date_variation_rate: dq.format_variations.dates.rate,
12002                    amount_variation_rate: dq.format_variations.amounts.rate,
12003                    identifier_variation_rate: dq.format_variations.identifiers.rate,
12004                    ..Default::default()
12005                },
12006                enable_duplicates: dq.duplicates.enabled,
12007                duplicates: datasynth_generators::DuplicateConfig {
12008                    duplicate_rate: dq.effective_duplicate_rate(),
12009                    ..Default::default()
12010                },
12011                enable_typos: dq.typos.enabled,
12012                typos: datasynth_generators::TypoConfig {
12013                    char_error_rate: dq.effective_typo_rate(),
12014                    ..Default::default()
12015                },
12016                enable_encoding_issues: dq.encoding_issues.enabled,
12017                encoding_issue_rate: dq.encoding_issues.rate,
12018                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
12019                track_statistics: true,
12020            }
12021        } else {
12022            DataQualityConfig::minimal()
12023        };
12024        let mut injector = DataQualityInjector::new(config);
12025
12026        // Wire country pack for locale-aware format baselines
12027        injector.set_country_pack(self.primary_pack().clone());
12028
12029        // Build context for missing value decisions
12030        let context = HashMap::new();
12031
12032        for entry in entries.iter_mut() {
12033            // Process header_text field (common target for typos)
12034            if let Some(text) = &entry.header.header_text {
12035                let processed = injector.process_text_field(
12036                    "header_text",
12037                    text,
12038                    &entry.header.document_id.to_string(),
12039                    &context,
12040                );
12041                match processed {
12042                    Some(new_text) if new_text != *text => {
12043                        entry.header.header_text = Some(new_text);
12044                    }
12045                    None => {
12046                        entry.header.header_text = None; // Missing value
12047                    }
12048                    _ => {}
12049                }
12050            }
12051
12052            // Process reference field
12053            if let Some(ref_text) = &entry.header.reference {
12054                let processed = injector.process_text_field(
12055                    "reference",
12056                    ref_text,
12057                    &entry.header.document_id.to_string(),
12058                    &context,
12059                );
12060                match processed {
12061                    Some(new_text) if new_text != *ref_text => {
12062                        entry.header.reference = Some(new_text);
12063                    }
12064                    None => {
12065                        entry.header.reference = None;
12066                    }
12067                    _ => {}
12068                }
12069            }
12070
12071            // Process user_persona field (potential for typos in user IDs)
12072            let user_persona = entry.header.user_persona.clone();
12073            if let Some(processed) = injector.process_text_field(
12074                "user_persona",
12075                &user_persona,
12076                &entry.header.document_id.to_string(),
12077                &context,
12078            ) {
12079                if processed != user_persona {
12080                    entry.header.user_persona = processed;
12081                }
12082            }
12083
12084            // Process line items
12085            for line in &mut entry.lines {
12086                // Process line description if present
12087                if let Some(ref text) = line.line_text {
12088                    let processed = injector.process_text_field(
12089                        "line_text",
12090                        text,
12091                        &entry.header.document_id.to_string(),
12092                        &context,
12093                    );
12094                    match processed {
12095                        Some(new_text) if new_text != *text => {
12096                            line.line_text = Some(new_text);
12097                        }
12098                        None => {
12099                            line.line_text = None;
12100                        }
12101                        _ => {}
12102                    }
12103                }
12104
12105                // Process cost_center if present
12106                if let Some(cc) = &line.cost_center {
12107                    let processed = injector.process_text_field(
12108                        "cost_center",
12109                        cc,
12110                        &entry.header.document_id.to_string(),
12111                        &context,
12112                    );
12113                    match processed {
12114                        Some(new_cc) if new_cc != *cc => {
12115                            line.cost_center = Some(new_cc);
12116                        }
12117                        None => {
12118                            line.cost_center = None;
12119                        }
12120                        _ => {}
12121                    }
12122                }
12123
12124                // Extended field coverage (v5.6+): apply NULL injection to
12125                // every Option<String> on the line so users can match
12126                // arbitrary real-production NULL profiles via
12127                // `data_quality.missing_values.field_rates`.
12128                //
12129                // Macro-free helper: process_field returns the new value
12130                // ({Some, None, unchanged}) and we apply it back.
12131                macro_rules! process_opt_field {
12132                    ($field_name:expr, $opt:expr) => {
12133                        if let Some(val) = $opt.as_ref() {
12134                            match injector.process_text_field(
12135                                $field_name,
12136                                val,
12137                                &entry.header.document_id.to_string(),
12138                                &context,
12139                            ) {
12140                                Some(new_val) if new_val != *val => {
12141                                    *$opt = Some(new_val);
12142                                }
12143                                None => {
12144                                    *$opt = None;
12145                                }
12146                                _ => {}
12147                            }
12148                        }
12149                    };
12150                }
12151
12152                process_opt_field!("profit_center", &mut line.profit_center);
12153                process_opt_field!("assignment", &mut line.assignment);
12154                process_opt_field!("tax_code", &mut line.tax_code);
12155                process_opt_field!("account_description", &mut line.account_description);
12156                process_opt_field!(
12157                    "auxiliary_account_number",
12158                    &mut line.auxiliary_account_number
12159                );
12160                process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12161                process_opt_field!("lettrage", &mut line.lettrage);
12162            }
12163
12164            if let Some(pb) = &pb {
12165                pb.inc(1);
12166            }
12167        }
12168
12169        if let Some(pb) = pb {
12170            pb.finish_with_message("Data quality injection complete");
12171        }
12172
12173        let quality_issues = injector.issues().to_vec();
12174        Ok((injector.stats().clone(), quality_issues))
12175    }
12176
12177    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12178    ///
12179    /// Creates complete audit documentation for each company in the configuration,
12180    /// following ISA standards:
12181    /// - ISA 210/220: Engagement acceptance and terms
12182    /// - ISA 230: Audit documentation (workpapers)
12183    /// - ISA 265: Control deficiencies (findings)
12184    /// - ISA 315/330: Risk assessment and response
12185    /// - ISA 500: Audit evidence
12186    /// - ISA 200: Professional judgment
12187    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12188        // Check if FSM-driven audit generation is enabled
12189        let use_fsm = self
12190            .config
12191            .audit
12192            .fsm
12193            .as_ref()
12194            .map(|f| f.enabled)
12195            .unwrap_or(false);
12196
12197        if use_fsm {
12198            return self.generate_audit_data_with_fsm(entries);
12199        }
12200
12201        // --- Legacy (non-FSM) audit generation follows ---
12202        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12203            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12204        let fiscal_year = start_date.year() as u16;
12205        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12206
12207        // Calculate rough total revenue from entries for materiality
12208        let total_revenue: rust_decimal::Decimal = entries
12209            .iter()
12210            .flat_map(|e| e.lines.iter())
12211            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12212            .map(|l| l.credit_amount)
12213            .sum();
12214
12215        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12216        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12217
12218        let mut snapshot = AuditSnapshot::default();
12219
12220        // Initialize generators
12221        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12222        // v3.3.2: thread the user-facing audit schema config into the
12223        // engagement generator (team size range).
12224        engagement_gen.set_team_config(&self.config.audit.team);
12225
12226        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12227        // v3.3.2: thread workpaper + review workflow schema config into
12228        // the workpaper generator (per-section count range + review
12229        // delay ranges).
12230        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12231        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12232        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12233        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12234        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12235        finding_gen.set_template_provider(self.template_provider.clone());
12236        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12237        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12238        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12239        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12240        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12241        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12242        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12243
12244        // Get list of accounts from CoA for risk assessment
12245        let accounts: Vec<String> = self
12246            .coa
12247            .as_ref()
12248            .map(|coa| {
12249                coa.get_postable_accounts()
12250                    .iter()
12251                    .map(|acc| acc.account_code().to_string())
12252                    .collect()
12253            })
12254            .unwrap_or_default();
12255
12256        // Generate engagements for each company
12257        for (i, company) in self.config.companies.iter().enumerate() {
12258            // Calculate company-specific revenue (proportional to volume weight)
12259            let company_revenue = total_revenue
12260                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12261
12262            // Generate engagements for this company
12263            let engagements_for_company =
12264                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12265            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12266                1
12267            } else {
12268                0
12269            };
12270
12271            for _eng_idx in 0..(engagements_for_company + extra) {
12272                // v3.3.2: draw engagement type from the user-configured
12273                // distribution instead of always using the default
12274                // (AnnualAudit). Falls back to the default when all
12275                // probabilities are zero.
12276                let eng_type =
12277                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12278
12279                // Generate the engagement
12280                let mut engagement = engagement_gen.generate_engagement(
12281                    &company.code,
12282                    &company.name,
12283                    fiscal_year,
12284                    period_end,
12285                    company_revenue,
12286                    Some(eng_type),
12287                );
12288
12289                // Replace synthetic team IDs with real employee IDs from master data
12290                if !self.master_data.employees.is_empty() {
12291                    let emp_count = self.master_data.employees.len();
12292                    // Use employee IDs deterministically based on engagement index
12293                    let base = (i * 10 + _eng_idx) % emp_count;
12294                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12295                        .employee_id
12296                        .clone();
12297                    engagement.engagement_manager_id = self.master_data.employees
12298                        [(base + 1) % emp_count]
12299                        .employee_id
12300                        .clone();
12301                    let real_team: Vec<String> = engagement
12302                        .team_member_ids
12303                        .iter()
12304                        .enumerate()
12305                        .map(|(j, _)| {
12306                            self.master_data.employees[(base + 2 + j) % emp_count]
12307                                .employee_id
12308                                .clone()
12309                        })
12310                        .collect();
12311                    engagement.team_member_ids = real_team;
12312                }
12313
12314                if let Some(pb) = &pb {
12315                    pb.inc(1);
12316                }
12317
12318                // Get team members from the engagement
12319                let team_members: Vec<String> = engagement.team_member_ids.clone();
12320
12321                // Generate workpapers for the engagement.
12322                // v3.3.2: honor `audit.generate_workpapers` — when false,
12323                // workpapers (and dependent evidence) are skipped while
12324                // the engagement itself, risk assessments, findings, etc.
12325                // still generate normally.
12326                let workpapers = if self.config.audit.generate_workpapers {
12327                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12328                } else {
12329                    Vec::new()
12330                };
12331
12332                for wp in &workpapers {
12333                    if let Some(pb) = &pb {
12334                        pb.inc(1);
12335                    }
12336
12337                    // Generate evidence for each workpaper
12338                    let evidence = evidence_gen.generate_evidence_for_workpaper(
12339                        wp,
12340                        &team_members,
12341                        wp.preparer_date,
12342                    );
12343
12344                    for _ in &evidence {
12345                        if let Some(pb) = &pb {
12346                            pb.inc(1);
12347                        }
12348                    }
12349
12350                    snapshot.evidence.extend(evidence);
12351                }
12352
12353                // Generate risk assessments for the engagement
12354                let risks =
12355                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12356
12357                for _ in &risks {
12358                    if let Some(pb) = &pb {
12359                        pb.inc(1);
12360                    }
12361                }
12362                snapshot.risk_assessments.extend(risks);
12363
12364                // Generate findings for the engagement
12365                let findings = finding_gen.generate_findings_for_engagement(
12366                    &engagement,
12367                    &workpapers,
12368                    &team_members,
12369                );
12370
12371                for _ in &findings {
12372                    if let Some(pb) = &pb {
12373                        pb.inc(1);
12374                    }
12375                }
12376                snapshot.findings.extend(findings);
12377
12378                // Generate professional judgments for the engagement
12379                let judgments =
12380                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12381
12382                for _ in &judgments {
12383                    if let Some(pb) = &pb {
12384                        pb.inc(1);
12385                    }
12386                }
12387                snapshot.judgments.extend(judgments);
12388
12389                // ISA 505: External confirmations and responses
12390                let (confs, resps) =
12391                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12392                snapshot.confirmations.extend(confs);
12393                snapshot.confirmation_responses.extend(resps);
12394
12395                // ISA 330: Procedure steps per workpaper
12396                let team_pairs: Vec<(String, String)> = team_members
12397                    .iter()
12398                    .map(|id| {
12399                        let name = self
12400                            .master_data
12401                            .employees
12402                            .iter()
12403                            .find(|e| e.employee_id == *id)
12404                            .map(|e| e.display_name.clone())
12405                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12406                        (id.clone(), name)
12407                    })
12408                    .collect();
12409                for wp in &workpapers {
12410                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12411                    snapshot.procedure_steps.extend(steps);
12412                }
12413
12414                // ISA 530: Samples per workpaper
12415                for wp in &workpapers {
12416                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12417                        snapshot.samples.push(sample);
12418                    }
12419                }
12420
12421                // ISA 520: Analytical procedures
12422                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12423                snapshot.analytical_results.extend(analytical);
12424
12425                // ISA 610: Internal audit function and reports
12426                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12427                snapshot.ia_functions.push(ia_func);
12428                snapshot.ia_reports.extend(ia_reports);
12429
12430                // ISA 550: Related parties and transactions
12431                let vendor_names: Vec<String> = self
12432                    .master_data
12433                    .vendors
12434                    .iter()
12435                    .map(|v| v.name.clone())
12436                    .collect();
12437                let customer_names: Vec<String> = self
12438                    .master_data
12439                    .customers
12440                    .iter()
12441                    .map(|c| c.name.clone())
12442                    .collect();
12443                let (parties, rp_txns) =
12444                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12445                snapshot.related_parties.extend(parties);
12446                snapshot.related_party_transactions.extend(rp_txns);
12447
12448                // Add workpapers after findings since findings need them
12449                snapshot.workpapers.extend(workpapers);
12450
12451                // Generate audit scope record for this engagement (one per engagement)
12452                {
12453                    let scope_id = format!(
12454                        "SCOPE-{}-{}",
12455                        engagement.engagement_id.simple(),
12456                        &engagement.client_entity_id
12457                    );
12458                    let scope = datasynth_core::models::audit::AuditScope::new(
12459                        scope_id.clone(),
12460                        engagement.engagement_id.to_string(),
12461                        engagement.client_entity_id.clone(),
12462                        engagement.materiality,
12463                    );
12464                    // Wire scope_id back to engagement
12465                    let mut eng = engagement;
12466                    eng.scope_id = Some(scope_id);
12467                    snapshot.audit_scopes.push(scope);
12468                    snapshot.engagements.push(eng);
12469                }
12470            }
12471        }
12472
12473        // ----------------------------------------------------------------
12474        // ISA 600: Group audit — component auditors, plan, instructions, reports
12475        // ----------------------------------------------------------------
12476        if self.config.companies.len() > 1 {
12477            // Use materiality from the first engagement if available, otherwise
12478            // derive a reasonable figure from total revenue.
12479            let group_materiality = snapshot
12480                .engagements
12481                .first()
12482                .map(|e| e.materiality)
12483                .unwrap_or_else(|| {
12484                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12485                    total_revenue * pct
12486                });
12487
12488            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12489            let group_engagement_id = snapshot
12490                .engagements
12491                .first()
12492                .map(|e| e.engagement_id.to_string())
12493                .unwrap_or_else(|| "GROUP-ENG".to_string());
12494
12495            let component_snapshot = component_gen.generate(
12496                &self.config.companies,
12497                group_materiality,
12498                &group_engagement_id,
12499                period_end,
12500            );
12501
12502            snapshot.component_auditors = component_snapshot.component_auditors;
12503            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12504            snapshot.component_instructions = component_snapshot.component_instructions;
12505            snapshot.component_reports = component_snapshot.component_reports;
12506
12507            info!(
12508                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12509                snapshot.component_auditors.len(),
12510                snapshot.component_instructions.len(),
12511                snapshot.component_reports.len(),
12512            );
12513        }
12514
12515        // ----------------------------------------------------------------
12516        // ISA 210: Engagement letters — one per engagement
12517        // ----------------------------------------------------------------
12518        {
12519            let applicable_framework = self
12520                .config
12521                .accounting_standards
12522                .framework
12523                .as_ref()
12524                .map(|f| format!("{f:?}"))
12525                .unwrap_or_else(|| "IFRS".to_string());
12526
12527            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12528            let entity_count = self.config.companies.len();
12529
12530            for engagement in &snapshot.engagements {
12531                let company = self
12532                    .config
12533                    .companies
12534                    .iter()
12535                    .find(|c| c.code == engagement.client_entity_id);
12536                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12537                let letter_date = engagement.planning_start;
12538                let letter = letter_gen.generate(
12539                    &engagement.engagement_id.to_string(),
12540                    &engagement.client_name,
12541                    entity_count,
12542                    engagement.period_end_date,
12543                    currency,
12544                    &applicable_framework,
12545                    letter_date,
12546                );
12547                snapshot.engagement_letters.push(letter);
12548            }
12549
12550            info!(
12551                "ISA 210 engagement letters: {} generated",
12552                snapshot.engagement_letters.len()
12553            );
12554        }
12555
12556        // ----------------------------------------------------------------
12557        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
12558        // ----------------------------------------------------------------
12559        if self.phase_config.generate_legal_documents {
12560            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12561            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12562            for engagement in &snapshot.engagements {
12563                // Build an employee name list for signatory drawing —
12564                // prefer employees from the engaged entity, fall back to
12565                // all employees.
12566                let employee_names: Vec<String> = self
12567                    .master_data
12568                    .employees
12569                    .iter()
12570                    .filter(|e| e.company_code == engagement.client_entity_id)
12571                    .map(|e| e.display_name.clone())
12572                    .collect();
12573                let names_to_use = if !employee_names.is_empty() {
12574                    employee_names
12575                } else {
12576                    self.master_data
12577                        .employees
12578                        .iter()
12579                        .take(10)
12580                        .map(|e| e.display_name.clone())
12581                        .collect()
12582                };
12583                let docs = legal_gen.generate(
12584                    &engagement.client_entity_id,
12585                    engagement.fiscal_year as i32,
12586                    &names_to_use,
12587                );
12588                snapshot.legal_documents.extend(docs);
12589            }
12590            info!(
12591                "v3.3.0 legal documents: {} emitted across {} engagements",
12592                snapshot.legal_documents.len(),
12593                snapshot.engagements.len()
12594            );
12595        }
12596
12597        // ----------------------------------------------------------------
12598        // v3.3.0: IT general controls — access logs + change records
12599        //
12600        // `ItControlsGenerator` runs one pass per company (not per
12601        // engagement) so employee sets and system catalogs stay
12602        // coherent. We derive the period from the earliest engagement's
12603        // planning_start through the latest engagement's period_end_date
12604        // for each company.
12605        // ----------------------------------------------------------------
12606        if self.phase_config.generate_it_controls {
12607            use datasynth_generators::it_controls_generator::ItControlsGenerator;
12608            use std::collections::HashMap;
12609            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12610
12611            // Group engagements by company to produce one IT-controls
12612            // window per entity.
12613            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12614                HashMap::new();
12615            for engagement in &snapshot.engagements {
12616                let entry = by_company
12617                    .entry(engagement.client_entity_id.clone())
12618                    .or_insert((engagement.planning_start, engagement.period_end_date));
12619                if engagement.planning_start < entry.0 {
12620                    entry.0 = engagement.planning_start;
12621                }
12622                if engagement.period_end_date > entry.1 {
12623                    entry.1 = engagement.period_end_date;
12624                }
12625            }
12626
12627            // Standard system catalog — populated from known ERP / app
12628            // names. Keeps the generator's data shape stable when the
12629            // user hasn't configured IT-system naming separately.
12630            let systems: Vec<String> = vec![
12631                "SAP ECC",
12632                "SAP S/4 HANA",
12633                "Oracle EBS",
12634                "Workday",
12635                "NetSuite",
12636                "Active Directory",
12637                "SharePoint",
12638                "Salesforce",
12639                "ServiceNow",
12640                "Jira",
12641                "GitHub Enterprise",
12642                "AWS Console",
12643                "Okta",
12644            ]
12645            .into_iter()
12646            .map(String::from)
12647            .collect();
12648
12649            for (company_code, (start, end)) in by_company {
12650                let emps: Vec<(String, String)> = self
12651                    .master_data
12652                    .employees
12653                    .iter()
12654                    .filter(|e| e.company_code == company_code)
12655                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12656                    .collect();
12657                if emps.is_empty() {
12658                    continue;
12659                }
12660                // Compute period in months, rounded up to the nearest
12661                // whole month (min 1).
12662                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12663                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12664                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12665                snapshot.it_controls_access_logs.extend(access_logs);
12666                snapshot.it_controls_change_records.extend(change_records);
12667            }
12668
12669            info!(
12670                "v3.3.0 IT controls: {} access logs, {} change records",
12671                snapshot.it_controls_access_logs.len(),
12672                snapshot.it_controls_change_records.len()
12673            );
12674        }
12675
12676        // ----------------------------------------------------------------
12677        // ISA 560 / IAS 10: Subsequent events
12678        // ----------------------------------------------------------------
12679        {
12680            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12681            let entity_codes: Vec<String> = self
12682                .config
12683                .companies
12684                .iter()
12685                .map(|c| c.code.clone())
12686                .collect();
12687            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12688            info!(
12689                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12690                subsequent.len(),
12691                subsequent
12692                    .iter()
12693                    .filter(|e| matches!(
12694                        e.classification,
12695                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12696                    ))
12697                    .count(),
12698                subsequent
12699                    .iter()
12700                    .filter(|e| matches!(
12701                        e.classification,
12702                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12703                    ))
12704                    .count(),
12705            );
12706            snapshot.subsequent_events = subsequent;
12707        }
12708
12709        // ----------------------------------------------------------------
12710        // ISA 402: Service organization controls
12711        // ----------------------------------------------------------------
12712        {
12713            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12714            let entity_codes: Vec<String> = self
12715                .config
12716                .companies
12717                .iter()
12718                .map(|c| c.code.clone())
12719                .collect();
12720            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12721            info!(
12722                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12723                soc_snapshot.service_organizations.len(),
12724                soc_snapshot.soc_reports.len(),
12725                soc_snapshot.user_entity_controls.len(),
12726            );
12727            snapshot.service_organizations = soc_snapshot.service_organizations;
12728            snapshot.soc_reports = soc_snapshot.soc_reports;
12729            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12730        }
12731
12732        // ----------------------------------------------------------------
12733        // ISA 570: Going concern assessments
12734        // ----------------------------------------------------------------
12735        {
12736            use datasynth_generators::audit::going_concern_generator::{
12737                GoingConcernGenerator, GoingConcernInput,
12738            };
12739            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12740            let entity_codes: Vec<String> = self
12741                .config
12742                .companies
12743                .iter()
12744                .map(|c| c.code.clone())
12745                .collect();
12746            // Assessment date = period end + 75 days (typical sign-off window).
12747            let assessment_date = period_end + chrono::Duration::days(75);
12748            let period_label = format!("FY{}", period_end.year());
12749
12750            // Build financial inputs from actual journal entries.
12751            //
12752            // We derive approximate P&L, working capital, and operating cash flow
12753            // by aggregating GL account balances from the journal entry population.
12754            // Account ranges used (standard chart):
12755            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
12756            //   Expenses:        6xxx (debit-normal)
12757            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
12758            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
12759            //   Operating CF:    net income adjusted for D&A (rough proxy)
12760            let gc_inputs: Vec<GoingConcernInput> = self
12761                .config
12762                .companies
12763                .iter()
12764                .map(|company| {
12765                    let code = &company.code;
12766                    let mut revenue = rust_decimal::Decimal::ZERO;
12767                    let mut expenses = rust_decimal::Decimal::ZERO;
12768                    let mut current_assets = rust_decimal::Decimal::ZERO;
12769                    let mut current_liabs = rust_decimal::Decimal::ZERO;
12770                    let mut total_debt = rust_decimal::Decimal::ZERO;
12771
12772                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
12773                        for line in &je.lines {
12774                            let acct = line.gl_account.as_str();
12775                            let net = line.debit_amount - line.credit_amount;
12776                            if acct.starts_with('4') {
12777                                // Revenue accounts: credit-normal, so negative net = revenue earned
12778                                revenue -= net;
12779                            } else if acct.starts_with('6') {
12780                                // Expense accounts: debit-normal
12781                                expenses += net;
12782                            }
12783                            // Balance sheet accounts for working capital
12784                            if acct.starts_with('1') {
12785                                // Current asset accounts (1000–1499)
12786                                if let Ok(n) = acct.parse::<u32>() {
12787                                    if (1000..=1499).contains(&n) {
12788                                        current_assets += net;
12789                                    }
12790                                }
12791                            } else if acct.starts_with('2') {
12792                                if let Ok(n) = acct.parse::<u32>() {
12793                                    if (2000..=2499).contains(&n) {
12794                                        // Current liabilities
12795                                        current_liabs -= net; // credit-normal
12796                                    } else if (2500..=2999).contains(&n) {
12797                                        // Long-term debt
12798                                        total_debt -= net;
12799                                    }
12800                                }
12801                            }
12802                        }
12803                    }
12804
12805                    let net_income = revenue - expenses;
12806                    let working_capital = current_assets - current_liabs;
12807                    // Rough operating CF proxy: net income (full accrual CF calculation
12808                    // is done separately in the cash flow statement generator)
12809                    let operating_cash_flow = net_income;
12810
12811                    GoingConcernInput {
12812                        entity_code: code.clone(),
12813                        net_income,
12814                        working_capital,
12815                        operating_cash_flow,
12816                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12817                        assessment_date,
12818                    }
12819                })
12820                .collect();
12821
12822            let assessments = if gc_inputs.is_empty() {
12823                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12824            } else {
12825                gc_gen.generate_for_entities_with_inputs(
12826                    &entity_codes,
12827                    &gc_inputs,
12828                    assessment_date,
12829                    &period_label,
12830                )
12831            };
12832            info!(
12833                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12834                assessments.len(),
12835                assessments.iter().filter(|a| matches!(
12836                    a.auditor_conclusion,
12837                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12838                )).count(),
12839                assessments.iter().filter(|a| matches!(
12840                    a.auditor_conclusion,
12841                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12842                )).count(),
12843                assessments.iter().filter(|a| matches!(
12844                    a.auditor_conclusion,
12845                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12846                )).count(),
12847            );
12848            snapshot.going_concern_assessments = assessments;
12849        }
12850
12851        // ----------------------------------------------------------------
12852        // ISA 540: Accounting estimates
12853        // ----------------------------------------------------------------
        // Generates accounting estimates for every configured company code and
        // stores them in `snapshot.accounting_estimates` (replacing any prior value).
12854        {
12855            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
            // Seeded with the run seed plus a fixed offset (8540); the other audit
            // generators in this function use different offsets.
12856            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
            // One entity code per configured company, in configuration order.
12857            let entity_codes: Vec<String> = self
12858                .config
12859                .companies
12860                .iter()
12861                .map(|c| c.code.clone())
12862                .collect();
12863            let estimates = est_gen.generate_for_entities(&entity_codes);
            // Log the summary counts first: `estimates` is moved into the snapshot below.
12864            info!(
12865                "ISA 540 accounting estimates: {} estimates across {} entities \
12866                 ({} with retrospective reviews, {} with auditor point estimates)",
12867                estimates.len(),
12868                entity_codes.len(),
12869                estimates
12870                    .iter()
12871                    .filter(|e| e.retrospective_review.is_some())
12872                    .count(),
12873                estimates
12874                    .iter()
12875                    .filter(|e| e.auditor_point_estimate.is_some())
12876                    .count(),
12877            );
12878            snapshot.accounting_estimates = estimates;
12879        }
12880
12881        // ----------------------------------------------------------------
12882        // ISA 700/701/705/706: Audit opinions (one per engagement)
12883        // ----------------------------------------------------------------
        // Builds one `AuditOpinionInput` per engagement in the snapshot, generates
        // the opinions in a single batch, appends every generated key audit matter
        // to `snapshot.key_audit_matters`, and replaces `snapshot.audit_opinions`.
        // NOTE(review): another ISA 700/701/705/706 opinion section appears further
        // down in this function; confirm the two do not double-populate
        // `audit_opinions` / `key_audit_matters`.
12884        {
12885            use datasynth_generators::audit::audit_opinion_generator::{
12886                AuditOpinionGenerator, AuditOpinionInput,
12887            };
12888
            // Seeded with the run seed plus a fixed offset (8700).
12889            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12890
12891            // Build inputs — one per engagement, linking findings, going concern and component reports.
12892            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12893                .engagements
12894                .iter()
12895                .map(|eng| {
12896                    // Collect (clone) the findings whose engagement_id matches this engagement.
12897                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12898                        .findings
12899                        .iter()
12900                        .filter(|f| f.engagement_id == eng.engagement_id)
12901                        .cloned()
12902                        .collect();
12903
12904                    // Going concern: first assessment whose entity code equals the engagement's client entity, if any.
12905                    let gc = snapshot
12906                        .going_concern_assessments
12907                        .iter()
12908                        .find(|g| g.entity_code == eng.client_entity_id)
12909                        .cloned();
12910
12911                    // Component reports: every report in the snapshot is attached to each engagement
                    // (no per-engagement filter is applied here).
12912                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12913                        snapshot.component_reports.clone();
12914
                    // Auditor name: display name of the first employee in master data, or
                    // "Global Audit LLP" when the employee list is empty. Same value for
                    // every engagement.
12915                    let auditor = self
12916                        .master_data
12917                        .employees
12918                        .first()
12919                        .map(|e| e.display_name.clone())
12920                        .unwrap_or_else(|| "Global Audit LLP".into());
12921
                    // Engagement partner: display name of the second employee, falling back
                    // to the engagement's `engagement_partner_id` when fewer than two exist.
12922                    let partner = self
12923                        .master_data
12924                        .employees
12925                        .get(1)
12926                        .map(|e| e.display_name.clone())
12927                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
12928
12929                    AuditOpinionInput {
12930                        entity_code: eng.client_entity_id.clone(),
12931                        entity_name: eng.client_name.clone(),
12932                        engagement_id: eng.engagement_id,
12933                        period_end: eng.period_end_date,
12934                        findings: eng_findings,
12935                        going_concern: gc,
12936                        component_reports: comp_reports,
12937                        // US-listed when the configured ISA-compliance framework is "pcaob" or "dual" (ASCII case-insensitive).
12938                        is_us_listed: {
12939                            let fw = &self.config.audit_standards.isa_compliance.framework;
12940                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12941                        },
12942                        auditor_name: auditor,
12943                        engagement_partner: partner,
12944                    }
12945                })
12946                .collect();
12947
12948            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12949
            // Flatten the per-opinion key audit matters into the snapshot. They are
            // cloned because `generated_opinions` is consumed right after.
12950            for go in &generated_opinions {
12951                snapshot
12952                    .key_audit_matters
12953                    .extend(go.key_audit_matters.clone());
12954            }
            // Keep only the opinion itself; this assignment replaces any opinions
            // previously stored in the snapshot.
12955            snapshot.audit_opinions = generated_opinions
12956                .into_iter()
12957                .map(|go| go.opinion)
12958                .collect();
12959
            // Summary log: total plus a count per opinion type.
12960            info!(
12961                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12962                snapshot.audit_opinions.len(),
12963                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12964                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12965                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12966                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12967            );
12968        }
12969
12970        // ----------------------------------------------------------------
12971        // SOX 302 / 404 assessments
12972        // ----------------------------------------------------------------
        // For each configured company: gathers the findings of that company's
        // engagements, picks CEO/CFO names from the employee list, and runs the SOX
        // generator. The returned certifications are appended to
        // `snapshot.sox_302_certifications` and the assessment to
        // `snapshot.sox_404_assessments`.
12973        {
12974            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12975
            // Seeded with the run seed plus a fixed offset (8302); one generator is
            // shared across all companies.
12976            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12977
12978            for (i, company) in self.config.companies.iter().enumerate() {
12979                // Engagement ids whose client entity is this company.
12980                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12981                    .engagements
12982                    .iter()
12983                    .filter(|e| e.client_entity_id == company.code)
12984                    .map(|e| e.engagement_id)
12985                    .collect();
12986
                // Findings (cloned) that belong to any of those engagements.
12987                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12988                    .findings
12989                    .iter()
12990                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12991                    .cloned()
12992                    .collect();
12993
12994                // Derive executive names from the employee list: the company index picks the CEO
                // (wrapping around via modulo) and the next employee is the CFO. Placeholder
                // names are used when there are no employees (CEO) or fewer than two (CFO).
12995                let emp_count = self.master_data.employees.len();
12996                let ceo_name = if emp_count > 0 {
12997                    self.master_data.employees[i % emp_count]
12998                        .display_name
12999                        .clone()
13000                } else {
13001                    format!("CEO of {}", company.name)
13002                };
13003                let cfo_name = if emp_count > 1 {
13004                    self.master_data.employees[(i + 1) % emp_count]
13005                        .display_name
13006                        .clone()
13007                } else {
13008                    format!("CFO of {}", company.name)
13009                };
13010
13011                // Use the materiality of the first engagement for this company; default to 100,000 when none exists.
13012                let materiality = snapshot
13013                    .engagements
13014                    .iter()
13015                    .find(|e| e.client_entity_id == company.code)
13016                    .map(|e| e.materiality)
13017                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13018
                // `fiscal_year` and `period_end` are locals defined earlier in the
                // enclosing function (outside this section).
13019                let input = SoxGeneratorInput {
13020                    company_code: company.code.clone(),
13021                    company_name: company.name.clone(),
13022                    fiscal_year,
13023                    period_end,
13024                    findings: company_findings,
13025                    ceo_name,
13026                    cfo_name,
13027                    materiality_threshold: materiality,
                    // Revenue/asset percentages are fixed at 100 and the significant
                    // account list is hard-coded; neither is derived from the data.
13028                    revenue_percent: rust_decimal::Decimal::from(100),
13029                    assets_percent: rust_decimal::Decimal::from(100),
13030                    significant_accounts: vec![
13031                        "Revenue".into(),
13032                        "Accounts Receivable".into(),
13033                        "Inventory".into(),
13034                        "Fixed Assets".into(),
13035                        "Accounts Payable".into(),
13036                    ],
13037                };
13038
13039                let (certs, assessment) = sox_gen.generate(&input);
13040                snapshot.sox_302_certifications.extend(certs);
13041                snapshot.sox_404_assessments.push(assessment);
13042            }
13043
            // Summary log: totals plus the effective / ineffective ICFR split.
13044            info!(
13045                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13046                snapshot.sox_302_certifications.len(),
13047                snapshot.sox_404_assessments.len(),
13048                snapshot
13049                    .sox_404_assessments
13050                    .iter()
13051                    .filter(|a| a.icfr_effective)
13052                    .count(),
13053                snapshot
13054                    .sox_404_assessments
13055                    .iter()
13056                    .filter(|a| !a.icfr_effective)
13057                    .count(),
13058            );
13059        }
13060
13061        // ----------------------------------------------------------------
13062        // ISA 320: Materiality calculations (one per entity)
13063        // ----------------------------------------------------------------
        // Derives simple per-company financial figures from the journal entries,
        // feeds them to the materiality generator, and replaces
        // `snapshot.materiality_calculations` with the batch result.
13064        {
13065            use datasynth_generators::audit::materiality_generator::{
13066                MaterialityGenerator, MaterialityInput,
13067            };
13068
            // Seeded with the run seed plus a fixed offset (8320).
13069            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13070
13071            // Compute per-company financials from JEs by account-code prefix.
13072            // Asset accounts start with '1', equity with '3', revenue with '4',
13073            // expense accounts with '5' or '6'.
            // NOTE(review): each figure below sums ONE side of the postings only
            // (gross debits or gross credits); the opposite side is not netted off,
            // so these are turnover-style proxies rather than closing balances.
            // Confirm this is the intended input for the benchmarks.
13074            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13075
13076            for company in &self.config.companies {
13077                let company_code = company.code.clone();
13078
13079                // Revenue: sum of credit amounts on lines whose account code starts with '4'
13080                let company_revenue: rust_decimal::Decimal = entries
13081                    .iter()
13082                    .filter(|e| e.company_code() == company_code)
13083                    .flat_map(|e| e.lines.iter())
13084                    .filter(|l| l.account_code.starts_with('4'))
13085                    .map(|l| l.credit_amount)
13086                    .sum();
13087
13088                // Total assets proxy: sum of debit amounts on lines whose account code starts with '1'
13089                let total_assets: rust_decimal::Decimal = entries
13090                    .iter()
13091                    .filter(|e| e.company_code() == company_code)
13092                    .flat_map(|e| e.lines.iter())
13093                    .filter(|l| l.account_code.starts_with('1'))
13094                    .map(|l| l.debit_amount)
13095                    .sum();
13096
13097                // Expenses: sum of debit amounts on lines whose account code starts with '5' or '6'
13098                let total_expenses: rust_decimal::Decimal = entries
13099                    .iter()
13100                    .filter(|e| e.company_code() == company_code)
13101                    .flat_map(|e| e.lines.iter())
13102                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13103                    .map(|l| l.debit_amount)
13104                    .sum();
13105
13106                // Equity proxy: sum of credit amounts on lines whose account code starts with '3'
13107                let equity: rust_decimal::Decimal = entries
13108                    .iter()
13109                    .filter(|e| e.company_code() == company_code)
13110                    .flat_map(|e| e.lines.iter())
13111                    .filter(|l| l.account_code.starts_with('3'))
13112                    .map(|l| l.credit_amount)
13113                    .sum();
13114
13115                let pretax_income = company_revenue - total_expenses;
13116
13117                // If the company has no revenue postings, fall back to a share of `total_revenue`
                // (a local defined earlier in the enclosing function) scaled by the company's
                // `volume_weight` (1 if the weight cannot be converted to a Decimal):
                // revenue = share, assets = 3 x share, pre-tax income = 0.1 x share,
                // equity = 2 x share.
13118                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13119                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
13120                        .unwrap_or(rust_decimal::Decimal::ONE);
13121                    (
13122                        total_revenue * w,
13123                        total_revenue * w * rust_decimal::Decimal::from(3),
13124                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
13125                        total_revenue * w * rust_decimal::Decimal::from(2),
13126                    )
13127                } else {
13128                    (company_revenue, total_assets, pretax_income, equity)
13129                };
13130
                // Gross profit is not derived from the entries; it is a fixed share of revenue.
13131                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
13132
13133                materiality_inputs.push(MaterialityInput {
13134                    entity_code: company_code,
13135                    period: format!("FY{}", fiscal_year),
13136                    revenue: rev,
13137                    pretax_income: pti,
13138                    total_assets: assets,
13139                    equity: eq,
13140                    gross_profit,
13141                });
13142            }
13143
13144            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13145
            // Summary log: total plus a count for four of the benchmark variants.
13146            info!(
13147                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13148                 {} total assets, {} equity benchmarks)",
13149                snapshot.materiality_calculations.len(),
13150                snapshot
13151                    .materiality_calculations
13152                    .iter()
13153                    .filter(|m| matches!(
13154                        m.benchmark,
13155                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13156                    ))
13157                    .count(),
13158                snapshot
13159                    .materiality_calculations
13160                    .iter()
13161                    .filter(|m| matches!(
13162                        m.benchmark,
13163                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13164                    ))
13165                    .count(),
13166                snapshot
13167                    .materiality_calculations
13168                    .iter()
13169                    .filter(|m| matches!(
13170                        m.benchmark,
13171                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13172                    ))
13173                    .count(),
13174                snapshot
13175                    .materiality_calculations
13176                    .iter()
13177                    .filter(|m| matches!(
13178                        m.benchmark,
13179                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13180                    ))
13181                    .count(),
13182            );
13183        }
13184
13185        // ----------------------------------------------------------------
13186        // ISA 315: Combined Risk Assessments (per entity, per account area)
13187        // ----------------------------------------------------------------
        // Generates CRAs for each configured company, stamps each one with the
        // company's audit scope id (when one exists), and appends them to
        // `snapshot.combined_risk_assessments`.
13188        {
13189            use datasynth_generators::audit::cra_generator::CraGenerator;
13190
            // Seeded with the run seed plus a fixed offset (8315).
13191            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13192
13193            // Build entity → scope_id map from already-generated scopes.
            // If an entity has more than one scope, the last one iterated wins
            // (later map inserts overwrite earlier ones).
13194            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13195                .audit_scopes
13196                .iter()
13197                .map(|s| (s.entity_code.clone(), s.id.clone()))
13198                .collect();
13199
13200            for company in &self.config.companies {
                // NOTE(review): the meaning of the second argument (`None`) is defined
                // by `CraGenerator::generate_for_entity`; not visible from here.
13201                let cras = cra_gen.generate_for_entity(&company.code, None);
                // `scope_id` stays `None` for entities without a generated scope.
13202                let scope_id = entity_scope_map.get(&company.code).cloned();
13203                let cras_with_scope: Vec<_> = cras
13204                    .into_iter()
13205                    .map(|mut cra| {
13206                        cra.scope_id = scope_id.clone();
13207                        cra
13208                    })
13209                    .collect();
13210                snapshot.combined_risk_assessments.extend(cras_with_scope);
13211            }
13212
            // Counts for the summary log: CRAs flagged as significant risk, and CRAs
            // whose combined risk level is High.
13213            let significant_count = snapshot
13214                .combined_risk_assessments
13215                .iter()
13216                .filter(|c| c.significant_risk)
13217                .count();
13218            let high_cra_count = snapshot
13219                .combined_risk_assessments
13220                .iter()
13221                .filter(|c| {
13222                    matches!(
13223                        c.combined_risk,
13224                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13225                    )
13226                })
13227                .count();
13228
13229            info!(
13230                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13231                snapshot.combined_risk_assessments.len(),
13232                significant_count,
13233                high_cra_count,
13234            );
13235        }
13236
13237        // ----------------------------------------------------------------
13238        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13239        // ----------------------------------------------------------------
        // For each configured company, passes all of that entity's CRAs plus its
        // tolerable error to the sampling-plan generator and appends the returned
        // plans and sampled items to the snapshot.
        // NOTE(review): no Moderate/High filter is applied in this section;
        // presumably `generate_for_cras` does that selection — confirm.
13240        {
13241            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13242
            // Seeded with the run seed plus a fixed offset (8530).
13243            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13244
13245            // Group CRAs by entity and use per-entity tolerable error from materiality
13246            for company in &self.config.companies {
13247                let entity_code = company.code.clone();
13248
13249                // Find tolerable error for this entity (= performance materiality).
                // Taken from the first materiality calculation for the entity; `None`
                // when the entity has no calculation.
13250                let tolerable_error = snapshot
13251                    .materiality_calculations
13252                    .iter()
13253                    .find(|m| m.entity_code == entity_code)
13254                    .map(|m| m.tolerable_error);
13255
13256                // Collect (clone) the CRAs for this entity
13257                let entity_cras: Vec<_> = snapshot
13258                    .combined_risk_assessments
13259                    .iter()
13260                    .filter(|c| c.entity_code == entity_code)
13261                    .cloned()
13262                    .collect();
13263
                // Entities without CRAs are skipped, so the generator is not called for them.
13264                if !entity_cras.is_empty() {
13265                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13266                    snapshot.sampling_plans.extend(plans);
13267                    snapshot.sampled_items.extend(items);
13268                }
13269            }
13270
            // Number of sampled items flagged with a misstatement, for the summary log.
13271            let misstatement_count = snapshot
13272                .sampled_items
13273                .iter()
13274                .filter(|i| i.misstatement_found)
13275                .count();
13276
13277            info!(
13278                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13279                snapshot.sampling_plans.len(),
13280                snapshot.sampled_items.len(),
13281                misstatement_count,
13282            );
13283        }
13284
13285        // ----------------------------------------------------------------
13286        // ISA 315: Significant Classes of Transactions (SCOTS)
13287        // ----------------------------------------------------------------
        // Generates SCOTs for each configured company from the journal entries and
        // appends them to `snapshot.significant_transaction_classes`.
13288        {
13289            use datasynth_generators::audit::scots_generator::{
13290                ScotsGenerator, ScotsGeneratorConfig,
13291            };
13292
            // Mirror the run's intercompany setting into the generator config.
13293            let ic_enabled = self.config.intercompany.enabled;
13294
            // All other generator settings keep their defaults.
13295            let config = ScotsGeneratorConfig {
13296                intercompany_enabled: ic_enabled,
13297                ..ScotsGeneratorConfig::default()
13298            };
            // Seeded with the run seed plus a fixed offset (83_150).
13299            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13300
            // The full entry set is passed for every company; any per-entity
            // filtering is up to `generate_for_entity`.
13301            for company in &self.config.companies {
13302                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13303                snapshot
13304                    .significant_transaction_classes
13305                    .extend(entity_scots);
13306            }
13307
            // Number of SCOTs whose transaction type is Estimation, for the summary log.
13308            let estimation_count = snapshot
13309                .significant_transaction_classes
13310                .iter()
13311                .filter(|s| {
13312                    matches!(
13313                        s.transaction_type,
13314                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13315                    )
13316                })
13317                .count();
13318
13319            info!(
13320                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13321                snapshot.significant_transaction_classes.len(),
13322                estimation_count,
13323            );
13324        }
13325
13326        // ----------------------------------------------------------------
13327        // ISA 520: Unusual Item Markers
13328        // ----------------------------------------------------------------
        // Runs the unusual-item generator over all configured company codes and the
        // journal entries, then stores the flags in `snapshot.unusual_items`
        // (replacing any prior value).
13329        {
13330            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13331
            // Seeded with the run seed plus a fixed offset (83_200).
13332            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
            // One entity code per configured company, in configuration order.
13333            let entity_codes: Vec<String> = self
13334                .config
13335                .companies
13336                .iter()
13337                .map(|c| c.code.clone())
13338                .collect();
            // `period_end` is a local defined earlier in the enclosing function.
13339            let unusual_flags =
13340                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
            // Log the per-severity counts first: `unusual_flags` is moved into the snapshot below.
13341            info!(
13342                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13343                unusual_flags.len(),
13344                unusual_flags
13345                    .iter()
13346                    .filter(|f| matches!(
13347                        f.severity,
13348                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13349                    ))
13350                    .count(),
13351                unusual_flags
13352                    .iter()
13353                    .filter(|f| matches!(
13354                        f.severity,
13355                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13356                    ))
13357                    .count(),
13358                unusual_flags
13359                    .iter()
13360                    .filter(|f| matches!(
13361                        f.severity,
13362                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13363                    ))
13364                    .count(),
13365            );
13366            snapshot.unusual_items = unusual_flags;
13367        }
13368
13369        // ----------------------------------------------------------------
13370        // ISA 520: Analytical Relationships
13371        // ----------------------------------------------------------------
        // Runs the analytical-relationship generator over all configured company
        // codes and the journal entries, labelled with the current and prior fiscal
        // year, and stores the result in `snapshot.analytical_relationships`
        // (replacing any prior value).
13372        {
13373            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13374
            // Seeded with the run seed plus a fixed offset (83_201).
13375            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
            // One entity code per configured company, in configuration order.
13376            let entity_codes: Vec<String> = self
13377                .config
13378                .companies
13379                .iter()
13380                .map(|c| c.code.clone())
13381                .collect();
            // Period labels have the form "FY<year>"; the prior period is simply the
            // fiscal year minus one. `fiscal_year` is defined earlier in the
            // enclosing function.
13382            let current_period_label = format!("FY{fiscal_year}");
13383            let prior_period_label = format!("FY{}", fiscal_year - 1);
13384            let analytical_rels = ar_gen.generate_for_entities(
13385                &entity_codes,
13386                entries,
13387                &current_period_label,
13388                &prior_period_label,
13389            );
            // Relationships flagged as falling outside their expected range, for the log.
13390            let out_of_range = analytical_rels
13391                .iter()
13392                .filter(|r| !r.within_expected_range)
13393                .count();
13394            info!(
13395                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13396                analytical_rels.len(),
13397                out_of_range,
13398            );
13399            snapshot.analytical_relationships = analytical_rels;
13400        }
13401
13402        if let Some(pb) = pb {
            // If a progress bar was supplied, finish it with a one-line summary of
            // the snapshot collection sizes. The 25 arguments below are positional
            // and must stay in the same order as the `{}` placeholders.
            // Note: this runs before the PCAOB/ISA reference mappings and the
            // related-party linking further down, so those are not reflected here.
13403            pb.finish_with_message(format!(
13404                "Audit data: {} engagements, {} workpapers, {} evidence, \
13405                 {} confirmations, {} procedure steps, {} samples, \
13406                 {} analytical, {} IA funcs, {} related parties, \
13407                 {} component auditors, {} letters, {} subsequent events, \
13408                 {} service orgs, {} going concern, {} accounting estimates, \
13409                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13410                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13411                 {} unusual items, {} analytical relationships",
13412                snapshot.engagements.len(),
13413                snapshot.workpapers.len(),
13414                snapshot.evidence.len(),
13415                snapshot.confirmations.len(),
13416                snapshot.procedure_steps.len(),
13417                snapshot.samples.len(),
13418                snapshot.analytical_results.len(),
13419                snapshot.ia_functions.len(),
13420                snapshot.related_parties.len(),
13421                snapshot.component_auditors.len(),
13422                snapshot.engagement_letters.len(),
13423                snapshot.subsequent_events.len(),
13424                snapshot.service_organizations.len(),
13425                snapshot.going_concern_assessments.len(),
13426                snapshot.accounting_estimates.len(),
13427                snapshot.audit_opinions.len(),
13428                snapshot.key_audit_matters.len(),
13429                snapshot.sox_302_certifications.len(),
13430                snapshot.sox_404_assessments.len(),
13431                snapshot.materiality_calculations.len(),
13432                snapshot.combined_risk_assessments.len(),
13433                snapshot.sampling_plans.len(),
13434                snapshot.significant_transaction_classes.len(),
13435                snapshot.unusual_items.len(),
13436                snapshot.analytical_relationships.len(),
13437            ));
13438        }
13439
13440        // ----------------------------------------------------------------
13441        // PCAOB-ISA cross-reference mappings
13442        // ----------------------------------------------------------------
13443        // Always include the standard PCAOB-ISA mappings when audit generation is
13444        // enabled. These are static reference data (no randomness required) so we
13445        // call standard_mappings() directly.
        // The assignment replaces whatever `snapshot.isa_pcaob_mappings` held before.
13446        {
13447            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13448            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
            // Logged at debug level only (the other sections in this function log at info).
13449            debug!(
13450                "PCAOB-ISA mappings generated: {} mappings",
13451                snapshot.isa_pcaob_mappings.len()
13452            );
13453        }
13454
13455        // ----------------------------------------------------------------
13456        // ISA standard reference entries
13457        // ----------------------------------------------------------------
13458        // Emit flat ISA standard reference data (number, title, series) so
13459        // consumers get a machine-readable listing of all 34 ISA standards in
13460        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
        // This section only fills `snapshot.isa_mappings` (replacing any prior
        // value); writing the JSON file happens elsewhere.
13461        {
13462            use datasynth_standards::audit::isa_reference::IsaStandard;
13463            snapshot.isa_mappings = IsaStandard::standard_entries();
            // Logged at debug level only; the count is whatever `standard_entries()` returns.
13464            debug!(
13465                "ISA standard entries generated: {} standards",
13466                snapshot.isa_mappings.len()
13467            );
13468        }
13469
13470        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
13471        // For each RPT that has no journal entry id yet, find the chronologically closest JE
        // (smallest absolute day difference between posting date and transaction date)
        // among the entries of the engagement's client entity. There is no upper limit
        // on the date distance: any entry for the entity can be linked.
13472        {
            // Lookup from engagement id (as string) to the client entity code.
13473            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13474                .engagements
13475                .iter()
13476                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13477                .collect();
13478
13479            for rpt in &mut snapshot.related_party_transactions {
13480                if rpt.journal_entry_id.is_some() {
13481                    continue; // already set
13482                }
                // Unknown engagement ids resolve to the empty string, which only
                // matches entries whose company code is empty.
13483                let entity = engagement_by_id
13484                    .get(&rpt.engagement_id.to_string())
13485                    .copied()
13486                    .unwrap_or("");
13487
13488                // Find closest JE by date in the entity's company. This is a linear scan of
                // all entries per RPT; on ties the first entry in iteration order is chosen.
13489                let best_je = entries
13490                    .iter()
13491                    .filter(|je| je.header.company_code == entity)
13492                    .min_by_key(|je| {
13493                        (je.header.posting_date - rpt.transaction_date)
13494                            .num_days()
13495                            .abs()
13496                    });
13497
                // Left as `None` when the entity has no journal entries at all.
13498                if let Some(je) = best_je {
13499                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
13500                }
13501            }
13502
            // Count RPTs that now carry a journal entry id (includes ones set before this pass).
13503            let linked = snapshot
13504                .related_party_transactions
13505                .iter()
13506                .filter(|t| t.journal_entry_id.is_some())
13507                .count();
13508            debug!(
13509                "Linked {}/{} related party transactions to journal entries",
13510                linked,
13511                snapshot.related_party_transactions.len()
13512            );
13513        }
13514
13515        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
13516        // One opinion per engagement, derived from that engagement's findings,
13517        // going-concern assessment, and any component-auditor reports. Fills
13518        // `audit_opinions` + a flattened `key_audit_matters` for downstream
13519        // export.
13520        if !snapshot.engagements.is_empty() {
13521            use datasynth_generators::audit_opinion_generator::{
13522                AuditOpinionGenerator, AuditOpinionInput,
13523            };
13524
13525            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13526            let inputs: Vec<AuditOpinionInput> = snapshot
13527                .engagements
13528                .iter()
13529                .map(|eng| {
13530                    let findings = snapshot
13531                        .findings
13532                        .iter()
13533                        .filter(|f| f.engagement_id == eng.engagement_id)
13534                        .cloned()
13535                        .collect();
13536                    let going_concern = snapshot
13537                        .going_concern_assessments
13538                        .iter()
13539                        .find(|gc| gc.entity_code == eng.client_entity_id)
13540                        .cloned();
13541                    // ComponentAuditorReport doesn't carry an engagement id, but
13542                    // component scope is keyed by `entity_code`, so filter on that.
13543                    let component_reports = snapshot
13544                        .component_reports
13545                        .iter()
13546                        .filter(|r| r.entity_code == eng.client_entity_id)
13547                        .cloned()
13548                        .collect();
13549
13550                    AuditOpinionInput {
13551                        entity_code: eng.client_entity_id.clone(),
13552                        entity_name: eng.client_name.clone(),
13553                        engagement_id: eng.engagement_id,
13554                        period_end: eng.period_end_date,
13555                        findings,
13556                        going_concern,
13557                        component_reports,
13558                        is_us_listed: matches!(
13559                            eng.engagement_type,
13560                            datasynth_core::audit::EngagementType::IntegratedAudit
13561                                | datasynth_core::audit::EngagementType::Sox404
13562                        ),
13563                        auditor_name: "DataSynth Audit LLP".to_string(),
13564                        engagement_partner: "Engagement Partner".to_string(),
13565                    }
13566                })
13567                .collect();
13568
13569            let generated = opinion_gen.generate_batch(&inputs);
13570            for g in generated {
13571                snapshot.key_audit_matters.extend(g.key_audit_matters);
13572                snapshot.audit_opinions.push(g.opinion);
13573            }
13574            debug!(
13575                "Generated {} audit opinions with {} key audit matters",
13576                snapshot.audit_opinions.len(),
13577                snapshot.key_audit_matters.len()
13578            );
13579        }
13580
13581        Ok(snapshot)
13582    }
13583
13584    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
13585    ///
13586    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
13587    /// from the current orchestrator state, runs the FSM engine, and maps the
13588    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
13589    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
13590    fn generate_audit_data_with_fsm(
13591        &mut self,
13592        entries: &[JournalEntry],
13593    ) -> SynthResult<AuditSnapshot> {
13594        use datasynth_audit_fsm::{
13595            context::EngagementContext,
13596            engine::AuditFsmEngine,
13597            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13598        };
13599        use rand::SeedableRng;
13600        use rand_chacha::ChaCha8Rng;
13601
13602        info!("Audit FSM: generating audit data via FSM engine");
13603
13604        let fsm_config = self
13605            .config
13606            .audit
13607            .fsm
13608            .as_ref()
13609            .expect("FSM config must be present when FSM is enabled");
13610
13611        // 1. Load blueprint from config string.
13612        let bwp = match fsm_config.blueprint.as_str() {
13613            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13614            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13615            _ => {
13616                warn!(
13617                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13618                    fsm_config.blueprint
13619                );
13620                BlueprintWithPreconditions::load_builtin_fsa()
13621            }
13622        }
13623        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13624
13625        // 2. Load overlay from config string.
13626        let overlay = match fsm_config.overlay.as_str() {
13627            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13628            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13629            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13630            _ => {
13631                warn!(
13632                    "Unknown FSM overlay '{}', falling back to builtin:default",
13633                    fsm_config.overlay
13634                );
13635                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13636            }
13637        }
13638        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13639
13640        // 3. Build EngagementContext from orchestrator state.
13641        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13642            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13643        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13644
13645        // Determine the engagement entity early so we can filter JEs.
13646        let company = self.config.companies.first();
13647        let company_code = company
13648            .map(|c| c.code.clone())
13649            .unwrap_or_else(|| "UNKNOWN".to_string());
13650        let company_name = company
13651            .map(|c| c.name.clone())
13652            .unwrap_or_else(|| "Unknown Company".to_string());
13653        let currency = company
13654            .map(|c| c.currency.clone())
13655            .unwrap_or_else(|| "USD".to_string());
13656
13657        // Filter JEs to the engagement entity for single-company coherence.
13658        let entity_entries: Vec<_> = entries
13659            .iter()
13660            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13661            .cloned()
13662            .collect();
13663        let entries = &entity_entries; // Shadow the parameter for remaining usage
13664
13665        // Financial aggregates from journal entries.
13666        let total_revenue: rust_decimal::Decimal = entries
13667            .iter()
13668            .flat_map(|e| e.lines.iter())
13669            .filter(|l| l.account_code.starts_with('4'))
13670            .map(|l| l.credit_amount - l.debit_amount)
13671            .sum();
13672
13673        let total_assets: rust_decimal::Decimal = entries
13674            .iter()
13675            .flat_map(|e| e.lines.iter())
13676            .filter(|l| l.account_code.starts_with('1'))
13677            .map(|l| l.debit_amount - l.credit_amount)
13678            .sum();
13679
13680        let total_expenses: rust_decimal::Decimal = entries
13681            .iter()
13682            .flat_map(|e| e.lines.iter())
13683            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13684            .map(|l| l.debit_amount)
13685            .sum();
13686
13687        let equity: rust_decimal::Decimal = entries
13688            .iter()
13689            .flat_map(|e| e.lines.iter())
13690            .filter(|l| l.account_code.starts_with('3'))
13691            .map(|l| l.credit_amount - l.debit_amount)
13692            .sum();
13693
13694        let total_debt: rust_decimal::Decimal = entries
13695            .iter()
13696            .flat_map(|e| e.lines.iter())
13697            .filter(|l| l.account_code.starts_with('2'))
13698            .map(|l| l.credit_amount - l.debit_amount)
13699            .sum();
13700
13701        let pretax_income = total_revenue - total_expenses;
13702
13703        let cogs: rust_decimal::Decimal = entries
13704            .iter()
13705            .flat_map(|e| e.lines.iter())
13706            .filter(|l| l.account_code.starts_with('5'))
13707            .map(|l| l.debit_amount)
13708            .sum();
13709        let gross_profit = total_revenue - cogs;
13710
13711        let current_assets: rust_decimal::Decimal = entries
13712            .iter()
13713            .flat_map(|e| e.lines.iter())
13714            .filter(|l| {
13715                l.account_code.starts_with("10")
13716                    || l.account_code.starts_with("11")
13717                    || l.account_code.starts_with("12")
13718                    || l.account_code.starts_with("13")
13719            })
13720            .map(|l| l.debit_amount - l.credit_amount)
13721            .sum();
13722        let current_liabilities: rust_decimal::Decimal = entries
13723            .iter()
13724            .flat_map(|e| e.lines.iter())
13725            .filter(|l| {
13726                l.account_code.starts_with("20")
13727                    || l.account_code.starts_with("21")
13728                    || l.account_code.starts_with("22")
13729            })
13730            .map(|l| l.credit_amount - l.debit_amount)
13731            .sum();
13732        let working_capital = current_assets - current_liabilities;
13733
13734        let depreciation: rust_decimal::Decimal = entries
13735            .iter()
13736            .flat_map(|e| e.lines.iter())
13737            .filter(|l| l.account_code.starts_with("60"))
13738            .map(|l| l.debit_amount)
13739            .sum();
13740        let operating_cash_flow = pretax_income + depreciation;
13741
13742        // GL accounts for reference data.
13743        let accounts: Vec<String> = self
13744            .coa
13745            .as_ref()
13746            .map(|coa| {
13747                coa.get_postable_accounts()
13748                    .iter()
13749                    .map(|acc| acc.account_code().to_string())
13750                    .collect()
13751            })
13752            .unwrap_or_default();
13753
13754        // Team member IDs and display names from master data.
13755        let team_member_ids: Vec<String> = self
13756            .master_data
13757            .employees
13758            .iter()
13759            .take(8) // Cap team size
13760            .map(|e| e.employee_id.clone())
13761            .collect();
13762        let team_member_pairs: Vec<(String, String)> = self
13763            .master_data
13764            .employees
13765            .iter()
13766            .take(8)
13767            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13768            .collect();
13769
13770        let vendor_names: Vec<String> = self
13771            .master_data
13772            .vendors
13773            .iter()
13774            .map(|v| v.name.clone())
13775            .collect();
13776        let customer_names: Vec<String> = self
13777            .master_data
13778            .customers
13779            .iter()
13780            .map(|c| c.name.clone())
13781            .collect();
13782
13783        let entity_codes: Vec<String> = self
13784            .config
13785            .companies
13786            .iter()
13787            .map(|c| c.code.clone())
13788            .collect();
13789
13790        // Journal entry IDs for evidence tracing (sample up to 50).
13791        let journal_entry_ids: Vec<String> = entries
13792            .iter()
13793            .take(50)
13794            .map(|e| e.header.document_id.to_string())
13795            .collect();
13796
13797        // Account balances for risk weighting (aggregate debit - credit per account).
13798        let mut account_balances = std::collections::HashMap::<String, f64>::new();
13799        for entry in entries {
13800            for line in &entry.lines {
13801                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13802                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13803                *account_balances
13804                    .entry(line.account_code.clone())
13805                    .or_insert(0.0) += debit_f64 - credit_f64;
13806            }
13807        }
13808
13809        // Internal control IDs and anomaly refs are populated by the
13810        // caller when available; here we default to empty because the
13811        // orchestrator state may not have generated controls/anomalies
13812        // yet at this point in the pipeline.
13813        let control_ids: Vec<String> = Vec::new();
13814        let anomaly_refs: Vec<String> = Vec::new();
13815
13816        let mut context = EngagementContext {
13817            company_code,
13818            company_name,
13819            fiscal_year: start_date.year(),
13820            currency,
13821            total_revenue,
13822            total_assets,
13823            engagement_start: start_date,
13824            report_date: period_end,
13825            pretax_income,
13826            equity,
13827            gross_profit,
13828            working_capital,
13829            operating_cash_flow,
13830            total_debt,
13831            team_member_ids,
13832            team_member_pairs,
13833            accounts,
13834            vendor_names,
13835            customer_names,
13836            journal_entry_ids,
13837            account_balances,
13838            control_ids,
13839            anomaly_refs,
13840            journal_entries: entries.to_vec(),
13841            is_us_listed: false,
13842            entity_codes,
13843            auditor_firm_name: "DataSynth Audit LLP".into(),
13844            accounting_framework: self
13845                .config
13846                .accounting_standards
13847                .framework
13848                .map(|f| match f {
13849                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13850                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13851                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13852                        "French GAAP"
13853                    }
13854                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13855                        "German GAAP"
13856                    }
13857                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13858                        "Dual Reporting"
13859                    }
13860                })
13861                .unwrap_or("IFRS")
13862                .into(),
13863        };
13864
13865        // 4. Create and run the FSM engine.
13866        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13867        let rng = ChaCha8Rng::seed_from_u64(seed);
13868        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13869
13870        let mut result = engine
13871            .run_engagement(&context)
13872            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13873
13874        info!(
13875            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13876             {} phases completed, duration {:.1}h",
13877            result.event_log.len(),
13878            result.artifacts.total_artifacts(),
13879            result.anomalies.len(),
13880            result.phases_completed.len(),
13881            result.total_duration_hours,
13882        );
13883
13884        // 4b. Populate financial data in the artifact bag for downstream consumers.
13885        let tb_entity = context.company_code.clone();
13886        let tb_fy = context.fiscal_year;
13887        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13888        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13889            entries,
13890            &tb_entity,
13891            tb_fy,
13892            self.coa.as_ref().map(|c| c.as_ref()),
13893        );
13894
13895        // 5. Map ArtifactBag fields to AuditSnapshot.
13896        let bag = result.artifacts;
13897        let mut snapshot = AuditSnapshot {
13898            engagements: bag.engagements,
13899            engagement_letters: bag.engagement_letters,
13900            materiality_calculations: bag.materiality_calculations,
13901            risk_assessments: bag.risk_assessments,
13902            combined_risk_assessments: bag.combined_risk_assessments,
13903            workpapers: bag.workpapers,
13904            evidence: bag.evidence,
13905            findings: bag.findings,
13906            judgments: bag.judgments,
13907            sampling_plans: bag.sampling_plans,
13908            sampled_items: bag.sampled_items,
13909            analytical_results: bag.analytical_results,
13910            going_concern_assessments: bag.going_concern_assessments,
13911            subsequent_events: bag.subsequent_events,
13912            audit_opinions: bag.audit_opinions,
13913            key_audit_matters: bag.key_audit_matters,
13914            procedure_steps: bag.procedure_steps,
13915            samples: bag.samples,
13916            confirmations: bag.confirmations,
13917            confirmation_responses: bag.confirmation_responses,
13918            // Store the event trail for downstream export.
13919            fsm_event_trail: Some(result.event_log),
13920            // Fields not produced by the FSM engine remain at their defaults.
13921            ..Default::default()
13922        };
13923
13924        // 6. Add static reference data (same as legacy path).
13925        {
13926            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13927            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13928        }
13929        {
13930            use datasynth_standards::audit::isa_reference::IsaStandard;
13931            snapshot.isa_mappings = IsaStandard::standard_entries();
13932        }
13933
13934        info!(
13935            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13936             {} risk assessments, {} findings, {} materiality calcs",
13937            snapshot.engagements.len(),
13938            snapshot.workpapers.len(),
13939            snapshot.evidence.len(),
13940            snapshot.risk_assessments.len(),
13941            snapshot.findings.len(),
13942            snapshot.materiality_calculations.len(),
13943        );
13944
13945        Ok(snapshot)
13946    }
13947
13948    /// Export journal entries as graph data for ML training and network reconstruction.
13949    ///
13950    /// Builds a transaction graph where:
13951    /// - Nodes are GL accounts
13952    /// - Edges are money flows from credit to debit accounts
13953    /// - Edge attributes include amount, date, business process, anomaly flags
13954    fn export_graphs(
13955        &mut self,
13956        entries: &[JournalEntry],
13957        _coa: &Arc<ChartOfAccounts>,
13958        stats: &mut EnhancedGenerationStatistics,
13959    ) -> SynthResult<GraphExportSnapshot> {
13960        let pb = self.create_progress_bar(100, "Exporting Graphs");
13961
13962        let mut snapshot = GraphExportSnapshot::default();
13963
13964        // Get output directory
13965        let output_dir = self
13966            .output_path
13967            .clone()
13968            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13969        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13970
13971        // Process each graph type configuration
13972        for graph_type in &self.config.graph_export.graph_types {
13973            if let Some(pb) = &pb {
13974                pb.inc(10);
13975            }
13976
13977            // Build transaction graph
13978            let graph_config = TransactionGraphConfig {
13979                include_vendors: false,
13980                include_customers: false,
13981                create_debit_credit_edges: true,
13982                include_document_nodes: graph_type.include_document_nodes,
13983                min_edge_weight: graph_type.min_edge_weight,
13984                aggregate_parallel_edges: graph_type.aggregate_edges,
13985                framework: None,
13986            };
13987
13988            let mut builder = TransactionGraphBuilder::new(graph_config);
13989            builder.add_journal_entries(entries);
13990            let graph = builder.build();
13991
13992            // Update stats
13993            stats.graph_node_count += graph.node_count();
13994            stats.graph_edge_count += graph.edge_count();
13995
13996            if let Some(pb) = &pb {
13997                pb.inc(40);
13998            }
13999
14000            // Export to each configured format
14001            for format in &self.config.graph_export.formats {
14002                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14003
14004                // Create output directory
14005                if let Err(e) = std::fs::create_dir_all(&format_dir) {
14006                    warn!("Failed to create graph output directory: {}", e);
14007                    continue;
14008                }
14009
14010                match format {
14011                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14012                        let pyg_config = PyGExportConfig {
14013                            common: datasynth_graph::CommonExportConfig {
14014                                export_node_features: true,
14015                                export_edge_features: true,
14016                                export_node_labels: true,
14017                                export_edge_labels: true,
14018                                export_masks: true,
14019                                train_ratio: self.config.graph_export.train_ratio,
14020                                val_ratio: self.config.graph_export.validation_ratio,
14021                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14022                            },
14023                            one_hot_categoricals: false,
14024                        };
14025
14026                        let exporter = PyGExporter::new(pyg_config);
14027                        match exporter.export(&graph, &format_dir) {
14028                            Ok(metadata) => {
14029                                snapshot.exports.insert(
14030                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
14031                                    GraphExportInfo {
14032                                        name: graph_type.name.clone(),
14033                                        format: "pytorch_geometric".to_string(),
14034                                        output_path: format_dir.clone(),
14035                                        node_count: metadata.num_nodes,
14036                                        edge_count: metadata.num_edges,
14037                                    },
14038                                );
14039                                snapshot.graph_count += 1;
14040                            }
14041                            Err(e) => {
14042                                warn!("Failed to export PyTorch Geometric graph: {}", e);
14043                            }
14044                        }
14045                    }
14046                    datasynth_config::schema::GraphExportFormat::Neo4j => {
14047                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14048
14049                        let neo4j_config = Neo4jExportConfig {
14050                            export_node_properties: true,
14051                            export_edge_properties: true,
14052                            export_features: true,
14053                            generate_cypher: true,
14054                            generate_admin_import: true,
14055                            database_name: "synth".to_string(),
14056                            cypher_batch_size: 1000,
14057                        };
14058
14059                        let exporter = Neo4jExporter::new(neo4j_config);
14060                        match exporter.export(&graph, &format_dir) {
14061                            Ok(metadata) => {
14062                                snapshot.exports.insert(
14063                                    format!("{}_{}", graph_type.name, "neo4j"),
14064                                    GraphExportInfo {
14065                                        name: graph_type.name.clone(),
14066                                        format: "neo4j".to_string(),
14067                                        output_path: format_dir.clone(),
14068                                        node_count: metadata.num_nodes,
14069                                        edge_count: metadata.num_edges,
14070                                    },
14071                                );
14072                                snapshot.graph_count += 1;
14073                            }
14074                            Err(e) => {
14075                                warn!("Failed to export Neo4j graph: {}", e);
14076                            }
14077                        }
14078                    }
14079                    datasynth_config::schema::GraphExportFormat::Dgl => {
14080                        use datasynth_graph::{DGLExportConfig, DGLExporter};
14081
14082                        let dgl_config = DGLExportConfig {
14083                            common: datasynth_graph::CommonExportConfig {
14084                                export_node_features: true,
14085                                export_edge_features: true,
14086                                export_node_labels: true,
14087                                export_edge_labels: true,
14088                                export_masks: true,
14089                                train_ratio: self.config.graph_export.train_ratio,
14090                                val_ratio: self.config.graph_export.validation_ratio,
14091                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14092                            },
14093                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
14094                            include_pickle_script: true, // DGL ecosystem standard helper
14095                        };
14096
14097                        let exporter = DGLExporter::new(dgl_config);
14098                        match exporter.export(&graph, &format_dir) {
14099                            Ok(metadata) => {
14100                                snapshot.exports.insert(
14101                                    format!("{}_{}", graph_type.name, "dgl"),
14102                                    GraphExportInfo {
14103                                        name: graph_type.name.clone(),
14104                                        format: "dgl".to_string(),
14105                                        output_path: format_dir.clone(),
14106                                        node_count: metadata.common.num_nodes,
14107                                        edge_count: metadata.common.num_edges,
14108                                    },
14109                                );
14110                                snapshot.graph_count += 1;
14111                            }
14112                            Err(e) => {
14113                                warn!("Failed to export DGL graph: {}", e);
14114                            }
14115                        }
14116                    }
14117                    datasynth_config::schema::GraphExportFormat::RustGraph => {
14118                        use datasynth_graph::{
14119                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14120                        };
14121
14122                        let rustgraph_config = RustGraphExportConfig {
14123                            include_features: true,
14124                            include_temporal: true,
14125                            include_labels: true,
14126                            source_name: "datasynth".to_string(),
14127                            batch_id: None,
14128                            output_format: RustGraphOutputFormat::JsonLines,
14129                            export_node_properties: true,
14130                            export_edge_properties: true,
14131                            pretty_print: false,
14132                        };
14133
14134                        let exporter = RustGraphExporter::new(rustgraph_config);
14135                        match exporter.export(&graph, &format_dir) {
14136                            Ok(metadata) => {
14137                                snapshot.exports.insert(
14138                                    format!("{}_{}", graph_type.name, "rustgraph"),
14139                                    GraphExportInfo {
14140                                        name: graph_type.name.clone(),
14141                                        format: "rustgraph".to_string(),
14142                                        output_path: format_dir.clone(),
14143                                        node_count: metadata.num_nodes,
14144                                        edge_count: metadata.num_edges,
14145                                    },
14146                                );
14147                                snapshot.graph_count += 1;
14148                            }
14149                            Err(e) => {
14150                                warn!("Failed to export RustGraph: {}", e);
14151                            }
14152                        }
14153                    }
14154                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14155                        // Hypergraph export is handled separately in Phase 10b
14156                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14157                    }
14158                }
14159            }
14160
14161            if let Some(pb) = &pb {
14162                pb.inc(40);
14163            }
14164        }
14165
14166        stats.graph_export_count = snapshot.graph_count;
14167        snapshot.exported = snapshot.graph_count > 0;
14168
14169        if let Some(pb) = pb {
14170            pb.finish_with_message(format!(
14171                "Graphs exported: {} graphs ({} nodes, {} edges)",
14172                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14173            ));
14174        }
14175
14176        Ok(snapshot)
14177    }
14178
14179    /// Build additional graph types (banking, approval, entity) when relevant data
14180    /// is available. These run as a late phase because the data they need (banking
14181    /// snapshot, intercompany snapshot) is only generated after the main graph
14182    /// export phase.
14183    fn build_additional_graphs(
14184        &self,
14185        banking: &BankingSnapshot,
14186        intercompany: &IntercompanySnapshot,
14187        entries: &[JournalEntry],
14188        stats: &mut EnhancedGenerationStatistics,
14189    ) {
14190        let output_dir = self
14191            .output_path
14192            .clone()
14193            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14194        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14195
14196        // Banking graph: build when banking customers and transactions exist
14197        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14198            info!("Phase 10c: Building banking network graph");
14199            let config = BankingGraphConfig::default();
14200            let mut builder = BankingGraphBuilder::new(config);
14201            builder.add_customers(&banking.customers);
14202            builder.add_accounts(&banking.accounts, &banking.customers);
14203            builder.add_transactions(&banking.transactions);
14204            let graph = builder.build();
14205
14206            let node_count = graph.node_count();
14207            let edge_count = graph.edge_count();
14208            stats.graph_node_count += node_count;
14209            stats.graph_edge_count += edge_count;
14210
14211            // Export as PyG if configured
14212            for format in &self.config.graph_export.formats {
14213                if matches!(
14214                    format,
14215                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14216                ) {
14217                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14218                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14219                        warn!("Failed to create banking graph output dir: {}", e);
14220                        continue;
14221                    }
14222                    let pyg_config = PyGExportConfig::default();
14223                    let exporter = PyGExporter::new(pyg_config);
14224                    if let Err(e) = exporter.export(&graph, &format_dir) {
14225                        warn!("Failed to export banking graph as PyG: {}", e);
14226                    } else {
14227                        info!(
14228                            "Banking network graph exported: {} nodes, {} edges",
14229                            node_count, edge_count
14230                        );
14231                    }
14232                }
14233            }
14234        }
14235
14236        // Approval graph: build from journal entry approval workflows
14237        let approval_entries: Vec<_> = entries
14238            .iter()
14239            .filter(|je| je.header.approval_workflow.is_some())
14240            .collect();
14241
14242        if !approval_entries.is_empty() {
14243            info!(
14244                "Phase 10c: Building approval network graph ({} entries with approvals)",
14245                approval_entries.len()
14246            );
14247            let config = ApprovalGraphConfig::default();
14248            let mut builder = ApprovalGraphBuilder::new(config);
14249
14250            for je in &approval_entries {
14251                if let Some(ref wf) = je.header.approval_workflow {
14252                    for action in &wf.actions {
14253                        let record = datasynth_core::models::ApprovalRecord {
14254                            approval_id: format!(
14255                                "APR-{}-{}",
14256                                je.header.document_id, action.approval_level
14257                            ),
14258                            document_number: je.header.document_id.to_string(),
14259                            document_type: "JE".to_string(),
14260                            company_code: je.company_code().to_string(),
14261                            requester_id: wf.preparer_id.clone(),
14262                            requester_name: Some(wf.preparer_name.clone()),
14263                            approver_id: action.actor_id.clone(),
14264                            approver_name: action.actor_name.clone(),
14265                            approval_date: je.posting_date(),
14266                            action: format!("{:?}", action.action),
14267                            amount: wf.amount,
14268                            approval_limit: None,
14269                            comments: action.comments.clone(),
14270                            delegation_from: None,
14271                            is_auto_approved: false,
14272                        };
14273                        builder.add_approval(&record);
14274                    }
14275                }
14276            }
14277
14278            let graph = builder.build();
14279            let node_count = graph.node_count();
14280            let edge_count = graph.edge_count();
14281            stats.graph_node_count += node_count;
14282            stats.graph_edge_count += edge_count;
14283
14284            // Export as PyG if configured
14285            for format in &self.config.graph_export.formats {
14286                if matches!(
14287                    format,
14288                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14289                ) {
14290                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14291                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14292                        warn!("Failed to create approval graph output dir: {}", e);
14293                        continue;
14294                    }
14295                    let pyg_config = PyGExportConfig::default();
14296                    let exporter = PyGExporter::new(pyg_config);
14297                    if let Err(e) = exporter.export(&graph, &format_dir) {
14298                        warn!("Failed to export approval graph as PyG: {}", e);
14299                    } else {
14300                        info!(
14301                            "Approval network graph exported: {} nodes, {} edges",
14302                            node_count, edge_count
14303                        );
14304                    }
14305                }
14306            }
14307        }
14308
14309        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
14310        if self.config.companies.len() >= 2 {
14311            info!(
14312                "Phase 10c: Building entity relationship graph ({} companies)",
14313                self.config.companies.len()
14314            );
14315
14316            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14317                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14318
14319            // Map CompanyConfig → Company objects
14320            let parent_code = &self.config.companies[0].code;
14321            let mut companies: Vec<datasynth_core::models::Company> =
14322                Vec::with_capacity(self.config.companies.len());
14323
14324            // First company is the parent
14325            let first = &self.config.companies[0];
14326            companies.push(datasynth_core::models::Company::parent(
14327                &first.code,
14328                &first.name,
14329                &first.country,
14330                &first.currency,
14331            ));
14332
14333            // Remaining companies are subsidiaries (100% owned by parent)
14334            for cc in self.config.companies.iter().skip(1) {
14335                companies.push(datasynth_core::models::Company::subsidiary(
14336                    &cc.code,
14337                    &cc.name,
14338                    &cc.country,
14339                    &cc.currency,
14340                    parent_code,
14341                    rust_decimal::Decimal::from(100),
14342                ));
14343            }
14344
14345            // Build IntercompanyRelationship records (same logic as phase_intercompany)
14346            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14347                self.config
14348                    .companies
14349                    .iter()
14350                    .skip(1)
14351                    .enumerate()
14352                    .map(|(i, cc)| {
14353                        let mut rel =
14354                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
14355                                format!("REL{:03}", i + 1),
14356                                parent_code.clone(),
14357                                cc.code.clone(),
14358                                rust_decimal::Decimal::from(100),
14359                                start_date,
14360                            );
14361                        rel.functional_currency = cc.currency.clone();
14362                        rel
14363                    })
14364                    .collect();
14365
14366            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14367            builder.add_companies(&companies);
14368            builder.add_ownership_relationships(&relationships);
14369
14370            // Thread IC matched-pair transaction edges into the entity graph
14371            for pair in &intercompany.matched_pairs {
14372                builder.add_intercompany_edge(
14373                    &pair.seller_company,
14374                    &pair.buyer_company,
14375                    pair.amount,
14376                    &format!("{:?}", pair.transaction_type),
14377                );
14378            }
14379
14380            let graph = builder.build();
14381            let node_count = graph.node_count();
14382            let edge_count = graph.edge_count();
14383            stats.graph_node_count += node_count;
14384            stats.graph_edge_count += edge_count;
14385
14386            // Export as PyG if configured
14387            for format in &self.config.graph_export.formats {
14388                if matches!(
14389                    format,
14390                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14391                ) {
14392                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14393                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14394                        warn!("Failed to create entity graph output dir: {}", e);
14395                        continue;
14396                    }
14397                    let pyg_config = PyGExportConfig::default();
14398                    let exporter = PyGExporter::new(pyg_config);
14399                    if let Err(e) = exporter.export(&graph, &format_dir) {
14400                        warn!("Failed to export entity graph as PyG: {}", e);
14401                    } else {
14402                        info!(
14403                            "Entity relationship graph exported: {} nodes, {} edges",
14404                            node_count, edge_count
14405                        );
14406                    }
14407                }
14408            }
14409        } else {
14410            debug!(
14411                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14412                self.config.companies.len()
14413            );
14414        }
14415    }
14416
14417    /// Export a multi-layer hypergraph for RustGraph integration.
14418    ///
14419    /// Builds a 3-layer hypergraph:
14420    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
14421    /// - Layer 2: Process Events (all process family document flows + OCPM events)
14422    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
14423    #[allow(clippy::too_many_arguments)]
14424    fn export_hypergraph(
14425        &self,
14426        coa: &Arc<ChartOfAccounts>,
14427        entries: &[JournalEntry],
14428        document_flows: &DocumentFlowSnapshot,
14429        sourcing: &SourcingSnapshot,
14430        hr: &HrSnapshot,
14431        manufacturing: &ManufacturingSnapshot,
14432        banking: &BankingSnapshot,
14433        audit: &AuditSnapshot,
14434        financial_reporting: &FinancialReportingSnapshot,
14435        ocpm: &OcpmSnapshot,
14436        compliance: &ComplianceRegulationsSnapshot,
14437        stats: &mut EnhancedGenerationStatistics,
14438    ) -> SynthResult<HypergraphExportInfo> {
14439        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14440        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14441        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14442        use datasynth_graph::models::hypergraph::AggregationStrategy;
14443
14444        let hg_settings = &self.config.graph_export.hypergraph;
14445
14446        // Parse aggregation strategy from config string
14447        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14448            "truncate" => AggregationStrategy::Truncate,
14449            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14450            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14451            "importance_sample" => AggregationStrategy::ImportanceSample,
14452            _ => AggregationStrategy::PoolByCounterparty,
14453        };
14454
14455        let builder_config = HypergraphConfig {
14456            max_nodes: hg_settings.max_nodes,
14457            aggregation_strategy,
14458            include_coso: hg_settings.governance_layer.include_coso,
14459            include_controls: hg_settings.governance_layer.include_controls,
14460            include_sox: hg_settings.governance_layer.include_sox,
14461            include_vendors: hg_settings.governance_layer.include_vendors,
14462            include_customers: hg_settings.governance_layer.include_customers,
14463            include_employees: hg_settings.governance_layer.include_employees,
14464            include_p2p: hg_settings.process_layer.include_p2p,
14465            include_o2c: hg_settings.process_layer.include_o2c,
14466            include_s2c: hg_settings.process_layer.include_s2c,
14467            include_h2r: hg_settings.process_layer.include_h2r,
14468            include_mfg: hg_settings.process_layer.include_mfg,
14469            include_bank: hg_settings.process_layer.include_bank,
14470            include_audit: hg_settings.process_layer.include_audit,
14471            include_r2r: hg_settings.process_layer.include_r2r,
14472            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14473            docs_per_counterparty_threshold: hg_settings
14474                .process_layer
14475                .docs_per_counterparty_threshold,
14476            include_accounts: hg_settings.accounting_layer.include_accounts,
14477            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14478            include_cross_layer_edges: hg_settings.cross_layer.enabled,
14479            include_compliance: self.config.compliance_regulations.enabled,
14480            include_tax: true,
14481            include_treasury: true,
14482            include_esg: true,
14483            include_project: true,
14484            include_intercompany: true,
14485            include_temporal_events: true,
14486        };
14487
14488        let mut builder = HypergraphBuilder::new(builder_config);
14489
14490        // Layer 1: Governance & Controls
14491        builder.add_coso_framework();
14492
14493        // Add controls if available (generated during JE generation)
14494        // Controls are generated per-company; we use the standard set
14495        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14496            let controls = InternalControl::standard_controls();
14497            builder.add_controls(&controls);
14498        }
14499
14500        // Add master data
14501        builder.add_vendors(&self.master_data.vendors);
14502        builder.add_customers(&self.master_data.customers);
14503        builder.add_employees(&self.master_data.employees);
14504
14505        // Layer 2: Process Events (all process families)
14506        builder.add_p2p_documents(
14507            &document_flows.purchase_orders,
14508            &document_flows.goods_receipts,
14509            &document_flows.vendor_invoices,
14510            &document_flows.payments,
14511        );
14512        builder.add_o2c_documents(
14513            &document_flows.sales_orders,
14514            &document_flows.deliveries,
14515            &document_flows.customer_invoices,
14516        );
14517        builder.add_s2c_documents(
14518            &sourcing.sourcing_projects,
14519            &sourcing.qualifications,
14520            &sourcing.rfx_events,
14521            &sourcing.bids,
14522            &sourcing.bid_evaluations,
14523            &sourcing.contracts,
14524        );
14525        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14526        builder.add_mfg_documents(
14527            &manufacturing.production_orders,
14528            &manufacturing.quality_inspections,
14529            &manufacturing.cycle_counts,
14530        );
14531        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14532        builder.add_audit_documents(
14533            &audit.engagements,
14534            &audit.workpapers,
14535            &audit.findings,
14536            &audit.evidence,
14537            &audit.risk_assessments,
14538            &audit.judgments,
14539            &audit.materiality_calculations,
14540            &audit.audit_opinions,
14541            &audit.going_concern_assessments,
14542        );
14543        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14544
14545        // OCPM events as hyperedges
14546        if let Some(ref event_log) = ocpm.event_log {
14547            builder.add_ocpm_events(event_log);
14548        }
14549
14550        // Compliance regulations as cross-layer nodes
14551        if self.config.compliance_regulations.enabled
14552            && hg_settings.governance_layer.include_controls
14553        {
14554            // Reconstruct ComplianceStandard objects from the registry
14555            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14556            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14557                .standard_records
14558                .iter()
14559                .filter_map(|r| {
14560                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14561                    registry.get(&sid).cloned()
14562                })
14563                .collect();
14564
14565            builder.add_compliance_regulations(
14566                &standards,
14567                &compliance.findings,
14568                &compliance.filings,
14569            );
14570        }
14571
14572        // Layer 3: Accounting Network
14573        builder.add_accounts(coa);
14574        builder.add_journal_entries_as_hyperedges(entries);
14575
14576        // Build the hypergraph
14577        let hypergraph = builder.build();
14578
14579        // Export
14580        let output_dir = self
14581            .output_path
14582            .clone()
14583            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14584        let hg_dir = output_dir
14585            .join(&self.config.graph_export.output_subdirectory)
14586            .join(&hg_settings.output_subdirectory);
14587
14588        // Branch on output format
14589        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14590            "unified" => {
14591                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14592                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14593                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14594                })?;
14595                (
14596                    metadata.num_nodes,
14597                    metadata.num_edges,
14598                    metadata.num_hyperedges,
14599                )
14600            }
14601            _ => {
14602                // "native" or any unrecognized format → use existing exporter
14603                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14604                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14605                    SynthError::generation(format!("Hypergraph export failed: {e}"))
14606                })?;
14607                (
14608                    metadata.num_nodes,
14609                    metadata.num_edges,
14610                    metadata.num_hyperedges,
14611                )
14612            }
14613        };
14614
14615        // Stream to RustGraph ingest endpoint if configured
14616        #[cfg(feature = "streaming")]
14617        if let Some(ref target_url) = hg_settings.stream_target {
14618            use crate::stream_client::{StreamClient, StreamConfig};
14619            use std::io::Write as _;
14620
14621            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14622            let stream_config = StreamConfig {
14623                target_url: target_url.clone(),
14624                batch_size: hg_settings.stream_batch_size,
14625                api_key,
14626                ..StreamConfig::default()
14627            };
14628
14629            match StreamClient::new(stream_config) {
14630                Ok(mut client) => {
14631                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14632                    match exporter.export_to_writer(&hypergraph, &mut client) {
14633                        Ok(_) => {
14634                            if let Err(e) = client.flush() {
14635                                warn!("Failed to flush stream client: {}", e);
14636                            } else {
14637                                info!("Streamed {} records to {}", client.total_sent(), target_url);
14638                            }
14639                        }
14640                        Err(e) => {
14641                            warn!("Streaming export failed: {}", e);
14642                        }
14643                    }
14644                }
14645                Err(e) => {
14646                    warn!("Failed to create stream client: {}", e);
14647                }
14648            }
14649        }
14650
14651        // Update stats
14652        stats.graph_node_count += num_nodes;
14653        stats.graph_edge_count += num_edges;
14654        stats.graph_export_count += 1;
14655
14656        Ok(HypergraphExportInfo {
14657            node_count: num_nodes,
14658            edge_count: num_edges,
14659            hyperedge_count: num_hyperedges,
14660            output_path: hg_dir,
14661        })
14662    }
14663
14664    /// Generate banking KYC/AML data.
14665    ///
14666    /// Creates banking customers, accounts, and transactions with AML typology injection.
14667    /// Uses the BankingOrchestrator from synth-banking crate.
14668    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14669        let pb = self.create_progress_bar(100, "Generating Banking Data");
14670
14671        // Build the banking orchestrator from config
14672        let orchestrator = BankingOrchestratorBuilder::new()
14673            .config(self.config.banking.clone())
14674            .seed(self.seed + 9000)
14675            .country_pack(self.primary_pack().clone())
14676            .build();
14677
14678        if let Some(pb) = &pb {
14679            pb.inc(10);
14680        }
14681
14682        // Generate the banking data
14683        let result = orchestrator.generate();
14684
14685        if let Some(pb) = &pb {
14686            pb.inc(90);
14687            pb.finish_with_message(format!(
14688                "Banking: {} customers, {} transactions",
14689                result.customers.len(),
14690                result.transactions.len()
14691            ));
14692        }
14693
14694        // Cross-reference banking customers with core master data so that
14695        // banking customer names align with the enterprise customer list.
14696        // We rotate through core customers, overlaying their name and country
14697        // onto the generated banking customers where possible.
14698        let mut banking_customers = result.customers;
14699        let core_customers = &self.master_data.customers;
14700        if !core_customers.is_empty() {
14701            for (i, bc) in banking_customers.iter_mut().enumerate() {
14702                let core = &core_customers[i % core_customers.len()];
14703                bc.name = CustomerName::business(&core.name);
14704                bc.residence_country = core.country.clone();
14705                bc.enterprise_customer_id = Some(core.customer_id.clone());
14706            }
14707            debug!(
14708                "Cross-referenced {} banking customers with {} core customers",
14709                banking_customers.len(),
14710                core_customers.len()
14711            );
14712        }
14713
14714        Ok(BankingSnapshot {
14715            customers: banking_customers,
14716            accounts: result.accounts,
14717            transactions: result.transactions,
14718            transaction_labels: result.transaction_labels,
14719            customer_labels: result.customer_labels,
14720            account_labels: result.account_labels,
14721            relationship_labels: result.relationship_labels,
14722            narratives: result.narratives,
14723            suspicious_count: result.stats.suspicious_count,
14724            scenario_count: result.scenarios.len(),
14725        })
14726    }
14727
14728    /// Calculate total transactions to generate.
14729    fn calculate_total_transactions(&self) -> u64 {
14730        let months = self.config.global.period_months as f64;
14731        self.config
14732            .companies
14733            .iter()
14734            .map(|c| {
14735                let annual = c.annual_transaction_volume.count() as f64;
14736                let weighted = annual * c.volume_weight;
14737                (weighted * months / 12.0) as u64
14738            })
14739            .sum()
14740    }
14741
14742    /// Create a progress bar if progress display is enabled.
14743    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14744        if !self.phase_config.show_progress {
14745            return None;
14746        }
14747
14748        let pb = if let Some(mp) = &self.multi_progress {
14749            mp.add(ProgressBar::new(total))
14750        } else {
14751            ProgressBar::new(total)
14752        };
14753
14754        pb.set_style(
14755            ProgressStyle::default_bar()
14756                .template(&format!(
14757                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14758                ))
14759                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14760                .progress_chars("#>-"),
14761        );
14762
14763        Some(pb)
14764    }
14765
14766    /// Get the generated chart of accounts.
14767    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14768        self.coa.clone()
14769    }
14770
14771    /// Get the generated master data.
14772    pub fn get_master_data(&self) -> &MasterDataSnapshot {
14773        &self.master_data
14774    }
14775
14776    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
14777    fn phase_compliance_regulations(
14778        &mut self,
14779        _stats: &mut EnhancedGenerationStatistics,
14780    ) -> SynthResult<ComplianceRegulationsSnapshot> {
14781        if !self.phase_config.generate_compliance_regulations {
14782            return Ok(ComplianceRegulationsSnapshot::default());
14783        }
14784
14785        info!("Phase: Generating Compliance Regulations Data");
14786
14787        let cr_config = &self.config.compliance_regulations;
14788
14789        // Determine jurisdictions: from config or inferred from companies
14790        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14791            self.config
14792                .companies
14793                .iter()
14794                .map(|c| c.country.clone())
14795                .collect::<std::collections::HashSet<_>>()
14796                .into_iter()
14797                .collect()
14798        } else {
14799            cr_config.jurisdictions.clone()
14800        };
14801
14802        // Determine reference date
14803        let fallback_date =
14804            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14805        let reference_date = cr_config
14806            .reference_date
14807            .as_ref()
14808            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14809            .unwrap_or_else(|| {
14810                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14811                    .unwrap_or(fallback_date)
14812            });
14813
14814        // Generate standards registry data
14815        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14816        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14817        let cross_reference_records = reg_gen.generate_cross_reference_records();
14818        let jurisdiction_records =
14819            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14820
14821        info!(
14822            "  Standards: {} records, {} cross-references, {} jurisdictions",
14823            standard_records.len(),
14824            cross_reference_records.len(),
14825            jurisdiction_records.len()
14826        );
14827
14828        // Generate audit procedures (if enabled)
14829        let audit_procedures = if cr_config.audit_procedures.enabled {
14830            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14831                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14832                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14833                confidence_level: cr_config.audit_procedures.confidence_level,
14834                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14835            };
14836            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14837                self.seed + 9000,
14838                proc_config,
14839            );
14840            let registry = reg_gen.registry();
14841            let mut all_procs = Vec::new();
14842            for jurisdiction in &jurisdictions {
14843                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14844                all_procs.extend(procs);
14845            }
14846            info!("  Audit procedures: {}", all_procs.len());
14847            all_procs
14848        } else {
14849            Vec::new()
14850        };
14851
14852        // Generate compliance findings (if enabled)
14853        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14854            let finding_config =
14855                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14856                    finding_rate: cr_config.findings.finding_rate,
14857                    material_weakness_rate: cr_config.findings.material_weakness_rate,
14858                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14859                    generate_remediation: cr_config.findings.generate_remediation,
14860                };
14861            let mut finding_gen =
14862                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14863                    self.seed + 9100,
14864                    finding_config,
14865                );
14866            let mut all_findings = Vec::new();
14867            for company in &self.config.companies {
14868                let company_findings =
14869                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14870                all_findings.extend(company_findings);
14871            }
14872            info!("  Compliance findings: {}", all_findings.len());
14873            all_findings
14874        } else {
14875            Vec::new()
14876        };
14877
14878        // Generate regulatory filings (if enabled)
14879        let filings = if cr_config.filings.enabled {
14880            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14881                filing_types: cr_config.filings.filing_types.clone(),
14882                generate_status_progression: cr_config.filings.generate_status_progression,
14883            };
14884            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14885                self.seed + 9200,
14886                filing_config,
14887            );
14888            let company_codes: Vec<String> = self
14889                .config
14890                .companies
14891                .iter()
14892                .map(|c| c.code.clone())
14893                .collect();
14894            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14895                .unwrap_or(fallback_date);
14896            let filings = filing_gen.generate_filings(
14897                &company_codes,
14898                &jurisdictions,
14899                start_date,
14900                self.config.global.period_months,
14901            );
14902            info!("  Regulatory filings: {}", filings.len());
14903            filings
14904        } else {
14905            Vec::new()
14906        };
14907
14908        // Build compliance graph (if enabled)
14909        let compliance_graph = if cr_config.graph.enabled {
14910            let graph_config = datasynth_graph::ComplianceGraphConfig {
14911                include_standard_nodes: cr_config.graph.include_compliance_nodes,
14912                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14913                include_cross_references: cr_config.graph.include_cross_references,
14914                include_supersession_edges: cr_config.graph.include_supersession_edges,
14915                include_account_links: cr_config.graph.include_account_links,
14916                include_control_links: cr_config.graph.include_control_links,
14917                include_company_links: cr_config.graph.include_company_links,
14918            };
14919            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14920
14921            // Add standard nodes
14922            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14923                .iter()
14924                .map(|r| datasynth_graph::StandardNodeInput {
14925                    standard_id: r.standard_id.clone(),
14926                    title: r.title.clone(),
14927                    category: r.category.clone(),
14928                    domain: r.domain.clone(),
14929                    is_active: r.is_active,
14930                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
14931                    applicable_account_types: r.applicable_account_types.clone(),
14932                    applicable_processes: r.applicable_processes.clone(),
14933                })
14934                .collect();
14935            builder.add_standards(&standard_inputs);
14936
14937            // Add jurisdiction nodes
14938            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14939                jurisdiction_records
14940                    .iter()
14941                    .map(|r| datasynth_graph::JurisdictionNodeInput {
14942                        country_code: r.country_code.clone(),
14943                        country_name: r.country_name.clone(),
14944                        framework: r.accounting_framework.clone(),
14945                        standard_count: r.standard_count,
14946                        tax_rate: r.statutory_tax_rate,
14947                    })
14948                    .collect();
14949            builder.add_jurisdictions(&jurisdiction_inputs);
14950
14951            // Add cross-reference edges
14952            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14953                cross_reference_records
14954                    .iter()
14955                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14956                        from_standard: r.from_standard.clone(),
14957                        to_standard: r.to_standard.clone(),
14958                        relationship: r.relationship.clone(),
14959                        convergence_level: r.convergence_level,
14960                    })
14961                    .collect();
14962            builder.add_cross_references(&xref_inputs);
14963
14964            // Add jurisdiction→standard mappings
14965            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14966                .iter()
14967                .map(|r| datasynth_graph::JurisdictionMappingInput {
14968                    country_code: r.jurisdiction.clone(),
14969                    standard_id: r.standard_id.clone(),
14970                })
14971                .collect();
14972            builder.add_jurisdiction_mappings(&mapping_inputs);
14973
14974            // Add procedure nodes
14975            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14976                .iter()
14977                .map(|p| datasynth_graph::ProcedureNodeInput {
14978                    procedure_id: p.procedure_id.clone(),
14979                    standard_id: p.standard_id.clone(),
14980                    procedure_type: p.procedure_type.clone(),
14981                    sample_size: p.sample_size,
14982                    confidence_level: p.confidence_level,
14983                })
14984                .collect();
14985            builder.add_procedures(&proc_inputs);
14986
14987            // Add finding nodes
14988            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14989                .iter()
14990                .map(|f| datasynth_graph::FindingNodeInput {
14991                    finding_id: f.finding_id.to_string(),
14992                    standard_id: f
14993                        .related_standards
14994                        .first()
14995                        .map(|s| s.as_str().to_string())
14996                        .unwrap_or_default(),
14997                    severity: f.severity.to_string(),
14998                    deficiency_level: f.deficiency_level.to_string(),
14999                    severity_score: f.deficiency_level.severity_score(),
15000                    control_id: f.control_id.clone(),
15001                    affected_accounts: f.affected_accounts.clone(),
15002                })
15003                .collect();
15004            builder.add_findings(&finding_inputs);
15005
15006            // Cross-domain: link standards to accounts from chart of accounts
15007            if cr_config.graph.include_account_links {
15008                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15009                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15010                for std_record in &standard_records {
15011                    if let Some(std_obj) =
15012                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
15013                            &std_record.standard_id,
15014                        ))
15015                    {
15016                        for acct_type in &std_obj.applicable_account_types {
15017                            account_links.push(datasynth_graph::AccountLinkInput {
15018                                standard_id: std_record.standard_id.clone(),
15019                                account_code: acct_type.clone(),
15020                                account_name: acct_type.clone(),
15021                            });
15022                        }
15023                    }
15024                }
15025                builder.add_account_links(&account_links);
15026            }
15027
15028            // Cross-domain: link standards to internal controls
15029            if cr_config.graph.include_control_links {
15030                let mut control_links = Vec::new();
15031                // SOX/PCAOB standards link to all controls
15032                let sox_like_ids: Vec<String> = standard_records
15033                    .iter()
15034                    .filter(|r| {
15035                        r.standard_id.starts_with("SOX")
15036                            || r.standard_id.starts_with("PCAOB-AS-2201")
15037                    })
15038                    .map(|r| r.standard_id.clone())
15039                    .collect();
15040                // Get control IDs from config (C001-C060 standard controls)
15041                let control_ids = [
15042                    ("C001", "Cash Controls"),
15043                    ("C002", "Large Transaction Approval"),
15044                    ("C010", "PO Approval"),
15045                    ("C011", "Three-Way Match"),
15046                    ("C020", "Revenue Recognition"),
15047                    ("C021", "Credit Check"),
15048                    ("C030", "Manual JE Approval"),
15049                    ("C031", "Period Close Review"),
15050                    ("C032", "Account Reconciliation"),
15051                    ("C040", "Payroll Processing"),
15052                    ("C050", "Fixed Asset Capitalization"),
15053                    ("C060", "Intercompany Elimination"),
15054                ];
15055                for sox_id in &sox_like_ids {
15056                    for (ctrl_id, ctrl_name) in &control_ids {
15057                        control_links.push(datasynth_graph::ControlLinkInput {
15058                            standard_id: sox_id.clone(),
15059                            control_id: ctrl_id.to_string(),
15060                            control_name: ctrl_name.to_string(),
15061                        });
15062                    }
15063                }
15064                builder.add_control_links(&control_links);
15065            }
15066
15067            // Cross-domain: filing nodes with company links
15068            if cr_config.graph.include_company_links {
15069                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15070                    .iter()
15071                    .enumerate()
15072                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
15073                        filing_id: format!("F{:04}", i + 1),
15074                        filing_type: f.filing_type.to_string(),
15075                        company_code: f.company_code.clone(),
15076                        jurisdiction: f.jurisdiction.clone(),
15077                        status: format!("{:?}", f.status),
15078                    })
15079                    .collect();
15080                builder.add_filings(&filing_inputs);
15081            }
15082
15083            let graph = builder.build();
15084            info!(
15085                "  Compliance graph: {} nodes, {} edges",
15086                graph.nodes.len(),
15087                graph.edges.len()
15088            );
15089            Some(graph)
15090        } else {
15091            None
15092        };
15093
15094        self.check_resources_with_log("post-compliance-regulations")?;
15095
15096        Ok(ComplianceRegulationsSnapshot {
15097            standard_records,
15098            cross_reference_records,
15099            jurisdiction_records,
15100            audit_procedures,
15101            findings,
15102            filings,
15103            compliance_graph,
15104        })
15105    }
15106
15107    /// Build a lineage graph describing config → phase → output relationships.
15108    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15109        use super::lineage::LineageGraphBuilder;
15110
15111        let mut builder = LineageGraphBuilder::new();
15112
15113        // Config sections
15114        builder.add_config_section("config:global", "Global Config");
15115        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15116        builder.add_config_section("config:transactions", "Transaction Config");
15117
15118        // Generator phases
15119        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15120        builder.add_generator_phase("phase:je", "Journal Entry Generation");
15121
15122        // Config → phase edges
15123        builder.configured_by("phase:coa", "config:chart_of_accounts");
15124        builder.configured_by("phase:je", "config:transactions");
15125
15126        // Output files
15127        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15128        builder.produced_by("output:je", "phase:je");
15129
15130        // Optional phases based on config
15131        if self.phase_config.generate_master_data {
15132            builder.add_config_section("config:master_data", "Master Data Config");
15133            builder.add_generator_phase("phase:master_data", "Master Data Generation");
15134            builder.configured_by("phase:master_data", "config:master_data");
15135            builder.input_to("phase:master_data", "phase:je");
15136        }
15137
15138        if self.phase_config.generate_document_flows {
15139            builder.add_config_section("config:document_flows", "Document Flow Config");
15140            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15141            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15142            builder.configured_by("phase:p2p", "config:document_flows");
15143            builder.configured_by("phase:o2c", "config:document_flows");
15144
15145            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15146            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15147            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15148            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15149            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15150
15151            builder.produced_by("output:po", "phase:p2p");
15152            builder.produced_by("output:gr", "phase:p2p");
15153            builder.produced_by("output:vi", "phase:p2p");
15154            builder.produced_by("output:so", "phase:o2c");
15155            builder.produced_by("output:ci", "phase:o2c");
15156        }
15157
15158        if self.phase_config.inject_anomalies {
15159            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15160            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15161            builder.configured_by("phase:anomaly", "config:fraud");
15162            builder.add_output_file(
15163                "output:labels",
15164                "Anomaly Labels",
15165                "labels/anomaly_labels.csv",
15166            );
15167            builder.produced_by("output:labels", "phase:anomaly");
15168        }
15169
15170        if self.phase_config.generate_audit {
15171            builder.add_config_section("config:audit", "Audit Config");
15172            builder.add_generator_phase("phase:audit", "Audit Data Generation");
15173            builder.configured_by("phase:audit", "config:audit");
15174        }
15175
15176        if self.phase_config.generate_banking {
15177            builder.add_config_section("config:banking", "Banking Config");
15178            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15179            builder.configured_by("phase:banking", "config:banking");
15180        }
15181
15182        if self.config.llm.enabled {
15183            builder.add_config_section("config:llm", "LLM Enrichment Config");
15184            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15185            builder.configured_by("phase:llm_enrichment", "config:llm");
15186        }
15187
15188        if self.config.diffusion.enabled {
15189            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15190            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15191            builder.configured_by("phase:diffusion", "config:diffusion");
15192        }
15193
15194        if self.config.causal.enabled {
15195            builder.add_config_section("config:causal", "Causal Generation Config");
15196            builder.add_generator_phase("phase:causal", "Causal Overlay");
15197            builder.configured_by("phase:causal", "config:causal");
15198        }
15199
15200        builder.build()
15201    }
15202
15203    // -----------------------------------------------------------------------
15204    // Trial-balance helpers used to replace hardcoded proxy values
15205    // -----------------------------------------------------------------------
15206
15207    /// Compute total revenue for a company from its journal entries.
15208    ///
15209    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15210    /// net credits on all revenue-account lines filtered to `company_code`.
15211    fn compute_company_revenue(
15212        entries: &[JournalEntry],
15213        company_code: &str,
15214    ) -> rust_decimal::Decimal {
15215        use rust_decimal::Decimal;
15216        let mut revenue = Decimal::ZERO;
15217        for je in entries {
15218            if je.header.company_code != company_code {
15219                continue;
15220            }
15221            for line in &je.lines {
15222                if line.gl_account.starts_with('4') {
15223                    // Revenue is credit-normal
15224                    revenue += line.credit_amount - line.debit_amount;
15225                }
15226            }
15227        }
15228        revenue.max(Decimal::ZERO)
15229    }
15230
15231    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15232    ///
15233    /// Asset accounts start with "1"; liability accounts start with "2".
15234    fn compute_entity_net_assets(
15235        entries: &[JournalEntry],
15236        entity_code: &str,
15237    ) -> rust_decimal::Decimal {
15238        use rust_decimal::Decimal;
15239        let mut asset_net = Decimal::ZERO;
15240        let mut liability_net = Decimal::ZERO;
15241        for je in entries {
15242            if je.header.company_code != entity_code {
15243                continue;
15244            }
15245            for line in &je.lines {
15246                if line.gl_account.starts_with('1') {
15247                    asset_net += line.debit_amount - line.credit_amount;
15248                } else if line.gl_account.starts_with('2') {
15249                    liability_net += line.credit_amount - line.debit_amount;
15250                }
15251            }
15252        }
15253        asset_net - liability_net
15254    }
15255
15256    /// v3.5.1+: Run the statistical validation suite configured in
15257    /// `distributions.validation.tests` over the final amount
15258    /// distribution.  Collects every non-zero line-level amount (debit +
15259    /// credit) and hands it to the runners in
15260    /// `datasynth_core::distributions::validation`.
15261    ///
15262    /// Returns `Ok(None)` when validation is disabled (the default).
15263    /// When `reporting.fail_on_error = true` and any test fails, returns
15264    /// `Err` with a concise message; otherwise attaches the report to
15265    /// the result and lets callers inspect it.
15266    fn phase_statistical_validation(
15267        &self,
15268        entries: &[JournalEntry],
15269    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15270        use datasynth_config::schema::StatisticalTestConfig;
15271        use datasynth_core::distributions::{
15272            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15273            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15274        };
15275        use rust_decimal::prelude::ToPrimitive;
15276
15277        let cfg = &self.config.distributions.validation;
15278        if !cfg.enabled {
15279            return Ok(None);
15280        }
15281
15282        // Collect per-line positive amounts (debit + credit is zero on the
15283        // non-posting side, so this naturally picks the magnitude).
15284        let amounts: Vec<rust_decimal::Decimal> = entries
15285            .iter()
15286            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15287            .filter(|a| *a > rust_decimal::Decimal::ZERO)
15288            .collect();
15289
15290        // v4.1.0+ paired (amount, line_count) per entry for correlation
15291        // checks. Amount per entry is the debit-side total (= credit-side
15292        // total for a balanced entry).
15293        let paired_amount_linecount: Vec<(f64, f64)> = entries
15294            .iter()
15295            .filter_map(|je| {
15296                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15297                if amt > rust_decimal::Decimal::ZERO {
15298                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
15299                } else {
15300                    None
15301                }
15302            })
15303            .collect();
15304
15305        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15306        for test_cfg in &cfg.tests {
15307            match test_cfg {
15308                StatisticalTestConfig::BenfordFirstDigit {
15309                    threshold_mad,
15310                    warning_mad,
15311                } => {
15312                    results.push(run_benford_first_digit(
15313                        &amounts,
15314                        *threshold_mad,
15315                        *warning_mad,
15316                    ));
15317                }
15318                StatisticalTestConfig::ChiSquared { bins, significance } => {
15319                    results.push(run_chi_squared(&amounts, *bins, *significance));
15320                }
15321                StatisticalTestConfig::DistributionFit {
15322                    target: _,
15323                    ks_significance,
15324                    method: _,
15325                } => {
15326                    // v3.5.1+: log-uniformity KS check. Target-specific
15327                    // fits against Normal / Exponential land in v4.1.1+.
15328                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
15329                }
15330                StatisticalTestConfig::AndersonDarling {
15331                    target: _,
15332                    significance,
15333                } => {
15334                    // v4.1.0+: A*² statistic against log-normal on the
15335                    // log-scale. Other targets follow the same pattern.
15336                    results.push(run_anderson_darling(&amounts, *significance));
15337                }
15338                StatisticalTestConfig::CorrelationCheck {
15339                    expected_correlations,
15340                } => {
15341                    // v4.1.0+: (amount, line_count) is tracked today.
15342                    // Other pairs resolve to Skipped pending richer
15343                    // per-entry attribute collection.
15344                    if expected_correlations.is_empty() {
15345                        results.push(StatisticalTestResult {
15346                            name: "correlation_check".to_string(),
15347                            outcome: TestOutcome::Skipped,
15348                            statistic: 0.0,
15349                            threshold: 0.0,
15350                            message: "no expected correlations declared".to_string(),
15351                        });
15352                    } else {
15353                        for ec in expected_correlations {
15354                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
15355                            let is_amount_linecount = (ec.field1 == "amount"
15356                                && ec.field2 == "line_count")
15357                                || (ec.field1 == "line_count" && ec.field2 == "amount");
15358                            if is_amount_linecount {
15359                                let xs: Vec<f64> =
15360                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15361                                let ys: Vec<f64> =
15362                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15363                                results.push(run_correlation_check(
15364                                    &pair_key,
15365                                    &xs,
15366                                    &ys,
15367                                    ec.expected_r,
15368                                    ec.tolerance,
15369                                ));
15370                            } else {
15371                                results.push(StatisticalTestResult {
15372                                    name: format!("correlation_check_{pair_key}"),
15373                                    outcome: TestOutcome::Skipped,
15374                                    statistic: 0.0,
15375                                    threshold: ec.tolerance,
15376                                    message: format!(
15377                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15378                                        ec.field1, ec.field2
15379                                    ),
15380                                });
15381                            }
15382                        }
15383                    }
15384                }
15385            }
15386        }
15387
15388        let report = StatisticalValidationReport {
15389            sample_count: amounts.len(),
15390            results,
15391        };
15392
15393        if cfg.reporting.fail_on_error && !report.all_passed() {
15394            let failed = report.failed_names().join(", ");
15395            return Err(SynthError::validation(format!(
15396                "statistical validation failed: {failed}"
15397            )));
15398        }
15399
15400        Ok(Some(report))
15401    }
15402
15403    /// v3.3.0: analytics-metadata phase.
15404    ///
15405    /// Runs AFTER all JE-adding phases (including Phase 20b's
15406    /// fraud-bias sweep). Four sub-generators fire in sequence, each
15407    /// gated by an individual `analytics_metadata.<flag>` toggle:
15408    ///
15409    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
15410    ///    current-period account balances.
15411    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
15412    ///    configured `global.industry`.
15413    /// 3. `ManagementReportGenerator` — management-report artefacts.
15414    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
15415    fn phase_analytics_metadata(
15416        &mut self,
15417        entries: &[JournalEntry],
15418    ) -> SynthResult<AnalyticsMetadataSnapshot> {
15419        use datasynth_generators::drift_event_generator::DriftEventGenerator;
15420        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15421        use datasynth_generators::management_report_generator::ManagementReportGenerator;
15422        use datasynth_generators::prior_year_generator::PriorYearGenerator;
15423        use std::collections::BTreeMap;
15424
15425        let mut snap = AnalyticsMetadataSnapshot::default();
15426
15427        if !self.phase_config.generate_analytics_metadata {
15428            return Ok(snap);
15429        }
15430
15431        let cfg = &self.config.analytics_metadata;
15432        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15433            .map(|d| d.year())
15434            .unwrap_or(2025);
15435
15436        // ---- 1. Prior-year comparatives ----
15437        if cfg.prior_year {
15438            let mut gen = PriorYearGenerator::new(self.seed + 9100);
15439            for company in &self.config.companies {
15440                // Aggregate current-period balances per account code +
15441                // account name from the entries slice.
15442                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15443                    BTreeMap::new();
15444                for je in entries {
15445                    if je.header.company_code != company.code {
15446                        continue;
15447                    }
15448                    for line in &je.lines {
15449                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15450                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15451                        });
15452                        entry.1 += line.debit_amount - line.credit_amount;
15453                    }
15454                }
15455                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15456                    .into_iter()
15457                    .filter(|(_, (_, bal))| !bal.is_zero())
15458                    .map(|(code, (name, bal))| (code, name, bal))
15459                    .collect();
15460                if !current.is_empty() {
15461                    let comparatives =
15462                        gen.generate_comparatives(&company.code, fiscal_year, &current);
15463                    snap.prior_year_comparatives.extend(comparatives);
15464                }
15465            }
15466            info!(
15467                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15468                snap.prior_year_comparatives.len(),
15469                self.config.companies.len()
15470            );
15471        }
15472
15473        // ---- 2. Industry benchmarks ----
15474        if cfg.industry_benchmark {
15475            use datasynth_core::models::IndustrySector;
15476            let industry = match self.config.global.industry {
15477                IndustrySector::Manufacturing => "manufacturing",
15478                IndustrySector::Retail => "retail",
15479                IndustrySector::FinancialServices => "financial_services",
15480                IndustrySector::Technology => "technology",
15481                IndustrySector::Healthcare => "healthcare",
15482                _ => "other",
15483            };
15484            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15485            let benchmarks = gen.generate(industry, fiscal_year);
15486            info!(
15487                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15488                benchmarks.len()
15489            );
15490            snap.industry_benchmarks = benchmarks;
15491        }
15492
15493        // ---- 3. Management reports ----
15494        if cfg.management_reports {
15495            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15496            let period_months = self.config.global.period_months;
15497            for company in &self.config.companies {
15498                let reports =
15499                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15500                snap.management_reports.extend(reports);
15501            }
15502            info!(
15503                "v3.3.0 analytics: {} management reports across {} companies",
15504                snap.management_reports.len(),
15505                self.config.companies.len()
15506            );
15507        }
15508
15509        // ---- 4. Drift-event labels ----
15510        if cfg.drift_events {
15511            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15512                .expect("hardcoded NaiveDate 2025-01-01 is valid");
15513            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15514                .unwrap_or(fallback_start);
15515            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15516            let mut gen = DriftEventGenerator::new(self.seed + 9400);
15517            let drifts = gen.generate_standalone_drifts(start_date, end_date);
15518            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15519            snap.drift_events = drifts;
15520        }
15521        // `entries` parameter reserved for future JE-aware drift detection
15522        let _ = entries;
15523
15524        Ok(snap)
15525    }
15526}
15527
15528/// Get the directory name for a graph export format.
15529fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15530    match format {
15531        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15532        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15533        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15534        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15535        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15536    }
15537}
15538
15539/// Aggregate journal entry lines into per-account trial balance rows.
15540///
15541/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
15542/// debit/credit totals and a net balance (debit minus credit).
15543fn compute_trial_balance_entries(
15544    entries: &[JournalEntry],
15545    entity_code: &str,
15546    fiscal_year: i32,
15547    coa: Option<&ChartOfAccounts>,
15548) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15549    use std::collections::BTreeMap;
15550
15551    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15552        BTreeMap::new();
15553
15554    for je in entries {
15555        for line in &je.lines {
15556            let entry = balances.entry(line.account_code.clone()).or_default();
15557            entry.0 += line.debit_amount;
15558            entry.1 += line.credit_amount;
15559        }
15560    }
15561
15562    balances
15563        .into_iter()
15564        .map(
15565            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15566                account_description: coa
15567                    .and_then(|c| c.get_account(&account_code))
15568                    .map(|a| a.description().to_string())
15569                    .unwrap_or_else(|| account_code.clone()),
15570                account_code,
15571                debit_balance: debit,
15572                credit_balance: credit,
15573                net_balance: debit - credit,
15574                entity_code: entity_code.to_string(),
15575                period: format!("FY{}", fiscal_year),
15576            },
15577        )
15578        .collect()
15579}
15580
15581#[cfg(test)]
15582#[allow(clippy::unwrap_used)]
15583mod tests {
15584    use super::*;
15585    use datasynth_config::schema::*;
15586
    /// Build the baseline [`GeneratorConfig`] shared by the tests in this module.
    ///
    /// The config describes a single USD company (`"1000"`, country `US`) in the
    /// manufacturing sector, starting 2024-01-01 for one month, with a fixed
    /// seed of 42 and a small chart of accounts. All remaining sections are
    /// left at their `Default` values (`country_packs` is `None`).
    fn create_test_config() -> GeneratorConfig {
        GeneratorConfig {
            // Global settings: fixed seed, single-month period, no parallelism.
            global: GlobalConfig {
                industry: IndustrySector::Manufacturing,
                start_date: "2024-01-01".to_string(),
                period_months: 1,
                seed: Some(42),
                parallel: false,
                group_currency: "USD".to_string(),
                presentation_currency: None,
                worker_threads: 0,
                memory_limit_mb: 0,
                fiscal_year_months: None,
            },
            // Exactly one company; multi-company tests push more onto this vec.
            companies: vec![CompanyConfig {
                code: "1000".to_string(),
                name: "Test Company".to_string(),
                currency: "USD".to_string(),
                functional_currency: None,
                country: "US".to_string(),
                annual_transaction_volume: TransactionVolume::TenK,
                volume_weight: 1.0,
                fiscal_year_variant: "K4".to_string(),
            }],
            chart_of_accounts: ChartOfAccountsConfig {
                complexity: CoAComplexity::Small,
                industry_specific: true,
                custom_accounts: None,
                min_hierarchy_depth: 2,
                max_hierarchy_depth: 4,
                expand_industry_subaccounts: false,
            },
            // Everything below is left at its default value.
            transactions: TransactionConfig::default(),
            output: OutputConfig::default(),
            fraud: FraudConfig::default(),
            internal_controls: InternalControlsConfig::default(),
            business_processes: BusinessProcessConfig::default(),
            user_personas: UserPersonaConfig::default(),
            templates: TemplateConfig::default(),
            approval: ApprovalConfig::default(),
            departments: DepartmentConfig::default(),
            master_data: MasterDataConfig::default(),
            document_flows: DocumentFlowConfig::default(),
            intercompany: IntercompanyConfig::default(),
            balance: BalanceConfig::default(),
            ocpm: OcpmConfig::default(),
            audit: AuditGenerationConfig::default(),
            banking: datasynth_banking::BankingConfig::default(),
            data_quality: DataQualitySchemaConfig::default(),
            scenario: ScenarioConfig::default(),
            temporal: TemporalDriftConfig::default(),
            graph_export: GraphExportConfig::default(),
            streaming: StreamingSchemaConfig::default(),
            rate_limit: RateLimitSchemaConfig::default(),
            temporal_attributes: TemporalAttributeSchemaConfig::default(),
            relationships: RelationshipSchemaConfig::default(),
            accounting_standards: AccountingStandardsConfig::default(),
            audit_standards: AuditStandardsConfig::default(),
            distributions: Default::default(),
            temporal_patterns: Default::default(),
            vendor_network: VendorNetworkSchemaConfig::default(),
            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
            relationship_strength: RelationshipStrengthSchemaConfig::default(),
            cross_process_links: CrossProcessLinksSchemaConfig::default(),
            organizational_events: OrganizationalEventsSchemaConfig::default(),
            behavioral_drift: BehavioralDriftSchemaConfig::default(),
            market_drift: MarketDriftSchemaConfig::default(),
            drift_labeling: DriftLabelingSchemaConfig::default(),
            anomaly_injection: Default::default(),
            industry_specific: Default::default(),
            fingerprint_privacy: Default::default(),
            quality_gates: Default::default(),
            compliance: Default::default(),
            webhooks: Default::default(),
            llm: Default::default(),
            diffusion: Default::default(),
            causal: Default::default(),
            source_to_pay: Default::default(),
            financial_reporting: Default::default(),
            hr: Default::default(),
            manufacturing: Default::default(),
            sales_quotes: Default::default(),
            tax: Default::default(),
            treasury: Default::default(),
            project_accounting: Default::default(),
            esg: Default::default(),
            country_packs: None,
            scenarios: Default::default(),
            session: Default::default(),
            compliance_regulations: Default::default(),
            analytics_metadata: Default::default(),
        }
    }
15680
15681    #[test]
15682    fn test_enhanced_orchestrator_creation() {
15683        let config = create_test_config();
15684        let orchestrator = EnhancedOrchestrator::with_defaults(config);
15685        assert!(orchestrator.is_ok());
15686    }
15687
15688    #[test]
15689    fn test_minimal_generation() {
15690        let config = create_test_config();
15691        let phase_config = PhaseConfig {
15692            generate_master_data: false,
15693            generate_document_flows: false,
15694            generate_journal_entries: true,
15695            inject_anomalies: false,
15696            show_progress: false,
15697            ..Default::default()
15698        };
15699
15700        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15701        let result = orchestrator.generate();
15702
15703        assert!(result.is_ok());
15704        let result = result.unwrap();
15705        assert!(!result.journal_entries.is_empty());
15706    }
15707
15708    #[test]
15709    fn test_master_data_generation() {
15710        let config = create_test_config();
15711        let phase_config = PhaseConfig {
15712            generate_master_data: true,
15713            generate_document_flows: false,
15714            generate_journal_entries: false,
15715            inject_anomalies: false,
15716            show_progress: false,
15717            vendors_per_company: 5,
15718            customers_per_company: 5,
15719            materials_per_company: 10,
15720            assets_per_company: 5,
15721            employees_per_company: 10,
15722            ..Default::default()
15723        };
15724
15725        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15726        let result = orchestrator.generate().unwrap();
15727
15728        assert!(!result.master_data.vendors.is_empty());
15729        assert!(!result.master_data.customers.is_empty());
15730        assert!(!result.master_data.materials.is_empty());
15731    }
15732
15733    #[test]
15734    fn test_document_flow_generation() {
15735        let config = create_test_config();
15736        let phase_config = PhaseConfig {
15737            generate_master_data: true,
15738            generate_document_flows: true,
15739            generate_journal_entries: false,
15740            inject_anomalies: false,
15741            inject_data_quality: false,
15742            validate_balances: false,
15743            validate_coa_coverage_strict: false,
15744            generate_ocpm_events: false,
15745            show_progress: false,
15746            vendors_per_company: 5,
15747            customers_per_company: 5,
15748            materials_per_company: 10,
15749            assets_per_company: 5,
15750            employees_per_company: 10,
15751            p2p_chains: 5,
15752            o2c_chains: 5,
15753            ..Default::default()
15754        };
15755
15756        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15757        let result = orchestrator.generate().unwrap();
15758
15759        // Should have generated P2P and O2C chains
15760        assert!(!result.document_flows.p2p_chains.is_empty());
15761        assert!(!result.document_flows.o2c_chains.is_empty());
15762
15763        // Flattened documents should be populated
15764        assert!(!result.document_flows.purchase_orders.is_empty());
15765        assert!(!result.document_flows.sales_orders.is_empty());
15766    }
15767
15768    #[test]
15769    fn test_anomaly_injection() {
15770        let config = create_test_config();
15771        let phase_config = PhaseConfig {
15772            generate_master_data: false,
15773            generate_document_flows: false,
15774            generate_journal_entries: true,
15775            inject_anomalies: true,
15776            show_progress: false,
15777            ..Default::default()
15778        };
15779
15780        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15781        let result = orchestrator.generate().unwrap();
15782
15783        // Should have journal entries
15784        assert!(!result.journal_entries.is_empty());
15785
15786        // With ~833 entries and 2% rate, expect some anomalies
15787        // Note: This is probabilistic, so we just verify the structure exists
15788        assert!(result.anomaly_labels.summary.is_some());
15789    }
15790
15791    #[test]
15792    fn test_full_generation_pipeline() {
15793        let config = create_test_config();
15794        let phase_config = PhaseConfig {
15795            generate_master_data: true,
15796            generate_document_flows: true,
15797            generate_journal_entries: true,
15798            inject_anomalies: false,
15799            inject_data_quality: false,
15800            validate_balances: true,
15801            validate_coa_coverage_strict: false,
15802            generate_ocpm_events: false,
15803            show_progress: false,
15804            vendors_per_company: 3,
15805            customers_per_company: 3,
15806            materials_per_company: 5,
15807            assets_per_company: 3,
15808            employees_per_company: 5,
15809            p2p_chains: 3,
15810            o2c_chains: 3,
15811            ..Default::default()
15812        };
15813
15814        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15815        let result = orchestrator.generate().unwrap();
15816
15817        // All phases should have results
15818        assert!(!result.master_data.vendors.is_empty());
15819        assert!(!result.master_data.customers.is_empty());
15820        assert!(!result.document_flows.p2p_chains.is_empty());
15821        assert!(!result.document_flows.o2c_chains.is_empty());
15822        assert!(!result.journal_entries.is_empty());
15823        assert!(result.statistics.accounts_count > 0);
15824
15825        // Subledger linking should have run
15826        assert!(!result.subledger.ap_invoices.is_empty());
15827        assert!(!result.subledger.ar_invoices.is_empty());
15828
15829        // Balance validation should have run
15830        assert!(result.balance_validation.validated);
15831        assert!(result.balance_validation.entries_processed > 0);
15832    }
15833
15834    #[test]
15835    fn test_subledger_linking() {
15836        let config = create_test_config();
15837        let phase_config = PhaseConfig {
15838            generate_master_data: true,
15839            generate_document_flows: true,
15840            generate_journal_entries: false,
15841            inject_anomalies: false,
15842            inject_data_quality: false,
15843            validate_balances: false,
15844            validate_coa_coverage_strict: false,
15845            generate_ocpm_events: false,
15846            show_progress: false,
15847            vendors_per_company: 5,
15848            customers_per_company: 5,
15849            materials_per_company: 10,
15850            assets_per_company: 3,
15851            employees_per_company: 5,
15852            p2p_chains: 5,
15853            o2c_chains: 5,
15854            ..Default::default()
15855        };
15856
15857        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15858        let result = orchestrator.generate().unwrap();
15859
15860        // Should have document flows
15861        assert!(!result.document_flows.vendor_invoices.is_empty());
15862        assert!(!result.document_flows.customer_invoices.is_empty());
15863
15864        // Subledger should be linked from document flows
15865        assert!(!result.subledger.ap_invoices.is_empty());
15866        assert!(!result.subledger.ar_invoices.is_empty());
15867
15868        // AP invoices count should match vendor invoices count
15869        assert_eq!(
15870            result.subledger.ap_invoices.len(),
15871            result.document_flows.vendor_invoices.len()
15872        );
15873
15874        // AR invoices count should match customer invoices count
15875        assert_eq!(
15876            result.subledger.ar_invoices.len(),
15877            result.document_flows.customer_invoices.len()
15878        );
15879
15880        // Statistics should reflect subledger counts
15881        assert_eq!(
15882            result.statistics.ap_invoice_count,
15883            result.subledger.ap_invoices.len()
15884        );
15885        assert_eq!(
15886            result.statistics.ar_invoice_count,
15887            result.subledger.ar_invoices.len()
15888        );
15889    }
15890
15891    #[test]
15892    fn test_balance_validation() {
15893        let config = create_test_config();
15894        let phase_config = PhaseConfig {
15895            generate_master_data: false,
15896            generate_document_flows: false,
15897            generate_journal_entries: true,
15898            inject_anomalies: false,
15899            validate_balances: true,
15900            validate_coa_coverage_strict: false,
15901            show_progress: false,
15902            ..Default::default()
15903        };
15904
15905        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15906        let result = orchestrator.generate().unwrap();
15907
15908        // Balance validation should run
15909        assert!(result.balance_validation.validated);
15910        assert!(result.balance_validation.entries_processed > 0);
15911
15912        // Generated JEs should be balanced (no unbalanced entries)
15913        assert!(!result.balance_validation.has_unbalanced_entries);
15914
15915        // Total debits should equal total credits
15916        assert_eq!(
15917            result.balance_validation.total_debits,
15918            result.balance_validation.total_credits
15919        );
15920    }
15921
15922    #[test]
15923    fn test_statistics_accuracy() {
15924        let config = create_test_config();
15925        let phase_config = PhaseConfig {
15926            generate_master_data: true,
15927            generate_document_flows: false,
15928            generate_journal_entries: true,
15929            inject_anomalies: false,
15930            show_progress: false,
15931            vendors_per_company: 10,
15932            customers_per_company: 20,
15933            materials_per_company: 15,
15934            assets_per_company: 5,
15935            employees_per_company: 8,
15936            ..Default::default()
15937        };
15938
15939        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15940        let result = orchestrator.generate().unwrap();
15941
15942        // Statistics should match actual data
15943        assert_eq!(
15944            result.statistics.vendor_count,
15945            result.master_data.vendors.len()
15946        );
15947        assert_eq!(
15948            result.statistics.customer_count,
15949            result.master_data.customers.len()
15950        );
15951        assert_eq!(
15952            result.statistics.material_count,
15953            result.master_data.materials.len()
15954        );
15955        assert_eq!(
15956            result.statistics.total_entries as usize,
15957            result.journal_entries.len()
15958        );
15959    }
15960
15961    #[test]
15962    fn test_phase_config_defaults() {
15963        let config = PhaseConfig::default();
15964        assert!(config.generate_master_data);
15965        assert!(config.generate_document_flows);
15966        assert!(config.generate_journal_entries);
15967        assert!(!config.inject_anomalies);
15968        assert!(config.validate_balances);
15969        assert!(config.show_progress);
15970        assert!(config.vendors_per_company > 0);
15971        assert!(config.customers_per_company > 0);
15972    }
15973
15974    #[test]
15975    fn test_get_coa_before_generation() {
15976        let config = create_test_config();
15977        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
15978
15979        // Before generation, CoA should be None
15980        assert!(orchestrator.get_coa().is_none());
15981    }
15982
15983    #[test]
15984    fn test_get_coa_after_generation() {
15985        let config = create_test_config();
15986        let phase_config = PhaseConfig {
15987            generate_master_data: false,
15988            generate_document_flows: false,
15989            generate_journal_entries: true,
15990            inject_anomalies: false,
15991            show_progress: false,
15992            ..Default::default()
15993        };
15994
15995        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15996        let _ = orchestrator.generate().unwrap();
15997
15998        // After generation, CoA should be available
15999        assert!(orchestrator.get_coa().is_some());
16000    }
16001
16002    #[test]
16003    fn test_get_master_data() {
16004        let config = create_test_config();
16005        let phase_config = PhaseConfig {
16006            generate_master_data: true,
16007            generate_document_flows: false,
16008            generate_journal_entries: false,
16009            inject_anomalies: false,
16010            show_progress: false,
16011            vendors_per_company: 5,
16012            customers_per_company: 5,
16013            materials_per_company: 5,
16014            assets_per_company: 5,
16015            employees_per_company: 5,
16016            ..Default::default()
16017        };
16018
16019        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16020        let result = orchestrator.generate().unwrap();
16021
16022        // After generate(), master_data is moved into the result
16023        assert!(!result.master_data.vendors.is_empty());
16024    }
16025
16026    #[test]
16027    fn test_with_progress_builder() {
16028        let config = create_test_config();
16029        let orchestrator = EnhancedOrchestrator::with_defaults(config)
16030            .unwrap()
16031            .with_progress(false);
16032
16033        // Should still work without progress
16034        assert!(!orchestrator.phase_config.show_progress);
16035    }
16036
16037    #[test]
16038    fn test_multi_company_generation() {
16039        let mut config = create_test_config();
16040        config.companies.push(CompanyConfig {
16041            code: "2000".to_string(),
16042            name: "Subsidiary".to_string(),
16043            currency: "EUR".to_string(),
16044            functional_currency: None,
16045            country: "DE".to_string(),
16046            annual_transaction_volume: TransactionVolume::TenK,
16047            volume_weight: 0.5,
16048            fiscal_year_variant: "K4".to_string(),
16049        });
16050
16051        let phase_config = PhaseConfig {
16052            generate_master_data: true,
16053            generate_document_flows: false,
16054            generate_journal_entries: true,
16055            inject_anomalies: false,
16056            show_progress: false,
16057            vendors_per_company: 5,
16058            customers_per_company: 5,
16059            materials_per_company: 5,
16060            assets_per_company: 5,
16061            employees_per_company: 5,
16062            ..Default::default()
16063        };
16064
16065        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16066        let result = orchestrator.generate().unwrap();
16067
16068        // Should have master data for both companies
16069        assert!(result.statistics.vendor_count >= 10); // 5 per company
16070        assert!(result.statistics.customer_count >= 10);
16071        assert!(result.statistics.companies_count == 2);
16072    }
16073
16074    #[test]
16075    fn test_empty_master_data_skips_document_flows() {
16076        let config = create_test_config();
16077        let phase_config = PhaseConfig {
16078            generate_master_data: false,   // Skip master data
16079            generate_document_flows: true, // Try to generate flows
16080            generate_journal_entries: false,
16081            inject_anomalies: false,
16082            show_progress: false,
16083            ..Default::default()
16084        };
16085
16086        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16087        let result = orchestrator.generate().unwrap();
16088
16089        // Without master data, document flows should be empty
16090        assert!(result.document_flows.p2p_chains.is_empty());
16091        assert!(result.document_flows.o2c_chains.is_empty());
16092    }
16093
16094    #[test]
16095    fn test_journal_entry_line_item_count() {
16096        let config = create_test_config();
16097        let phase_config = PhaseConfig {
16098            generate_master_data: false,
16099            generate_document_flows: false,
16100            generate_journal_entries: true,
16101            inject_anomalies: false,
16102            show_progress: false,
16103            ..Default::default()
16104        };
16105
16106        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16107        let result = orchestrator.generate().unwrap();
16108
16109        // Total line items should match sum of all entry line counts
16110        let calculated_line_items: u64 = result
16111            .journal_entries
16112            .iter()
16113            .map(|e| e.line_count() as u64)
16114            .sum();
16115        assert_eq!(result.statistics.total_line_items, calculated_line_items);
16116    }
16117
16118    #[test]
16119    fn test_audit_generation() {
16120        let config = create_test_config();
16121        let phase_config = PhaseConfig {
16122            generate_master_data: false,
16123            generate_document_flows: false,
16124            generate_journal_entries: true,
16125            inject_anomalies: false,
16126            show_progress: false,
16127            generate_audit: true,
16128            audit_engagements: 2,
16129            workpapers_per_engagement: 5,
16130            evidence_per_workpaper: 2,
16131            risks_per_engagement: 3,
16132            findings_per_engagement: 2,
16133            judgments_per_engagement: 2,
16134            ..Default::default()
16135        };
16136
16137        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16138        let result = orchestrator.generate().unwrap();
16139
16140        // Should have generated audit data
16141        assert_eq!(result.audit.engagements.len(), 2);
16142        assert!(!result.audit.workpapers.is_empty());
16143        assert!(!result.audit.evidence.is_empty());
16144        assert!(!result.audit.risk_assessments.is_empty());
16145        assert!(!result.audit.findings.is_empty());
16146        assert!(!result.audit.judgments.is_empty());
16147
16148        // New ISA entity collections should also be populated
16149        assert!(
16150            !result.audit.confirmations.is_empty(),
16151            "ISA 505 confirmations should be generated"
16152        );
16153        assert!(
16154            !result.audit.confirmation_responses.is_empty(),
16155            "ISA 505 confirmation responses should be generated"
16156        );
16157        assert!(
16158            !result.audit.procedure_steps.is_empty(),
16159            "ISA 330 procedure steps should be generated"
16160        );
16161        // Samples may or may not be generated depending on workpaper sampling methods
16162        assert!(
16163            !result.audit.analytical_results.is_empty(),
16164            "ISA 520 analytical procedures should be generated"
16165        );
16166        assert!(
16167            !result.audit.ia_functions.is_empty(),
16168            "ISA 610 IA functions should be generated (one per engagement)"
16169        );
16170        assert!(
16171            !result.audit.related_parties.is_empty(),
16172            "ISA 550 related parties should be generated"
16173        );
16174
16175        // Statistics should match
16176        assert_eq!(
16177            result.statistics.audit_engagement_count,
16178            result.audit.engagements.len()
16179        );
16180        assert_eq!(
16181            result.statistics.audit_workpaper_count,
16182            result.audit.workpapers.len()
16183        );
16184        assert_eq!(
16185            result.statistics.audit_evidence_count,
16186            result.audit.evidence.len()
16187        );
16188        assert_eq!(
16189            result.statistics.audit_risk_count,
16190            result.audit.risk_assessments.len()
16191        );
16192        assert_eq!(
16193            result.statistics.audit_finding_count,
16194            result.audit.findings.len()
16195        );
16196        assert_eq!(
16197            result.statistics.audit_judgment_count,
16198            result.audit.judgments.len()
16199        );
16200        assert_eq!(
16201            result.statistics.audit_confirmation_count,
16202            result.audit.confirmations.len()
16203        );
16204        assert_eq!(
16205            result.statistics.audit_confirmation_response_count,
16206            result.audit.confirmation_responses.len()
16207        );
16208        assert_eq!(
16209            result.statistics.audit_procedure_step_count,
16210            result.audit.procedure_steps.len()
16211        );
16212        assert_eq!(
16213            result.statistics.audit_sample_count,
16214            result.audit.samples.len()
16215        );
16216        assert_eq!(
16217            result.statistics.audit_analytical_result_count,
16218            result.audit.analytical_results.len()
16219        );
16220        assert_eq!(
16221            result.statistics.audit_ia_function_count,
16222            result.audit.ia_functions.len()
16223        );
16224        assert_eq!(
16225            result.statistics.audit_ia_report_count,
16226            result.audit.ia_reports.len()
16227        );
16228        assert_eq!(
16229            result.statistics.audit_related_party_count,
16230            result.audit.related_parties.len()
16231        );
16232        assert_eq!(
16233            result.statistics.audit_related_party_transaction_count,
16234            result.audit.related_party_transactions.len()
16235        );
16236    }
16237
16238    #[test]
16239    fn test_new_phases_disabled_by_default() {
16240        let config = create_test_config();
16241        // Verify new config fields default to disabled
16242        assert!(!config.llm.enabled);
16243        assert!(!config.diffusion.enabled);
16244        assert!(!config.causal.enabled);
16245
16246        let phase_config = PhaseConfig {
16247            generate_master_data: false,
16248            generate_document_flows: false,
16249            generate_journal_entries: true,
16250            inject_anomalies: false,
16251            show_progress: false,
16252            ..Default::default()
16253        };
16254
16255        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16256        let result = orchestrator.generate().unwrap();
16257
16258        // All new phase statistics should be zero when disabled
16259        assert_eq!(result.statistics.llm_enrichment_ms, 0);
16260        assert_eq!(result.statistics.llm_vendors_enriched, 0);
16261        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16262        assert_eq!(result.statistics.diffusion_samples_generated, 0);
16263        assert_eq!(result.statistics.causal_generation_ms, 0);
16264        assert_eq!(result.statistics.causal_samples_generated, 0);
16265        assert!(result.statistics.causal_validation_passed.is_none());
16266        assert_eq!(result.statistics.counterfactual_pair_count, 0);
16267        assert!(result.counterfactual_pairs.is_empty());
16268    }
16269
16270    #[test]
16271    fn test_counterfactual_generation_enabled() {
16272        let config = create_test_config();
16273        let phase_config = PhaseConfig {
16274            generate_master_data: false,
16275            generate_document_flows: false,
16276            generate_journal_entries: true,
16277            inject_anomalies: false,
16278            show_progress: false,
16279            generate_counterfactuals: true,
16280            generate_period_close: false, // Disable so entry count matches counterfactual pairs
16281            ..Default::default()
16282        };
16283
16284        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16285        let result = orchestrator.generate().unwrap();
16286
16287        // With JE generation enabled, counterfactual pairs should be generated
16288        if !result.journal_entries.is_empty() {
16289            assert_eq!(
16290                result.counterfactual_pairs.len(),
16291                result.journal_entries.len()
16292            );
16293            assert_eq!(
16294                result.statistics.counterfactual_pair_count,
16295                result.journal_entries.len()
16296            );
16297            // Each pair should have a distinct pair_id
16298            let ids: std::collections::HashSet<_> = result
16299                .counterfactual_pairs
16300                .iter()
16301                .map(|p| p.pair_id.clone())
16302                .collect();
16303            assert_eq!(ids.len(), result.counterfactual_pairs.len());
16304        }
16305    }
16306
16307    #[test]
16308    fn test_llm_enrichment_enabled() {
16309        let mut config = create_test_config();
16310        config.llm.enabled = true;
16311        config.llm.max_vendor_enrichments = 3;
16312
16313        let phase_config = PhaseConfig {
16314            generate_master_data: true,
16315            generate_document_flows: false,
16316            generate_journal_entries: false,
16317            inject_anomalies: false,
16318            show_progress: false,
16319            vendors_per_company: 5,
16320            customers_per_company: 3,
16321            materials_per_company: 3,
16322            assets_per_company: 3,
16323            employees_per_company: 3,
16324            ..Default::default()
16325        };
16326
16327        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16328        let result = orchestrator.generate().unwrap();
16329
16330        // LLM enrichment should have run
16331        assert!(result.statistics.llm_vendors_enriched > 0);
16332        assert!(result.statistics.llm_vendors_enriched <= 3);
16333    }
16334
16335    #[test]
16336    fn test_diffusion_enhancement_enabled() {
16337        let mut config = create_test_config();
16338        config.diffusion.enabled = true;
16339        config.diffusion.n_steps = 50;
16340        config.diffusion.sample_size = 20;
16341
16342        let phase_config = PhaseConfig {
16343            generate_master_data: false,
16344            generate_document_flows: false,
16345            generate_journal_entries: true,
16346            inject_anomalies: false,
16347            show_progress: false,
16348            ..Default::default()
16349        };
16350
16351        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16352        let result = orchestrator.generate().unwrap();
16353
16354        // Diffusion phase should have generated samples
16355        assert_eq!(result.statistics.diffusion_samples_generated, 20);
16356    }
16357
16358    #[test]
16359    fn test_causal_overlay_enabled() {
16360        let mut config = create_test_config();
16361        config.causal.enabled = true;
16362        config.causal.template = "fraud_detection".to_string();
16363        config.causal.sample_size = 100;
16364        config.causal.validate = true;
16365
16366        let phase_config = PhaseConfig {
16367            generate_master_data: false,
16368            generate_document_flows: false,
16369            generate_journal_entries: true,
16370            inject_anomalies: false,
16371            show_progress: false,
16372            ..Default::default()
16373        };
16374
16375        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16376        let result = orchestrator.generate().unwrap();
16377
16378        // Causal phase should have generated samples
16379        assert_eq!(result.statistics.causal_samples_generated, 100);
16380        // Validation should have run
16381        assert!(result.statistics.causal_validation_passed.is_some());
16382    }
16383
16384    #[test]
16385    fn test_causal_overlay_revenue_cycle_template() {
16386        let mut config = create_test_config();
16387        config.causal.enabled = true;
16388        config.causal.template = "revenue_cycle".to_string();
16389        config.causal.sample_size = 50;
16390        config.causal.validate = false;
16391
16392        let phase_config = PhaseConfig {
16393            generate_master_data: false,
16394            generate_document_flows: false,
16395            generate_journal_entries: true,
16396            inject_anomalies: false,
16397            show_progress: false,
16398            ..Default::default()
16399        };
16400
16401        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16402        let result = orchestrator.generate().unwrap();
16403
16404        // Causal phase should have generated samples
16405        assert_eq!(result.statistics.causal_samples_generated, 50);
16406        // Validation was disabled
16407        assert!(result.statistics.causal_validation_passed.is_none());
16408    }
16409
16410    #[test]
16411    fn test_all_new_phases_enabled_together() {
16412        let mut config = create_test_config();
16413        config.llm.enabled = true;
16414        config.llm.max_vendor_enrichments = 2;
16415        config.diffusion.enabled = true;
16416        config.diffusion.n_steps = 20;
16417        config.diffusion.sample_size = 10;
16418        config.causal.enabled = true;
16419        config.causal.sample_size = 50;
16420        config.causal.validate = true;
16421
16422        let phase_config = PhaseConfig {
16423            generate_master_data: true,
16424            generate_document_flows: false,
16425            generate_journal_entries: true,
16426            inject_anomalies: false,
16427            show_progress: false,
16428            vendors_per_company: 5,
16429            customers_per_company: 3,
16430            materials_per_company: 3,
16431            assets_per_company: 3,
16432            employees_per_company: 3,
16433            ..Default::default()
16434        };
16435
16436        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16437        let result = orchestrator.generate().unwrap();
16438
16439        // All three phases should have run
16440        assert!(result.statistics.llm_vendors_enriched > 0);
16441        assert_eq!(result.statistics.diffusion_samples_generated, 10);
16442        assert_eq!(result.statistics.causal_samples_generated, 50);
16443        assert!(result.statistics.causal_validation_passed.is_some());
16444    }
16445
16446    #[test]
16447    fn test_statistics_serialization_with_new_fields() {
16448        let stats = EnhancedGenerationStatistics {
16449            total_entries: 100,
16450            total_line_items: 500,
16451            llm_enrichment_ms: 42,
16452            llm_vendors_enriched: 10,
16453            diffusion_enhancement_ms: 100,
16454            diffusion_samples_generated: 50,
16455            causal_generation_ms: 200,
16456            causal_samples_generated: 100,
16457            causal_validation_passed: Some(true),
16458            ..Default::default()
16459        };
16460
16461        let json = serde_json::to_string(&stats).unwrap();
16462        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
16463
16464        assert_eq!(deserialized.llm_enrichment_ms, 42);
16465        assert_eq!(deserialized.llm_vendors_enriched, 10);
16466        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
16467        assert_eq!(deserialized.diffusion_samples_generated, 50);
16468        assert_eq!(deserialized.causal_generation_ms, 200);
16469        assert_eq!(deserialized.causal_samples_generated, 100);
16470        assert_eq!(deserialized.causal_validation_passed, Some(true));
16471    }
16472
16473    #[test]
16474    fn test_statistics_backward_compat_deserialization() {
16475        // Old JSON without the new fields should still deserialize
16476        let old_json = r#"{
16477            "total_entries": 100,
16478            "total_line_items": 500,
16479            "accounts_count": 50,
16480            "companies_count": 1,
16481            "period_months": 12,
16482            "vendor_count": 10,
16483            "customer_count": 20,
16484            "material_count": 15,
16485            "asset_count": 5,
16486            "employee_count": 8,
16487            "p2p_chain_count": 5,
16488            "o2c_chain_count": 5,
16489            "ap_invoice_count": 5,
16490            "ar_invoice_count": 5,
16491            "ocpm_event_count": 0,
16492            "ocpm_object_count": 0,
16493            "ocpm_case_count": 0,
16494            "audit_engagement_count": 0,
16495            "audit_workpaper_count": 0,
16496            "audit_evidence_count": 0,
16497            "audit_risk_count": 0,
16498            "audit_finding_count": 0,
16499            "audit_judgment_count": 0,
16500            "anomalies_injected": 0,
16501            "data_quality_issues": 0,
16502            "banking_customer_count": 0,
16503            "banking_account_count": 0,
16504            "banking_transaction_count": 0,
16505            "banking_suspicious_count": 0,
16506            "graph_export_count": 0,
16507            "graph_node_count": 0,
16508            "graph_edge_count": 0
16509        }"#;
16510
16511        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
16512
16513        // New fields should default to 0 / None
16514        assert_eq!(stats.llm_enrichment_ms, 0);
16515        assert_eq!(stats.llm_vendors_enriched, 0);
16516        assert_eq!(stats.diffusion_enhancement_ms, 0);
16517        assert_eq!(stats.diffusion_samples_generated, 0);
16518        assert_eq!(stats.causal_generation_ms, 0);
16519        assert_eq!(stats.causal_samples_generated, 0);
16520        assert!(stats.causal_validation_passed.is_none());
16521    }
16522}