Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    // ESG anomaly labels
102    EsgAnomalyLabel,
103    EvidenceGenerator,
104    // Subledger depreciation schedule generator
105    FaDepreciationScheduleConfig,
106    FaDepreciationScheduleGenerator,
107    // Financial statement generator
108    FinancialStatementGenerator,
109    FindingGenerator,
110    // Inventory valuation generator
111    InventoryValuationGenerator,
112    InventoryValuationGeneratorConfig,
113    JournalEntryGenerator,
114    JudgmentGenerator,
115    LatePaymentDistribution,
116    // Manufacturing cost accounting + warranty provisions
117    ManufacturingCostAccounting,
118    MaterialGenerator,
119    O2CDocumentChain,
120    O2CGenerator,
121    O2CGeneratorConfig,
122    O2CPaymentBehavior,
123    P2PDocumentChain,
124    // Document flow generators
125    P2PGenerator,
126    P2PGeneratorConfig,
127    P2PPaymentBehavior,
128    PaymentReference,
129    // Provisions and contingencies generator (IAS 37 / ASC 450)
130    ProvisionGenerator,
131    QualificationGenerator,
132    RfxGenerator,
133    RiskAssessmentGenerator,
134    // Balance validation
135    RunningBalanceTracker,
136    ScorecardGenerator,
137    // Segment reporting generator (IFRS 8 / ASC 280)
138    SegmentGenerator,
139    SegmentSeed,
140    SourcingProjectGenerator,
141    SpendAnalysisGenerator,
142    ValidationError,
143    // Master data generators
144    VendorGenerator,
145    WarrantyProvisionGenerator,
146    WorkpaperGenerator,
147};
148use datasynth_graph::{
149    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
150    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
151    TransactionGraphConfig,
152};
153use datasynth_ocpm::{
154    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
155    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
156    OcpmUuidFactory, P2pDocuments, S2cDocuments,
157};
158
159use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
160use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
161use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
162use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
163use datasynth_core::models::balance::{
164    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
165    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
166};
167use datasynth_core::models::documents::PaymentMethod;
168use datasynth_core::models::IndustrySector;
169use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
170use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
171use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
172use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
173use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
174use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
175use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
176use datasynth_generators::audit::sample_generator::SampleGenerator;
177use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
178use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
179use datasynth_generators::coa_generator::CoAFramework;
180use rayon::prelude::*;
181use rust_decimal::Decimal;
182
183// ============================================================================
184// Configuration Conversion Functions
185// ============================================================================
186
187/// Convert P2P flow config from schema to generator config.
188/// v4.4.1 — build a `DataQualityStats` with only `total_records`
189/// populated to `n_entries`. Used when the data-quality phase is
190/// skipped (by config or resource pressure) so downstream consumers
191/// can still see the denominator. Before v4.4.1 the writer emitted
192/// `total_records: 0` in those cases, which the SDK team flagged as
193/// indistinguishable from "ran but processed nothing".
194fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
195    #[allow(clippy::field_reassign_with_default)]
196    {
197        let mut s = DataQualityStats::default();
198        s.total_records = n_entries;
199        s.missing_values.total_records = n_entries;
200        s.format_variations.total_processed = n_entries;
201        s.duplicates.total_processed = n_entries;
202        s
203    }
204}
205
206fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
207    let payment_behavior = &schema_config.payment_behavior;
208    let late_dist = &payment_behavior.late_payment_days_distribution;
209
210    P2PGeneratorConfig {
211        three_way_match_rate: schema_config.three_way_match_rate,
212        partial_delivery_rate: schema_config.partial_delivery_rate,
213        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
214        price_variance_rate: schema_config.price_variance_rate,
215        max_price_variance_percent: schema_config.max_price_variance_percent,
216        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
217        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
218        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
219        payment_method_distribution: vec![
220            (PaymentMethod::BankTransfer, 0.60),
221            (PaymentMethod::Check, 0.25),
222            (PaymentMethod::Wire, 0.10),
223            (PaymentMethod::CreditCard, 0.05),
224        ],
225        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
226        payment_behavior: P2PPaymentBehavior {
227            late_payment_rate: payment_behavior.late_payment_rate,
228            late_payment_distribution: LatePaymentDistribution {
229                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
230                late_8_to_14: late_dist.late_8_to_14,
231                very_late_15_to_30: late_dist.very_late_15_to_30,
232                severely_late_31_to_60: late_dist.severely_late_31_to_60,
233                extremely_late_over_60: late_dist.extremely_late_over_60,
234            },
235            partial_payment_rate: payment_behavior.partial_payment_rate,
236            payment_correction_rate: payment_behavior.payment_correction_rate,
237            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
238        },
239    }
240}
241
242/// Convert O2C flow config from schema to generator config.
243fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
244    let payment_behavior = &schema_config.payment_behavior;
245
246    O2CGeneratorConfig {
247        credit_check_failure_rate: schema_config.credit_check_failure_rate,
248        partial_shipment_rate: schema_config.partial_shipment_rate,
249        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
250        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
251        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
252        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
253        bad_debt_rate: schema_config.bad_debt_rate,
254        returns_rate: schema_config.return_rate,
255        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
256        payment_method_distribution: vec![
257            (PaymentMethod::BankTransfer, 0.50),
258            (PaymentMethod::Check, 0.30),
259            (PaymentMethod::Wire, 0.15),
260            (PaymentMethod::CreditCard, 0.05),
261        ],
262        payment_behavior: O2CPaymentBehavior {
263            partial_payment_rate: payment_behavior.partial_payments.rate,
264            short_payment_rate: payment_behavior.short_payments.rate,
265            max_short_percent: payment_behavior.short_payments.max_short_percent,
266            on_account_rate: payment_behavior.on_account_payments.rate,
267            payment_correction_rate: payment_behavior.payment_corrections.rate,
268            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
269        },
270    }
271}
272
/// Switches and volume knobs that decide which generation phases run.
///
/// Boolean fields turn a phase on or off; `usize` fields set how many
/// records a phase should produce.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Produce master data: vendors, customers, materials, assets, employees.
    pub generate_master_data: bool,
    /// Produce P2P and O2C document flows.
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Inject data-quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation is done.
    pub validate_balances: bool,
    /// Require every `gl_account` referenced by a generated JE to exist in
    /// the chart of accounts. When false (the default) a soft warning is
    /// emitted instead; when true any orphan account fails the run.
    pub validate_coa_coverage_strict: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors to generate for each company.
    pub vendors_per_company: usize,
    /// Customers to generate for each company.
    pub customers_per_company: usize,
    /// Materials to generate for each company.
    pub materials_per_company: usize,
    /// Assets to generate for each company.
    pub assets_per_company: usize,
    /// Employees to generate for each company.
    pub employees_per_company: usize,
    /// How many P2P chains to generate.
    pub p2p_chains: usize,
    /// How many O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// How many audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers generated for each engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items generated for each workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments generated for each engagement.
    pub risks_per_engagement: usize,
    /// Findings generated for each engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments generated for each engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting-standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process-evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance-regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Produce HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Produce treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Produce project-accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: produce legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings, board
    /// resolutions). Gated by `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: produce IT general controls (access logs, change management
    /// records) per audit engagement. Gated by `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding phases.
    /// Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
374
375impl Default for PhaseConfig {
376    fn default() -> Self {
377        Self {
378            generate_master_data: true,
379            generate_document_flows: true,
380            generate_ocpm_events: false, // Off by default
381            generate_journal_entries: true,
382            inject_anomalies: false,
383            inject_data_quality: false, // Off by default (to preserve clean test data)
384            validate_balances: true,
385            validate_coa_coverage_strict: false,
386            show_progress: true,
387            vendors_per_company: 50,
388            customers_per_company: 100,
389            materials_per_company: 200,
390            assets_per_company: 50,
391            employees_per_company: 100,
392            p2p_chains: 100,
393            o2c_chains: 100,
394            generate_audit: false, // Off by default
395            audit_engagements: 5,
396            workpapers_per_engagement: 20,
397            evidence_per_workpaper: 5,
398            risks_per_engagement: 15,
399            findings_per_engagement: 8,
400            judgments_per_engagement: 10,
401            generate_banking: false,                // Off by default
402            generate_graph_export: false,           // Off by default
403            generate_sourcing: false,               // Off by default
404            generate_bank_reconciliation: false,    // Off by default
405            generate_financial_statements: false,   // Off by default
406            generate_accounting_standards: false,   // Off by default
407            generate_manufacturing: false,          // Off by default
408            generate_sales_kpi_budgets: false,      // Off by default
409            generate_tax: false,                    // Off by default
410            generate_esg: false,                    // Off by default
411            generate_intercompany: false,           // Off by default
412            generate_evolution_events: true,        // On by default
413            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
414            generate_compliance_regulations: false, // Off by default
415            generate_period_close: true,            // On by default
416            generate_hr: false,                     // Off by default
417            generate_treasury: false,               // Off by default
418            generate_project_accounting: false,     // Off by default
419            generate_legal_documents: false,        // v3.3.0 — off by default
420            generate_it_controls: false,            // v3.3.0 — off by default
421            generate_analytics_metadata: false,     // v3.3.0 — off by default
422        }
423    }
424}
425
426impl PhaseConfig {
427    /// Derive phase flags from [`GeneratorConfig`].
428    ///
429    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
430    /// CLI flags can override individual fields after calling this method.
431    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
432        Self {
433            // Always-on phases
434            generate_master_data: true,
435            generate_document_flows: true,
436            generate_journal_entries: true,
437            validate_balances: true,
438            validate_coa_coverage_strict: false,
439            generate_period_close: true,
440            generate_evolution_events: true,
441            show_progress: true,
442
443            // Feature-gated phases — derived from config sections
444            generate_audit: cfg.audit.enabled,
445            generate_banking: cfg.banking.enabled,
446            generate_graph_export: cfg.graph_export.enabled,
447            generate_sourcing: cfg.source_to_pay.enabled,
448            generate_intercompany: cfg.intercompany.enabled,
449            generate_financial_statements: cfg.financial_reporting.enabled,
450            generate_bank_reconciliation: cfg.financial_reporting.enabled,
451            generate_accounting_standards: cfg.accounting_standards.enabled,
452            generate_manufacturing: cfg.manufacturing.enabled,
453            generate_sales_kpi_budgets: cfg.sales_quotes.enabled,
454            generate_tax: cfg.tax.enabled,
455            generate_esg: cfg.esg.enabled,
456            generate_ocpm_events: cfg.ocpm.enabled,
457            generate_compliance_regulations: cfg.compliance_regulations.enabled,
458            generate_hr: cfg.hr.enabled,
459            generate_treasury: cfg.treasury.enabled,
460            generate_project_accounting: cfg.project_accounting.enabled,
461
462            // v3.3.0: L1 generator wiring
463            // Legal documents emitted when compliance_regulations is enabled
464            // and the nested legal_documents.enabled flag is set.
465            generate_legal_documents: cfg.compliance_regulations.enabled
466                && cfg.compliance_regulations.legal_documents.enabled,
467            // IT general controls emitted when audit is enabled and the
468            // nested it_controls.enabled flag is set.
469            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
470            // Analytics metadata phase (prior-year, industry benchmarks,
471            // management reports, drift events).
472            generate_analytics_metadata: cfg.analytics_metadata.enabled,
473
474            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
475            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
476
477            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
478            inject_data_quality: cfg.data_quality.enabled,
479
480            // Count defaults (CLI can override after calling this method)
481            vendors_per_company: 50,
482            customers_per_company: 100,
483            materials_per_company: 200,
484            assets_per_company: 50,
485            employees_per_company: 100,
486            p2p_chains: 100,
487            o2c_chains: 100,
488            audit_engagements: 5,
489            workpapers_per_engagement: 20,
490            evidence_per_workpaper: 5,
491            risks_per_engagement: 15,
492            findings_per_engagement: 8,
493            judgments_per_engagement: 10,
494        }
495    }
496}
497
498/// Master data snapshot containing all generated entities.
499#[derive(Debug, Clone, Default)]
500pub struct MasterDataSnapshot {
501    /// Generated vendors.
502    pub vendors: Vec<Vendor>,
503    /// Generated customers.
504    pub customers: Vec<Customer>,
505    /// Generated materials.
506    pub materials: Vec<Material>,
507    /// Generated fixed assets.
508    pub assets: Vec<FixedAsset>,
509    /// Generated employees.
510    pub employees: Vec<Employee>,
511    /// Generated cost center hierarchy (two-level: departments + sub-departments).
512    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
513    /// v5.1: Generated profit centre hierarchy (two-level: top-level
514    /// segment / region / product-group nodes + sub-units).  Emits to
515    /// SAP CEPC alongside `cost_centers` → CSKS.
516    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
517    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
518    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
519    /// v3.3.0+: organizational profiles (one per company) with
520    /// industry / geography / structure / complexity metadata. Emitted
521    /// alongside master data when `generate_master_data = true`.
522    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
523}
524
/// Summary of a hypergraph export that has finished.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
537
538/// Document flow snapshot containing all generated document chains.
539#[derive(Debug, Clone, Default)]
540pub struct DocumentFlowSnapshot {
541    /// P2P document chains.
542    pub p2p_chains: Vec<P2PDocumentChain>,
543    /// O2C document chains.
544    pub o2c_chains: Vec<O2CDocumentChain>,
545    /// All purchase orders (flattened).
546    pub purchase_orders: Vec<documents::PurchaseOrder>,
547    /// All goods receipts (flattened).
548    pub goods_receipts: Vec<documents::GoodsReceipt>,
549    /// All vendor invoices (flattened).
550    pub vendor_invoices: Vec<documents::VendorInvoice>,
551    /// All sales orders (flattened).
552    pub sales_orders: Vec<documents::SalesOrder>,
553    /// All deliveries (flattened).
554    pub deliveries: Vec<documents::Delivery>,
555    /// All customer invoices (flattened).
556    pub customer_invoices: Vec<documents::CustomerInvoice>,
557    /// All payments (flattened).
558    pub payments: Vec<documents::Payment>,
559    /// Cross-document references collected from all document headers
560    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
561    pub document_references: Vec<documents::DocumentReference>,
562}
563
564/// Subledger snapshot containing generated subledger records.
565#[derive(Debug, Clone, Default)]
566pub struct SubledgerSnapshot {
567    /// AP invoices linked from document flow vendor invoices.
568    pub ap_invoices: Vec<APInvoice>,
569    /// AR invoices linked from document flow customer invoices.
570    pub ar_invoices: Vec<ARInvoice>,
571    /// FA subledger records (asset acquisitions from FA generator).
572    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
573    /// Inventory positions from inventory generator.
574    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
575    /// Inventory movements from inventory generator.
576    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
577    /// AR aging reports, one per company, computed after payment settlement.
578    pub ar_aging_reports: Vec<ARAgingReport>,
579    /// AP aging reports, one per company, computed after payment settlement.
580    pub ap_aging_reports: Vec<APAgingReport>,
581    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
582    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
583    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
584    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
585    /// Dunning runs executed after AR aging (one per company per dunning cycle).
586    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
587    /// Dunning letters generated across all dunning runs.
588    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
589}
590
591/// OCPM snapshot containing generated OCPM event log data.
592#[derive(Debug, Clone, Default)]
593pub struct OcpmSnapshot {
594    /// OCPM event log (if generated)
595    pub event_log: Option<OcpmEventLog>,
596    /// Number of events generated
597    pub event_count: usize,
598    /// Number of objects generated
599    pub object_count: usize,
600    /// Number of cases generated
601    pub case_count: usize,
602}
603
604/// Audit data snapshot containing all generated audit-related entities.
605#[derive(Debug, Clone, Default)]
606pub struct AuditSnapshot {
607    /// Audit engagements per ISA 210/220.
608    pub engagements: Vec<AuditEngagement>,
609    /// Workpapers per ISA 230.
610    pub workpapers: Vec<Workpaper>,
611    /// Audit evidence per ISA 500.
612    pub evidence: Vec<AuditEvidence>,
613    /// Risk assessments per ISA 315/330.
614    pub risk_assessments: Vec<RiskAssessment>,
615    /// Audit findings per ISA 265.
616    pub findings: Vec<AuditFinding>,
617    /// Professional judgments per ISA 200.
618    pub judgments: Vec<ProfessionalJudgment>,
619    /// External confirmations per ISA 505.
620    pub confirmations: Vec<ExternalConfirmation>,
621    /// Confirmation responses per ISA 505.
622    pub confirmation_responses: Vec<ConfirmationResponse>,
623    /// Audit procedure steps per ISA 330/530.
624    pub procedure_steps: Vec<AuditProcedureStep>,
625    /// Audit samples per ISA 530.
626    pub samples: Vec<AuditSample>,
627    /// Analytical procedure results per ISA 520.
628    pub analytical_results: Vec<AnalyticalProcedureResult>,
629    /// Internal audit functions per ISA 610.
630    pub ia_functions: Vec<InternalAuditFunction>,
631    /// Internal audit reports per ISA 610.
632    pub ia_reports: Vec<InternalAuditReport>,
633    /// Related parties per ISA 550.
634    pub related_parties: Vec<RelatedParty>,
635    /// Related party transactions per ISA 550.
636    pub related_party_transactions: Vec<RelatedPartyTransaction>,
637    // ---- ISA 600: Group Audits ----
638    /// Component auditors assigned by jurisdiction (ISA 600).
639    pub component_auditors: Vec<ComponentAuditor>,
640    /// Group audit plan with materiality allocations (ISA 600).
641    pub group_audit_plan: Option<GroupAuditPlan>,
642    /// Component instructions issued to component auditors (ISA 600).
643    pub component_instructions: Vec<ComponentInstruction>,
644    /// Reports received from component auditors (ISA 600).
645    pub component_reports: Vec<ComponentAuditorReport>,
646    // ---- ISA 210: Engagement Letters ----
647    /// Engagement letters per ISA 210.
648    pub engagement_letters: Vec<EngagementLetter>,
649    // ---- ISA 560 / IAS 10: Subsequent Events ----
650    /// Subsequent events per ISA 560 / IAS 10.
651    pub subsequent_events: Vec<SubsequentEvent>,
652    // ---- ISA 402: Service Organization Controls ----
653    /// Service organizations identified per ISA 402.
654    pub service_organizations: Vec<ServiceOrganization>,
655    /// SOC reports obtained per ISA 402.
656    pub soc_reports: Vec<SocReport>,
657    /// User entity controls documented per ISA 402.
658    pub user_entity_controls: Vec<UserEntityControl>,
659    // ---- ISA 570: Going Concern ----
660    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
661    pub going_concern_assessments:
662        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
663    // ---- ISA 540: Accounting Estimates ----
664    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
665    pub accounting_estimates:
666        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
667    // ---- ISA 700/701/705/706: Audit Opinions ----
668    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
669    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
670    /// Key Audit Matters per ISA 701 (flattened across all opinions).
671    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
672    // ---- SOX 302 / 404 ----
673    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
674    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
675    /// SOX Section 404 ICFR assessments (one per entity per year).
676    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
677    // ---- ISA 320: Materiality ----
678    /// Materiality calculations per entity per period (ISA 320).
679    pub materiality_calculations:
680        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
681    // ---- ISA 315: Combined Risk Assessments ----
682    /// Combined Risk Assessments per account area / assertion (ISA 315).
683    pub combined_risk_assessments:
684        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
685    // ---- ISA 530: Sampling Plans ----
686    /// Sampling plans per CRA at Moderate or higher (ISA 530).
687    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
688    /// Individual sampled items (key items + representative items) per ISA 530.
689    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
690    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
691    /// Significant classes of transactions per ISA 315 (one set per entity).
692    pub significant_transaction_classes:
693        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
694    // ---- ISA 520: Unusual Item Markers ----
695    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
696    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
697    // ---- ISA 520: Analytical Relationships ----
698    /// Analytical relationships (ratios, trends, correlations) per entity.
699    pub analytical_relationships:
700        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
701    // ---- PCAOB-ISA Cross-Reference ----
702    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
703    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
704    // ---- ISA Standard Reference ----
705    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
706    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
707    // ---- ISA 220 / ISA 300: Audit Scopes ----
708    /// Audit scope records (one per engagement) describing the audit boundary.
709    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
710    // ---- FSM Event Trail ----
711    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
712    /// Contains the ordered sequence of state-transition and procedure-step events
713    /// generated by the audit FSM engine.
714    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
715    // ---- v3.3.0: L1 generator wiring ----
716    /// Legal documents (engagement letters, management reps, legal
717    /// opinions, regulatory filings, board resolutions) per entity.
718    /// Emitted by `LegalDocumentGenerator` when
719    /// `compliance_regulations.legal_documents.enabled = true`.
720    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
721    /// IT general controls — access logs (login/privileged action
722    /// audit trail). Emitted by `ItControlsGenerator` when
723    /// `audit.it_controls.enabled = true`.
724    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
725    /// IT general controls — change management records (code deploys,
726    /// config changes, patches). Emitted by `ItControlsGenerator`.
727    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
728}
729
/// Banking KYC/AML data snapshot containing all generated banking entities.
///
/// Bundles the banking customers, accounts and transactions with the AML
/// labels (transaction, customer, account and relationship level), the case
/// narratives and two summary counters. `Default` yields empty collections
/// and zero counters.
#[derive(Debug, Clone, Default)]
pub struct BankingSnapshot {
    /// Banking customers (retail, business, trust).
    pub customers: Vec<BankingCustomer>,
    /// Bank accounts.
    pub accounts: Vec<BankAccount>,
    /// Bank transactions with AML labels.
    pub transactions: Vec<BankTransaction>,
    /// Transaction-level AML labels with features.
    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
    /// Customer-level AML labels.
    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
    /// Account-level AML labels.
    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
    /// Relationship-level AML labels.
    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
    /// Case narratives for AML scenarios.
    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
    /// Number of suspicious transactions.
    ///
    /// NOTE(review): stored independently of `transactions`; nothing in this
    /// type keeps the two in sync.
    pub suspicious_count: usize,
    /// Number of AML scenarios generated.
    pub scenario_count: usize,
}
754
/// Graph export snapshot containing exported graph metadata.
///
/// Holds only metadata about each export (see [`GraphExportInfo`]), not the
/// graph data itself. `Default` yields `exported == false`, a zero count and
/// an empty map.
#[derive(Debug, Clone, Default, Serialize)]
pub struct GraphExportSnapshot {
    /// Whether graph export was performed.
    pub exported: bool,
    /// Number of graphs exported.
    pub graph_count: usize,
    /// Exported graph metadata (by format name).
    pub exports: HashMap<String, GraphExportInfo>,
}
765
/// Information about an exported graph.
///
/// Unlike the snapshot types around it, this struct does not derive
/// `Default`, so every field must be supplied when constructing it.
#[derive(Debug, Clone, Serialize)]
pub struct GraphExportInfo {
    /// Graph name.
    pub name: String,
    /// Export format (pytorch_geometric, neo4j, dgl).
    ///
    /// Stored as a free-form string; the type itself does not restrict the
    /// value to the formats listed above.
    pub format: String,
    /// Output directory path.
    pub output_path: PathBuf,
    /// Number of nodes.
    pub node_count: usize,
    /// Number of edges.
    pub edge_count: usize,
}
780
/// S2C (Source-to-Contract) sourcing data snapshot.
///
/// One collection per sourcing artefact, from spend analysis through to
/// supplier scorecards. `Default` yields all-empty collections.
#[derive(Debug, Clone, Default)]
pub struct SourcingSnapshot {
    /// Spend analyses.
    pub spend_analyses: Vec<SpendAnalysis>,
    /// Sourcing projects.
    pub sourcing_projects: Vec<SourcingProject>,
    /// Supplier qualifications.
    pub qualifications: Vec<SupplierQualification>,
    /// RFx events (RFI, RFP, RFQ).
    pub rfx_events: Vec<RfxEvent>,
    /// Supplier bids.
    pub bids: Vec<SupplierBid>,
    /// Bid evaluations.
    pub bid_evaluations: Vec<BidEvaluation>,
    /// Procurement contracts.
    pub contracts: Vec<ProcurementContract>,
    /// Catalog items.
    pub catalog_items: Vec<CatalogItem>,
    /// Supplier scorecards.
    pub scorecards: Vec<SupplierScorecard>,
}
803
/// A single period's trial balance with metadata.
///
/// This is the orchestrator's in-memory representation while it builds
/// per-period FS / CF artefacts. At write time the runtime converts each
/// `PeriodTrialBalance` to the canonical
/// [`datasynth_core::models::balance::TrialBalance`] shape via
/// [`PeriodTrialBalance::into_canonical`], so that the on-disk
/// `period_close/trial_balances.json` matches what the group aggregate
/// phase loads — see [`crate::output_writer::write_outputs`].
///
/// The struct carries no company code or currency; both are supplied to
/// [`PeriodTrialBalance::into_canonical`] at conversion time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PeriodTrialBalance {
    /// Fiscal year.
    pub fiscal_year: u16,
    /// Fiscal period (1-12).
    ///
    /// The range is a convention; the `u8` type does not enforce it.
    pub fiscal_period: u8,
    /// Period start date.
    pub period_start: NaiveDate,
    /// Period end date.
    pub period_end: NaiveDate,
    /// Trial balance entries for this period.
    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
}
827
828impl PeriodTrialBalance {
829    /// Convert this in-memory period TB into the canonical
830    /// [`datasynth_core::models::balance::TrialBalance`] shape used
831    /// for the on-disk artefact.
832    ///
833    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
834    /// aggregate's `tb_loader` consumes the canonical type directly,
835    /// dropping the v5.0 dual-shape detection that converted from
836    /// `PeriodTrialBalance` JSON on the fly.
837    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
838        let mut total_debits = Decimal::ZERO;
839        let mut total_credits = Decimal::ZERO;
840        let lines: Vec<TrialBalanceLine> = self
841            .entries
842            .into_iter()
843            .map(|e| {
844                total_debits += e.debit_balance;
845                total_credits += e.credit_balance;
846                let category = AccountCategory::from_account_code(&e.account_code);
847                TrialBalanceLine {
848                    account_code: e.account_code,
849                    account_description: e.account_name,
850                    category,
851                    account_type: AccountType::Asset,
852                    opening_balance: Decimal::ZERO,
853                    period_debits: e.debit_balance,
854                    period_credits: e.credit_balance,
855                    closing_balance: e.debit_balance - e.credit_balance,
856                    debit_balance: e.debit_balance,
857                    credit_balance: e.credit_balance,
858                    cost_center: None,
859                    profit_center: None,
860                }
861            })
862            .collect();
863        let imbalance = total_debits - total_credits;
864        let is_balanced = imbalance.abs() < Decimal::new(1, 2);
865        TrialBalance {
866            trial_balance_id: format!(
867                "{company_code}-{:04}{:02}",
868                self.fiscal_year, self.fiscal_period
869            ),
870            company_code: company_code.to_string(),
871            company_name: None,
872            as_of_date: self.period_end,
873            fiscal_year: self.fiscal_year as i32,
874            fiscal_period: self.fiscal_period as u32,
875            currency: currency.to_string(),
876            balance_type: TrialBalanceType::Adjusted,
877            lines,
878            total_debits,
879            total_credits,
880            is_balanced,
881            out_of_balance: imbalance,
882            is_equation_valid: is_balanced,
883            equation_difference: imbalance,
884            category_summary: std::collections::HashMap::new(),
885            created_at: self
886                .period_start
887                .and_hms_opt(0, 0, 0)
888                .expect("midnight is a valid time"),
889            created_by: "ORCHESTRATOR".to_string(),
890            approved_by: None,
891            approved_at: None,
892            status: TrialBalanceStatus::Final,
893        }
894    }
895}
896
/// Financial reporting snapshot (financial statements, consolidation,
/// bank reconciliations, period-close trial balances, segment reporting and
/// notes).
///
/// `Default` yields all-empty collections.
#[derive(Debug, Clone, Default)]
pub struct FinancialReportingSnapshot {
    /// Financial statements (balance sheet, income statement, cash flow).
    /// For multi-entity configs this includes all standalone statements.
    pub financial_statements: Vec<FinancialStatement>,
    /// Standalone financial statements keyed by entity code.
    /// Each entity has its own slice of statements.
    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
    pub consolidated_statements: Vec<FinancialStatement>,
    /// Consolidation schedules (one per period) showing pre/post elimination detail.
    pub consolidation_schedules: Vec<ConsolidationSchedule>,
    /// Bank reconciliations.
    pub bank_reconciliations: Vec<BankReconciliation>,
    /// Period-close trial balances (one per period), kept in the in-memory
    /// [`PeriodTrialBalance`] shape.
    pub trial_balances: Vec<PeriodTrialBalance>,
    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
}
921
/// HR data snapshot (payroll runs, time entries, expense reports, benefit
/// enrollments, pensions, stock-based compensation).
///
/// Holds the generated records, the journal entries derived from pension and
/// stock compensation expense, and a set of summary counters. The counters
/// are plain `usize` fields stored alongside the collections; nothing in
/// this type keeps them in sync with the collection lengths.
#[derive(Debug, Clone, Default)]
pub struct HrSnapshot {
    /// Payroll runs (actual data).
    pub payroll_runs: Vec<PayrollRun>,
    /// Payroll line items (actual data).
    pub payroll_line_items: Vec<PayrollLineItem>,
    /// Time entries (actual data).
    pub time_entries: Vec<TimeEntry>,
    /// Expense reports (actual data).
    pub expense_reports: Vec<ExpenseReport>,
    /// Benefit enrollments (actual data).
    pub benefit_enrollments: Vec<BenefitEnrollment>,
    /// Defined benefit pension plans (IAS 19 / ASC 715).
    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
    /// Pension obligation (DBO) roll-forwards.
    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
    /// Plan asset roll-forwards.
    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
    /// Pension disclosures.
    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
    /// Journal entries generated from pension expense and OCI remeasurements.
    pub pension_journal_entries: Vec<JournalEntry>,
    /// Stock grants (ASC 718 / IFRS 2).
    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
    /// Stock-based compensation period expense records.
    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
    /// Journal entries generated from stock-based compensation expense.
    pub stock_comp_journal_entries: Vec<JournalEntry>,
    /// Payroll run count.
    pub payroll_run_count: usize,
    /// Payroll line item count.
    pub payroll_line_item_count: usize,
    /// Time entry count.
    pub time_entry_count: usize,
    /// Expense report count.
    pub expense_report_count: usize,
    /// Benefit enrollment count.
    pub benefit_enrollment_count: usize,
    /// Pension plan count.
    pub pension_plan_count: usize,
    /// Stock grant count.
    pub stock_grant_count: usize,
}
966
/// Accounting standards data snapshot.
///
/// Covers revenue recognition, impairment, business combinations, expected
/// credit losses, provisions and contingent liabilities, IAS 21 currency
/// translation, leases, fair value measurements and framework
/// reconciliation, plus the journal entries derived from several of them
/// and a set of summary counters.
#[derive(Debug, Clone, Default)]
pub struct AccountingStandardsSnapshot {
    /// Revenue recognition contracts (actual data).
    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
    /// Impairment tests (actual data).
    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
    /// Business combinations (IFRS 3 / ASC 805).
    pub business_combinations:
        Vec<datasynth_core::models::business_combination::BusinessCombination>,
    /// Journal entries generated from business combinations (Day 1 + amortization).
    pub business_combination_journal_entries: Vec<JournalEntry>,
    /// ECL models (IFRS 9 / ASC 326).
    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
    /// ECL provision movements.
    pub ecl_provision_movements:
        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
    /// Journal entries from ECL provision.
    pub ecl_journal_entries: Vec<JournalEntry>,
    /// Provisions (IAS 37 / ASC 450).
    pub provisions: Vec<datasynth_core::models::provision::Provision>,
    /// Provision movement roll-forwards (IAS 37 / ASC 450).
    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
    /// Contingent liabilities (IAS 37 / ASC 450).
    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
    /// Journal entries from provisions.
    pub provision_journal_entries: Vec<JournalEntry>,
    /// IAS 21 functional currency translation results (one per entity per period).
    pub currency_translation_results:
        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
    /// Revenue recognition contract count.
    pub revenue_contract_count: usize,
    /// Impairment test count.
    pub impairment_test_count: usize,
    /// Business combination count.
    pub business_combination_count: usize,
    /// ECL model count.
    pub ecl_model_count: usize,
    /// Provision count.
    pub provision_count: usize,
    /// Currency translation result count (IAS 21).
    pub currency_translation_count: usize,
    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
    /// ROU asset + lease liability details.
    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
    pub fair_value_measurements:
        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
    /// Framework difference records (dual-reporting only).
    pub framework_differences:
        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
    /// Per-entity framework reconciliation (dual-reporting only).
    pub framework_reconciliations:
        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
    /// Lease count (for stats logging).
    pub lease_count: usize,
    /// Fair value measurement count (for stats logging).
    pub fair_value_measurement_count: usize,
    /// Framework difference count (for stats logging).
    pub framework_difference_count: usize,
}
1027
/// Compliance regulations framework snapshot (standards, cross-references,
/// jurisdictions, procedures, findings, filings, graph).
///
/// `Default` yields empty collections and no compliance graph.
#[derive(Debug, Clone, Default)]
pub struct ComplianceRegulationsSnapshot {
    /// Flattened standard records for output.
    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
    /// Cross-reference records.
    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
    /// Jurisdiction profile records.
    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
    /// Generated audit procedures.
    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
    /// Generated compliance findings.
    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
    /// Generated regulatory filings.
    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
    /// Compliance graph (if graph integration enabled); `None` otherwise.
    pub compliance_graph: Option<datasynth_graph::Graph>,
}
1046
/// Manufacturing data snapshot (production orders, quality inspections,
/// cycle counts, BOMs, inventory movements).
///
/// Each collection has a matching `usize` counter field. The counters are
/// stored separately; nothing in this type ties them to the collection
/// lengths.
#[derive(Debug, Clone, Default)]
pub struct ManufacturingSnapshot {
    /// Production orders (actual data).
    pub production_orders: Vec<ProductionOrder>,
    /// Quality inspections (actual data).
    pub quality_inspections: Vec<QualityInspection>,
    /// Cycle counts (actual data).
    pub cycle_counts: Vec<CycleCount>,
    /// BOM components (actual data).
    pub bom_components: Vec<BomComponent>,
    /// Inventory movements (actual data).
    pub inventory_movements: Vec<InventoryMovement>,
    /// Production order count.
    pub production_order_count: usize,
    /// Quality inspection count.
    pub quality_inspection_count: usize,
    /// Number of cycle counts.
    pub cycle_count_count: usize,
    /// BOM component count.
    pub bom_component_count: usize,
    /// Inventory movement count.
    pub inventory_movement_count: usize,
}
1071
/// Sales, KPI, and budget data snapshot.
///
/// `Default` yields empty collections and zero counters.
#[derive(Debug, Clone, Default)]
pub struct SalesKpiBudgetsSnapshot {
    /// Sales quotes (actual data).
    pub sales_quotes: Vec<SalesQuote>,
    /// Management KPIs (actual data).
    pub kpis: Vec<ManagementKpi>,
    /// Budgets (actual data).
    pub budgets: Vec<Budget>,
    /// Sales quote count.
    pub sales_quote_count: usize,
    /// Management KPI count.
    pub kpi_count: usize,
    /// Budget line count.
    ///
    /// NOTE(review): named for budget *lines*, so presumably not the same
    /// as `budgets.len()` — confirm against the code that sets it.
    pub budget_line_count: usize,
}
1088
/// Anomaly labels generated during injection.
///
/// `Default` yields no labels, no summary and an empty per-type map.
#[derive(Debug, Clone, Default)]
pub struct AnomalyLabels {
    /// All anomaly labels.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary statistics; `None` when no summary is available.
    pub summary: Option<AnomalySummary>,
    /// Count by anomaly type.
    ///
    /// NOTE(review): the `String` key is presumably the anomaly type's
    /// name — confirm against the code that fills this map.
    pub by_type: HashMap<String, usize>,
}
1099
/// Balance validation results from running balance tracker.
///
/// A default-constructed value has `validated == false` and
/// `is_balanced == false`, so check `validated` before interpreting
/// `is_balanced`.
#[derive(Debug, Clone, Default)]
pub struct BalanceValidationResult {
    /// Whether validation was performed.
    pub validated: bool,
    /// Whether balance sheet equation is satisfied.
    pub is_balanced: bool,
    /// Number of entries processed.
    pub entries_processed: u64,
    /// Total debits across all entries.
    pub total_debits: rust_decimal::Decimal,
    /// Total credits across all entries.
    pub total_credits: rust_decimal::Decimal,
    /// Number of accounts tracked.
    pub accounts_tracked: usize,
    /// Number of companies tracked.
    pub companies_tracked: usize,
    /// Validation errors encountered.
    pub validation_errors: Vec<ValidationError>,
    /// Whether any unbalanced entries were found.
    pub has_unbalanced_entries: bool,
}
1122
/// Tax data snapshot (jurisdictions, codes, tax lines, returns, provisions,
/// withholding, deferred tax, tax posting journal entries).
///
/// `Default` yields empty collections, zero counters and the default
/// `DeferredTaxSnapshot`.
#[derive(Debug, Clone, Default)]
pub struct TaxSnapshot {
    /// Tax jurisdictions.
    pub jurisdictions: Vec<TaxJurisdiction>,
    /// Tax codes.
    pub codes: Vec<TaxCode>,
    /// Tax lines computed on documents.
    pub tax_lines: Vec<TaxLine>,
    /// Tax returns filed per period.
    pub tax_returns: Vec<TaxReturn>,
    /// Tax provisions.
    pub tax_provisions: Vec<TaxProvision>,
    /// Withholding tax records.
    pub withholding_records: Vec<WithholdingTaxRecord>,
    /// Tax anomaly labels.
    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
    /// Jurisdiction count.
    pub jurisdiction_count: usize,
    /// Code count.
    pub code_count: usize,
    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
    /// Journal entries posting tax payable/receivable from computed tax lines.
    pub tax_posting_journal_entries: Vec<JournalEntry>,
}
1149
/// Intercompany data snapshot (group structure, IC matched pairs, IC journal
/// entries, eliminations, NCI measurements).
///
/// Derives `Serialize` and `Deserialize`. `ic_document_chains` is excluded
/// from both directions via `#[serde(skip)]`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntercompanySnapshot {
    /// Group ownership structure (parent/subsidiary/associate relationships).
    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
    /// IC matched pairs (transaction pairs between related entities).
    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
    /// IC journal entries generated from matched pairs (seller side).
    pub seller_journal_entries: Vec<JournalEntry>,
    /// IC journal entries generated from matched pairs (buyer side).
    pub buyer_journal_entries: Vec<JournalEntry>,
    /// Elimination entries for consolidation.
    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
    /// NCI measurements derived from group structure ownership percentages.
    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
    ///
    /// Skipped by serde: never written when serializing, and reset to its
    /// default (`None`) when deserializing.
    #[serde(skip)]
    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
    /// IC matched pair count.
    pub matched_pair_count: usize,
    /// IC elimination entry count.
    pub elimination_entry_count: usize,
    /// IC matching rate (0.0 to 1.0).
    ///
    /// The range is a convention; the `f64` type does not enforce it.
    pub match_rate: f64,
}
1175
/// ESG data snapshot (emissions, energy, water, waste, social, governance,
/// supply chain, disclosures).
///
/// Also carries materiality assessments, climate scenarios, ESG anomaly
/// labels and two summary counters. `Default` yields empty collections and
/// zero counters.
#[derive(Debug, Clone, Default)]
pub struct EsgSnapshot {
    /// Emission records (scope 1, 2, 3).
    pub emissions: Vec<EmissionRecord>,
    /// Energy consumption records.
    pub energy: Vec<EnergyConsumption>,
    /// Water usage records.
    pub water: Vec<WaterUsage>,
    /// Waste records.
    pub waste: Vec<WasteRecord>,
    /// Workforce diversity metrics.
    pub diversity: Vec<WorkforceDiversityMetric>,
    /// Pay equity metrics.
    pub pay_equity: Vec<PayEquityMetric>,
    /// Safety incidents.
    pub safety_incidents: Vec<SafetyIncident>,
    /// Safety metrics.
    pub safety_metrics: Vec<SafetyMetric>,
    /// Governance metrics.
    pub governance: Vec<GovernanceMetric>,
    /// Supplier ESG assessments.
    pub supplier_assessments: Vec<SupplierEsgAssessment>,
    /// Materiality assessments.
    pub materiality: Vec<MaterialityAssessment>,
    /// ESG disclosures.
    pub disclosures: Vec<EsgDisclosure>,
    /// Climate scenarios.
    pub climate_scenarios: Vec<ClimateScenario>,
    /// ESG anomaly labels.
    pub anomaly_labels: Vec<EsgAnomalyLabel>,
    /// Total emission record count.
    pub emission_count: usize,
    /// Total disclosure count.
    pub disclosure_count: usize,
}
1212
/// Treasury data snapshot (cash management, cash pooling, hedging, debt,
/// guarantees, netting).
///
/// Also carries treasury anomaly labels and the journal entries derived from
/// treasury instruments. `Default` yields all-empty collections.
#[derive(Debug, Clone, Default)]
pub struct TreasurySnapshot {
    /// Cash positions (daily balances per account).
    pub cash_positions: Vec<CashPosition>,
    /// Cash forecasts.
    pub cash_forecasts: Vec<CashForecast>,
    /// Cash pools.
    pub cash_pools: Vec<CashPool>,
    /// Cash pool sweep transactions.
    pub cash_pool_sweeps: Vec<CashPoolSweep>,
    /// Hedging instruments.
    pub hedging_instruments: Vec<HedgingInstrument>,
    /// Hedge relationships (ASC 815/IFRS 9 designations).
    pub hedge_relationships: Vec<HedgeRelationship>,
    /// Debt instruments.
    pub debt_instruments: Vec<DebtInstrument>,
    /// Bank guarantees and letters of credit.
    pub bank_guarantees: Vec<BankGuarantee>,
    /// Intercompany netting runs.
    pub netting_runs: Vec<NettingRun>,
    /// Treasury anomaly labels.
    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
    /// Journal entries generated from treasury instruments (debt interest accruals,
    /// hedge MTM, cash pool sweeps).
    pub journal_entries: Vec<JournalEntry>,
}
1240
/// Project accounting data snapshot (projects, costs, revenue, earned value
/// metrics, change orders, milestones).
///
/// `Default` yields all-empty collections.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects with WBS hierarchies.
    pub projects: Vec<Project>,
    /// Project cost lines (linked from source documents).
    pub cost_lines: Vec<ProjectCostLine>,
    /// Revenue recognition records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics.
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1257
1258/// Complete result of enhanced generation run.
1259#[derive(Debug, Default)]
1260pub struct EnhancedGenerationResult {
1261    /// Generated chart of accounts.
1262    pub chart_of_accounts: ChartOfAccounts,
1263    /// Master data snapshot.
1264    pub master_data: MasterDataSnapshot,
1265    /// Document flow snapshot.
1266    pub document_flows: DocumentFlowSnapshot,
1267    /// Subledger snapshot (linked from document flows).
1268    pub subledger: SubledgerSnapshot,
1269    /// OCPM event log snapshot (if OCPM generation enabled).
1270    pub ocpm: OcpmSnapshot,
1271    /// Audit data snapshot (if audit generation enabled).
1272    pub audit: AuditSnapshot,
1273    /// Banking KYC/AML data snapshot (if banking generation enabled).
1274    pub banking: BankingSnapshot,
1275    /// Graph export snapshot (if graph export enabled).
1276    pub graph_export: GraphExportSnapshot,
1277    /// S2C sourcing data snapshot (if sourcing generation enabled).
1278    pub sourcing: SourcingSnapshot,
1279    /// Financial reporting snapshot (financial statements + bank reconciliations).
1280    pub financial_reporting: FinancialReportingSnapshot,
1281    /// HR data snapshot (payroll, time entries, expenses).
1282    pub hr: HrSnapshot,
1283    /// Accounting standards snapshot (revenue recognition, impairment).
1284    pub accounting_standards: AccountingStandardsSnapshot,
1285    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1286    pub manufacturing: ManufacturingSnapshot,
1287    /// Sales, KPI, and budget snapshot.
1288    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1289    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1290    pub tax: TaxSnapshot,
1291    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1292    pub esg: EsgSnapshot,
1293    /// Treasury data snapshot (cash management, hedging, debt).
1294    pub treasury: TreasurySnapshot,
1295    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1296    pub project_accounting: ProjectAccountingSnapshot,
1297    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1298    pub process_evolution: Vec<ProcessEvolutionEvent>,
1299    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1300    pub organizational_events: Vec<OrganizationalEvent>,
1301    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1302    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1303    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1304    pub intercompany: IntercompanySnapshot,
1305    /// Generated journal entries.
1306    pub journal_entries: Vec<JournalEntry>,
1307    /// Anomaly labels (if injection enabled).
1308    pub anomaly_labels: AnomalyLabels,
1309    /// Balance validation results (if validation enabled).
1310    pub balance_validation: BalanceValidationResult,
1311    /// Data quality statistics (if injection enabled).
1312    pub data_quality_stats: DataQualityStats,
1313    /// Data quality issue records (if injection enabled).
1314    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1315    /// Generation statistics.
1316    pub statistics: EnhancedGenerationStatistics,
1317    /// Data lineage graph (if tracking enabled).
1318    pub lineage: Option<super::lineage::LineageGraph>,
1319    /// Quality gate evaluation result.
1320    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1321    /// Internal controls (if controls generation enabled).
1322    pub internal_controls: Vec<InternalControl>,
1323    /// SoD (Segregation of Duties) violations identified during control application.
1324    ///
1325    /// Each record corresponds to a journal entry where `sod_violation == true`.
1326    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1327    /// Opening balances (if opening balance generation enabled).
1328    pub opening_balances: Vec<GeneratedOpeningBalance>,
1329    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1330    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1331    /// Counterfactual (original, mutated) JE pairs for ML training.
1332    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1333    /// Fraud red-flag indicators on P2P/O2C documents.
1334    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1335    /// Collusion rings (coordinated fraud networks).
1336    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1337    /// Bi-temporal version chains for vendor entities.
1338    pub temporal_vendor_chains:
1339        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1340    /// Entity relationship graph (nodes + edges with strength scores).
1341    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1342    /// Cross-process links (P2P ↔ O2C via inventory movements).
1343    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1344    /// Industry-specific GL accounts and metadata.
1345    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1346    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1347    pub compliance_regulations: ComplianceRegulationsSnapshot,
1348    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1349    /// industry benchmarks, management reports, drift events). Empty
1350    /// when `analytics_metadata.enabled = false`.
1351    pub analytics_metadata: AnalyticsMetadataSnapshot,
1352    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1353    /// KS) over the generated amount distribution.  `None` when
1354    /// `distributions.validation.enabled = false`.
1355    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1356    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1357    /// customer value-segment labels, and industry-specific metadata
1358    /// populated from the previously-inert `vendor_network`,
1359    /// `customer_segmentation`, and `industry_specific` schema
1360    /// sections. Empty when those sections are disabled.
1361    pub interconnectivity: InterconnectivitySnapshot,
1362}
1363
/// v4.1.3+: interconnectivity snapshot. Populated when
/// `vendor_network.enabled` / `customer_segmentation.enabled` /
/// `industry_specific.enabled` are set. Holds tier / segment / industry
/// labels for generated entities so downstream tooling (graph export,
/// risk models) can consume them without re-deriving from scratch.
///
/// Every field is a `Vec`, so the derived `Default` yields a snapshot in
/// which all collections are empty.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(vendor_id, tier)` pairs. Tier 1 = strategic / primary; Tier 2
    /// = sub-tier suppliers to tier 1; Tier 3 = sub-sub-tier.
    pub vendor_tiers: Vec<(String, u8)>,
    /// `(vendor_id, cluster_label)` pairs where cluster_label is one of
    /// `"reliable_strategic" / "standard_operational" / "transactional"
    /// / "problematic"`. The label is a plain `String`; the type does not
    /// enforce this set.
    pub vendor_clusters: Vec<(String, String)>,
    /// `(customer_id, value_segment)` pairs where value_segment is one
    /// of `"enterprise" / "mid_market" / "smb" / "consumer"`. Stored as a
    /// plain `String`; the type does not enforce this set.
    pub customer_value_segments: Vec<(String, String)>,
    /// `(customer_id, lifecycle_stage)` pairs where stage is one of
    /// `"prospect" / "new" / "growth" / "mature" / "at_risk" /
    /// "churned" / "won_back"`. Stored as a plain `String`; the type does
    /// not enforce this set.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Summary: industry-specific knob applied, if any (e.g.
    /// `"manufacturing.bom_depth=3"`).
    pub industry_metadata: Vec<String>,
}
1389
/// v3.3.0: snapshot for the analytics-metadata phase.
///
/// Groups the four record collections produced by that phase. Every field
/// is a `Vec`, so the derived `Default` yields a snapshot in which all
/// collections are empty.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparative balances per account, per entity.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks for the configured industry.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management-report artefacts (dashboards, MDA sections).
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Drift-event labels emitted from the post-generation sweep.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1402
/// Enhanced statistics about a generation run.
///
/// A flat collection of counters and timings, one field per metric. The
/// derived `Default` zeroes every counter and leaves every `Option` as
/// `None`.
///
/// Serialization notes: fields marked `#[serde(default)]` take their type's
/// default value when they are absent from deserialized input; the numeric
/// fields without that attribute must be present. The `neural_*` fields are
/// additionally omitted from serialized output while they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data counts: vendors.
    pub vendor_count: usize,
    /// Master data counts: customers.
    pub customer_count: usize,
    /// Master data counts: materials.
    pub material_count: usize,
    /// Master data counts: assets.
    pub asset_count: usize,
    /// Master data counts: employees.
    pub employee_count: usize,
    /// Document flow counts: P2P chains.
    pub p2p_chain_count: usize,
    /// Document flow counts: O2C chains.
    pub o2c_chain_count: usize,
    /// Subledger counts: AP invoices.
    pub ap_invoice_count: usize,
    /// Subledger counts: AR invoices.
    pub ar_invoice_count: usize,
    /// OCPM counts: events.
    pub ocpm_event_count: usize,
    /// OCPM counts: objects.
    pub ocpm_object_count: usize,
    /// OCPM counts: cases.
    pub ocpm_case_count: usize,
    /// Audit counts: engagements.
    pub audit_engagement_count: usize,
    /// Audit counts: workpapers.
    pub audit_workpaper_count: usize,
    /// Audit counts: evidence items.
    pub audit_evidence_count: usize,
    /// Audit counts: risks.
    pub audit_risk_count: usize,
    /// Audit counts: findings.
    pub audit_finding_count: usize,
    /// Audit counts: judgments.
    pub audit_judgment_count: usize,
    /// ISA 505 confirmation counts: confirmations.
    #[serde(default)]
    pub audit_confirmation_count: usize,
    /// ISA 505 confirmation counts: confirmation responses.
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    /// ISA 330/530 procedure step and sample counts: procedure steps.
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    /// ISA 330/530 procedure step and sample counts: samples.
    #[serde(default)]
    pub audit_sample_count: usize,
    /// ISA 520 analytical procedure counts.
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    /// ISA 610 internal audit counts: internal audit functions.
    #[serde(default)]
    pub audit_ia_function_count: usize,
    /// ISA 610 internal audit counts: internal audit reports.
    #[serde(default)]
    pub audit_ia_report_count: usize,
    /// ISA 550 related party counts: related parties.
    #[serde(default)]
    pub audit_related_party_count: usize,
    /// ISA 550 related party counts: related party transactions.
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking counts: customers.
    pub banking_customer_count: usize,
    /// Banking counts: accounts.
    pub banking_account_count: usize,
    /// Banking counts: transactions.
    pub banking_transaction_count: usize,
    /// Banking counts: suspicious items.
    pub banking_suspicious_count: usize,
    /// Graph export counts: exports.
    pub graph_export_count: usize,
    /// Graph export counts: nodes.
    pub graph_node_count: usize,
    /// Graph export counts: edges.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// v4.1.1+: number of customer names enriched by LLM.
    #[serde(default)]
    pub llm_customers_enriched: usize,
    /// v4.1.1+: number of material descriptions enriched by LLM.
    #[serde(default)]
    pub llm_materials_enriched: usize,
    /// v4.1.1+: number of audit finding titles enriched by LLM.
    #[serde(default)]
    pub llm_findings_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Hybrid-diffusion blend weight actually applied (after clamp to [0,1]).
    /// `None` when the neural/hybrid backend is not active.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    /// How many columns were routed through the neural backend.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing counts: sourcing projects.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing counts: RFx events.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing counts: bids.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing counts: contracts.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing counts: catalog items.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing counts: scorecards.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting counts: financial statements.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting counts: bank reconciliations.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR counts: payroll runs.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR counts: time entries.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR counts: expense reports.
    #[serde(default)]
    pub expense_report_count: usize,
    /// HR counts: benefit enrollments.
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    /// HR counts: pension plans.
    #[serde(default)]
    pub pension_plan_count: usize,
    /// HR counts: stock grants.
    #[serde(default)]
    pub stock_grant_count: usize,
    /// Accounting standards counts: revenue contracts.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards counts: impairment tests.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Accounting standards counts: business combinations.
    #[serde(default)]
    pub business_combination_count: usize,
    /// Accounting standards counts: ECL models.
    #[serde(default)]
    pub ecl_model_count: usize,
    /// Accounting standards counts: provisions.
    #[serde(default)]
    pub provision_count: usize,
    /// Manufacturing counts: production orders.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing counts: quality inspections.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing counts: cycle counts.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Manufacturing counts: BOM components.
    #[serde(default)]
    pub bom_component_count: usize,
    /// Manufacturing counts: inventory movements.
    #[serde(default)]
    pub inventory_movement_count: usize,
    /// Sales & reporting counts: sales quotes.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting counts: KPIs.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting counts: budget lines.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax counts: jurisdictions.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax counts: tax codes.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG counts: emission records.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG counts: disclosures.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany counts: matched pairs.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany counts: eliminations.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
    /// Cash forecast count.
    #[serde(default)]
    pub cash_forecast_count: usize,
    /// Cash pool count.
    #[serde(default)]
    pub cash_pool_count: usize,
    /// Process evolution event count.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    /// Organizational event count.
    #[serde(default)]
    pub organizational_event_count: usize,
    /// Counterfactual pair count.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    /// Number of fraud red-flag indicators generated.
    #[serde(default)]
    pub red_flag_count: usize,
    /// Number of collusion rings generated.
    #[serde(default)]
    pub collusion_ring_count: usize,
    /// Number of bi-temporal vendor version chains generated.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    /// Number of nodes in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    /// Number of edges in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    /// Number of cross-process links generated.
    #[serde(default)]
    pub cross_process_link_count: usize,
    /// Number of disruption events generated.
    #[serde(default)]
    pub disruption_event_count: usize,
    /// Number of industry-specific GL accounts generated.
    #[serde(default)]
    pub industry_gl_account_count: usize,
    /// Number of period-close journal entries generated (tax provision + closing entries).
    #[serde(default)]
    pub period_close_je_count: usize,
}
1672
/// Enhanced orchestrator with full feature integration.
///
/// Holds the validated configuration plus the shared state and helpers
/// (resource guard, country packs, template provider, temporal context)
/// built once in [`EnhancedOrchestrator::new`].
pub struct EnhancedOrchestrator {
    /// Generator configuration; validated by `new` before being stored.
    config: GeneratorConfig,
    /// Phase configuration, including the `show_progress` flag toggled by
    /// `with_progress`.
    phase_config: PhaseConfig,
    /// Chart of accounts; `None` when the orchestrator is first constructed.
    coa: Option<Arc<ChartOfAccounts>>,
    /// Master data snapshot; initialised to `MasterDataSnapshot::default()`.
    master_data: MasterDataSnapshot,
    /// Seed as set by `new`: `config.global.seed` when present, otherwise
    /// a randomly drawn value.
    seed: u64,
    /// Progress-bar container; `None` on construction and created by
    /// `with_progress(true)`.
    multi_progress: Option<MultiProgress>,
    /// Resource guard for memory, disk, and CPU monitoring
    resource_guard: ResourceGuard,
    /// Output path for disk space monitoring
    output_path: Option<PathBuf>,
    /// Copula generators for preserving correlations (from fingerprint)
    copula_generators: Vec<CopulaGeneratorSpec>,
    /// Country pack registry for localized data generation
    country_pack_registry: datasynth_core::CountryPackRegistry,
    /// Optional streaming sink for phase-by-phase output
    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
    /// Shared template provider for user-supplied template packs.
    ///
    /// Constructed from `config.templates.path` at orchestrator creation
    /// time. When the path is `None`, this is still populated with an
    /// embedded-only provider so generators can always call trait methods
    /// without an `Option<…>` guard. v3.2.0+.
    template_provider: datasynth_core::templates::SharedTemplateProvider,
    /// v3.4.1+ temporal context for business-day / holiday awareness.
    ///
    /// Populated only when `temporal_patterns.business_days.enabled`. When
    /// `None`, document-flow / HR / treasury / period-close generators keep
    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
    /// for the same seed).
    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
    /// Optional shard-mode context (set by group-engine shard runners).
    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
    shard_context: Option<crate::shard_context::ShardContext>,
}
1709
1710impl EnhancedOrchestrator {
1711    /// Create a new enhanced orchestrator.
1712    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1713        datasynth_config::validate_config(&config)?;
1714
1715        let seed = config.global.seed.unwrap_or_else(rand::random);
1716
1717        // Build resource guard from config
1718        let resource_guard = Self::build_resource_guard(&config, None);
1719
1720        // Build country pack registry from config
1721        let country_pack_registry = match &config.country_packs {
1722            Some(cp) => {
1723                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1724                    .map_err(|e| SynthError::config(e.to_string()))?
1725            }
1726            None => datasynth_core::CountryPackRegistry::builtin_only()
1727                .map_err(|e| SynthError::config(e.to_string()))?,
1728        };
1729
1730        // Build the shared template provider from config.templates.path.
1731        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1732        // `Some(path)` → load file/dir and honour `merge_strategy`.
1733        let template_provider = Self::build_template_provider(&config)?;
1734
1735        // v3.4.1: build a shared temporal context when
1736        // `temporal_patterns.business_days.enabled`. `None` preserves the
1737        // raw-RNG date-offset behaviour per-generator.
1738        let temporal_context = Self::build_temporal_context(&config)?;
1739
1740        Ok(Self {
1741            config,
1742            phase_config,
1743            coa: None,
1744            master_data: MasterDataSnapshot::default(),
1745            seed,
1746            multi_progress: None,
1747            resource_guard,
1748            output_path: None,
1749            copula_generators: Vec::new(),
1750            country_pack_registry,
1751            phase_sink: None,
1752            template_provider,
1753            temporal_context,
1754            shard_context: None,
1755        })
1756    }
1757
1758    /// Install shard-mode context.  Called by the group shard runner
1759    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1760    /// entry point).  Has no effect on single-entity runs.
1761    ///
1762    /// See [`crate::shard_context::ShardContext`] for rationale.
1763    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1764        self.shard_context = Some(ctx);
1765    }
1766
1767    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1768    ///
1769    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1770    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1771    /// enabled. Returns `Err` only for unrecoverable config errors.
1772    fn build_temporal_context(
1773        config: &GeneratorConfig,
1774    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1775        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1776
1777        let tp = &config.temporal_patterns;
1778        if !tp.enabled || !tp.business_days.enabled {
1779            return Ok(None);
1780        }
1781
1782        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1783            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1784        let end_date = start_date + chrono::Months::new(config.global.period_months);
1785
1786        let region_code = tp
1787            .calendars
1788            .regions
1789            .first()
1790            .cloned()
1791            .unwrap_or_else(|| "US".to_string());
1792        let region = parse_region_code(&region_code);
1793
1794        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1795    }
1796
1797    /// Build the shared template provider from `config.templates`.
1798    ///
1799    /// Always returns a provider — falls back to embedded-only when
1800    /// `config.templates.path` is `None`. The merge-strategy from config
1801    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1802    /// orchestrator-construction time are fatal (preferable to silently
1803    /// using embedded pools when the user supplied a bad path).
1804    fn build_template_provider(
1805        config: &GeneratorConfig,
1806    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1807        use datasynth_core::templates::{
1808            loader::{MergeStrategy, TemplateLoader},
1809            DefaultTemplateProvider,
1810        };
1811        use std::sync::Arc;
1812
1813        let provider = match &config.templates.path {
1814            None => DefaultTemplateProvider::new(),
1815            Some(path) => {
1816                let data = if path.is_dir() {
1817                    TemplateLoader::load_from_directory(path)
1818                } else {
1819                    TemplateLoader::load_from_file(path)
1820                }
1821                .map_err(|e| {
1822                    SynthError::config(format!(
1823                        "Failed to load templates from {}: {e}",
1824                        path.display()
1825                    ))
1826                })?;
1827                let strategy = match config.templates.merge_strategy {
1828                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1829                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1830                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1831                        MergeStrategy::MergePreferFile
1832                    }
1833                };
1834                DefaultTemplateProvider::with_templates(data, strategy)
1835            }
1836        };
1837        Ok(Arc::new(provider))
1838    }
1839
1840    /// Create with default phase config.
1841    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1842        Self::new(config, PhaseConfig::default())
1843    }
1844
1845    /// Set a streaming phase sink for real-time output (builder pattern).
1846    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1847        self.phase_sink = Some(sink);
1848        self
1849    }
1850
1851    /// Set a streaming phase sink on an existing orchestrator.
1852    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1853        self.phase_sink = Some(sink);
1854    }
1855
1856    /// Emit a batch of items to the phase sink (if configured).
1857    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1858        if let Some(ref sink) = self.phase_sink {
1859            for item in items {
1860                if let Ok(value) = serde_json::to_value(item) {
1861                    if let Err(e) = sink.emit(phase, type_name, &value) {
1862                        warn!(
1863                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1864                        );
1865                    }
1866                }
1867            }
1868            if let Err(e) = sink.phase_complete(phase) {
1869                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1870            }
1871        }
1872    }
1873
1874    /// Enable/disable progress bars.
1875    pub fn with_progress(mut self, show: bool) -> Self {
1876        self.phase_config.show_progress = show;
1877        if show {
1878            self.multi_progress = Some(MultiProgress::new());
1879        }
1880        self
1881    }
1882
1883    /// Set the output path for disk space monitoring.
1884    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1885        let path = path.into();
1886        self.output_path = Some(path.clone());
1887        // Rebuild resource guard with the output path
1888        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1889        self
1890    }
1891
1892    /// Access the country pack registry.
1893    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1894        &self.country_pack_registry
1895    }
1896
1897    /// Look up a country pack by country code string.
1898    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1899        self.country_pack_registry.get_by_str(country)
1900    }
1901
1902    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1903    /// company, defaulting to `"US"` if no companies are configured.
1904    fn primary_country_code(&self) -> &str {
1905        self.config
1906            .companies
1907            .first()
1908            .map(|c| c.country.as_str())
1909            .unwrap_or("US")
1910    }
1911
1912    /// Resolve the country pack for the primary (first) company.
1913    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1914        self.country_pack_for(self.primary_country_code())
1915    }
1916
1917    /// Resolve the CoA framework from config/country-pack.
1918    fn resolve_coa_framework(&self) -> CoAFramework {
1919        if self.config.accounting_standards.enabled {
1920            match self.config.accounting_standards.framework {
1921                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1922                    return CoAFramework::FrenchPcg;
1923                }
1924                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1925                    return CoAFramework::GermanSkr04;
1926                }
1927                _ => {}
1928            }
1929        }
1930        // Fallback: derive from country pack
1931        let pack = self.primary_pack();
1932        match pack.accounting.framework.as_str() {
1933            "french_gaap" => CoAFramework::FrenchPcg,
1934            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1935            _ => CoAFramework::UsGaap,
1936        }
1937    }
1938
1939    /// Check if copula generators are available.
1940    ///
1941    /// Returns true if the orchestrator has copula generators for preserving
1942    /// correlations (typically from fingerprint-based generation).
1943    pub fn has_copulas(&self) -> bool {
1944        !self.copula_generators.is_empty()
1945    }
1946
1947    /// Get the copula generators.
1948    ///
1949    /// Returns a reference to the copula generators for use during generation.
1950    /// These can be used to generate correlated samples that preserve the
1951    /// statistical relationships from the source data.
1952    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
1953        &self.copula_generators
1954    }
1955
1956    /// Get a mutable reference to the copula generators.
1957    ///
1958    /// Allows generators to sample from copulas during data generation.
1959    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
1960        &mut self.copula_generators
1961    }
1962
1963    /// Sample correlated values from a named copula.
1964    ///
1965    /// Returns None if the copula doesn't exist.
1966    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
1967        self.copula_generators
1968            .iter_mut()
1969            .find(|c| c.name == copula_name)
1970            .map(|c| c.generator.sample())
1971    }
1972
1973    /// Create an orchestrator from a fingerprint file.
1974    ///
1975    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
1976    /// and creates an orchestrator configured to generate data matching
1977    /// the statistical properties of the original data.
1978    ///
1979    /// # Arguments
1980    /// * `fingerprint_path` - Path to the .dsf fingerprint file
1981    /// * `phase_config` - Phase configuration for generation
1982    /// * `scale` - Scale factor for row counts (1.0 = same as original)
1983    ///
1984    /// # Example
1985    /// ```no_run
1986    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
1987    /// use std::path::Path;
1988    ///
1989    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
1990    ///     Path::new("fingerprint.dsf"),
1991    ///     PhaseConfig::default(),
1992    ///     1.0,
1993    /// ).unwrap();
1994    /// ```
1995    pub fn from_fingerprint(
1996        fingerprint_path: &std::path::Path,
1997        phase_config: PhaseConfig,
1998        scale: f64,
1999    ) -> SynthResult<Self> {
2000        info!("Loading fingerprint from: {}", fingerprint_path.display());
2001
2002        // Read the fingerprint
2003        let reader = FingerprintReader::new();
2004        let fingerprint = reader
2005            .read_from_file(fingerprint_path)
2006            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2007
2008        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2009    }
2010
2011    /// Create an orchestrator from a loaded fingerprint.
2012    ///
2013    /// # Arguments
2014    /// * `fingerprint` - The loaded fingerprint
2015    /// * `phase_config` - Phase configuration for generation
2016    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2017    pub fn from_fingerprint_data(
2018        fingerprint: Fingerprint,
2019        phase_config: PhaseConfig,
2020        scale: f64,
2021    ) -> SynthResult<Self> {
2022        info!(
2023            "Synthesizing config from fingerprint (version: {}, tables: {})",
2024            fingerprint.manifest.version,
2025            fingerprint.schema.tables.len()
2026        );
2027
2028        // Generate a seed for the synthesis
2029        let seed: u64 = rand::random();
2030        info!("Fingerprint synthesis seed: {}", seed);
2031
2032        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2033        let options = SynthesisOptions {
2034            scale,
2035            seed: Some(seed),
2036            preserve_correlations: true,
2037            inject_anomalies: true,
2038        };
2039        let synthesizer = ConfigSynthesizer::with_options(options);
2040
2041        // Synthesize full result including copula generators
2042        let synthesis_result = synthesizer
2043            .synthesize_full(&fingerprint, seed)
2044            .map_err(|e| {
2045                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2046            })?;
2047
2048        // Start with a base config from the fingerprint's industry if available
2049        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2050            Self::base_config_for_industry(industry)
2051        } else {
2052            Self::base_config_for_industry("manufacturing")
2053        };
2054
2055        // Apply the synthesized patches
2056        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2057
2058        // Log synthesis results
2059        info!(
2060            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2061            fingerprint.schema.tables.len(),
2062            scale,
2063            synthesis_result.copula_generators.len()
2064        );
2065
2066        if !synthesis_result.copula_generators.is_empty() {
2067            for spec in &synthesis_result.copula_generators {
2068                info!(
2069                    "  Copula '{}' for table '{}': {} columns",
2070                    spec.name,
2071                    spec.table,
2072                    spec.columns.len()
2073                );
2074            }
2075        }
2076
2077        // Create the orchestrator with the synthesized config
2078        let mut orchestrator = Self::new(config, phase_config)?;
2079
2080        // Store copula generators for use during generation
2081        orchestrator.copula_generators = synthesis_result.copula_generators;
2082
2083        Ok(orchestrator)
2084    }
2085
2086    /// Create a base config for a given industry.
2087    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2088        use datasynth_config::presets::create_preset;
2089        use datasynth_config::TransactionVolume;
2090        use datasynth_core::models::{CoAComplexity, IndustrySector};
2091
2092        let sector = match industry.to_lowercase().as_str() {
2093            "manufacturing" => IndustrySector::Manufacturing,
2094            "retail" => IndustrySector::Retail,
2095            "financial" | "financial_services" => IndustrySector::FinancialServices,
2096            "healthcare" => IndustrySector::Healthcare,
2097            "technology" | "tech" => IndustrySector::Technology,
2098            _ => IndustrySector::Manufacturing,
2099        };
2100
2101        // Create a preset with reasonable defaults
2102        create_preset(
2103            sector,
2104            1,  // company count
2105            12, // period months
2106            CoAComplexity::Medium,
2107            TransactionVolume::TenK,
2108        )
2109    }
2110
2111    /// Apply a config patch to a GeneratorConfig.
2112    fn apply_config_patch(
2113        mut config: GeneratorConfig,
2114        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2115    ) -> GeneratorConfig {
2116        use datasynth_fingerprint::synthesis::ConfigValue;
2117
2118        for (key, value) in patch.values() {
2119            match (key.as_str(), value) {
2120                // Transaction count is handled via TransactionVolume enum on companies
2121                // Log it but cannot directly set it (would need to modify company volumes)
2122                ("transactions.count", ConfigValue::Integer(n)) => {
2123                    info!(
2124                        "Fingerprint suggests {} transactions (apply via company volumes)",
2125                        n
2126                    );
2127                }
2128                ("global.period_months", ConfigValue::Integer(n)) => {
2129                    config.global.period_months = (*n).clamp(1, 120) as u32;
2130                }
2131                ("global.start_date", ConfigValue::String(s)) => {
2132                    config.global.start_date = s.clone();
2133                }
2134                ("global.seed", ConfigValue::Integer(n)) => {
2135                    config.global.seed = Some(*n as u64);
2136                }
2137                ("fraud.enabled", ConfigValue::Bool(b)) => {
2138                    config.fraud.enabled = *b;
2139                }
2140                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2141                    config.fraud.fraud_rate = *f;
2142                }
2143                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2144                    config.data_quality.enabled = *b;
2145                }
2146                // Handle anomaly injection paths (mapped to fraud config)
2147                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2148                    config.fraud.enabled = *b;
2149                }
2150                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2151                    config.fraud.fraud_rate = *f;
2152                }
2153                _ => {
2154                    debug!("Ignoring unknown config patch key: {}", key);
2155                }
2156            }
2157        }
2158
2159        config
2160    }
2161
2162    /// Build a resource guard from the configuration.
2163    fn build_resource_guard(
2164        config: &GeneratorConfig,
2165        output_path: Option<PathBuf>,
2166    ) -> ResourceGuard {
2167        let mut builder = ResourceGuardBuilder::new();
2168
2169        // Configure memory limit if set
2170        if config.global.memory_limit_mb > 0 {
2171            builder = builder.memory_limit(config.global.memory_limit_mb);
2172        }
2173
2174        // Configure disk monitoring for output path
2175        if let Some(path) = output_path {
2176            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2177        }
2178
2179        // Use conservative degradation settings for production safety
2180        builder = builder.conservative();
2181
2182        builder.build()
2183    }
2184
    /// Check resources (memory, disk, CPU) and return degradation level.
    ///
    /// This is a thin wrapper: it calls `self.resource_guard.check()` and
    /// returns that result unchanged.
    ///
    /// Returns an error if hard limits are exceeded.
    /// Returns Ok(DegradationLevel) indicating current resource state.
    ///
    /// NOTE(review): which resources are inspected and when an error is
    /// produced is decided inside `ResourceGuard::check`, which is not
    /// visible from here. Confirm against that implementation.
    fn check_resources(&self) -> SynthResult<DegradationLevel> {
        self.resource_guard.check()
    }
2192
2193    /// Check resources with logging.
2194    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2195        let level = self.resource_guard.check()?;
2196
2197        if level != DegradationLevel::Normal {
2198            warn!(
2199                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2200                phase,
2201                level,
2202                self.resource_guard.current_memory_mb(),
2203                self.resource_guard.available_disk_mb()
2204            );
2205        }
2206
2207        Ok(level)
2208    }
2209
    /// Get current degradation actions based on resource state.
    ///
    /// Delegates directly to `self.resource_guard.get_actions()` and returns
    /// its result unchanged.
    fn get_degradation_actions(&self) -> DegradationActions {
        self.resource_guard.get_actions()
    }
2214
2215    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2216    fn check_memory_limit(&self) -> SynthResult<()> {
2217        self.check_resources()?;
2218        Ok(())
2219    }
2220
2221    /// Run the complete generation workflow.
2222    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2223        info!("Starting enhanced generation workflow");
2224        info!(
2225            "Config: industry={:?}, period_months={}, companies={}",
2226            self.config.global.industry,
2227            self.config.global.period_months,
2228            self.config.companies.len()
2229        );
2230
2231        // Set decimal serialization mode (thread-local, affects JSON output).
2232        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2233        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2234        datasynth_core::serde_decimal::set_numeric_native(is_native);
2235        struct NumericModeGuard;
2236        impl Drop for NumericModeGuard {
2237            fn drop(&mut self) {
2238                datasynth_core::serde_decimal::set_numeric_native(false);
2239            }
2240        }
2241        let _numeric_guard = if is_native {
2242            Some(NumericModeGuard)
2243        } else {
2244            None
2245        };
2246
2247        // Initial resource check before starting
2248        let initial_level = self.check_resources_with_log("initial")?;
2249        if initial_level == DegradationLevel::Emergency {
2250            return Err(SynthError::resource(
2251                "Insufficient resources to start generation",
2252            ));
2253        }
2254
2255        let mut stats = EnhancedGenerationStatistics {
2256            companies_count: self.config.companies.len(),
2257            period_months: self.config.global.period_months,
2258            ..Default::default()
2259        };
2260
2261        // Phase 1: Chart of Accounts
2262        let coa = self.phase_chart_of_accounts(&mut stats)?;
2263
2264        // Phase 2: Master Data
2265        self.phase_master_data(&mut stats)?;
2266
2267        // Emit master data to stream sink
2268        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2269        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2270        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2271
2272        // Phase 3: Document Flows + Subledger Linking
2273        let (mut document_flows, mut subledger, fa_journal_entries) =
2274            self.phase_document_flows(&mut stats)?;
2275
2276        // Emit document flows to stream sink
2277        self.emit_phase_items(
2278            "document_flows",
2279            "PurchaseOrder",
2280            &document_flows.purchase_orders,
2281        );
2282        self.emit_phase_items(
2283            "document_flows",
2284            "GoodsReceipt",
2285            &document_flows.goods_receipts,
2286        );
2287        self.emit_phase_items(
2288            "document_flows",
2289            "VendorInvoice",
2290            &document_flows.vendor_invoices,
2291        );
2292        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2293        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2294
2295        // Phase 3b: Opening Balances (before JE generation)
2296        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2297
2298        // Phase 3c: Convert opening balances to journal entries and prepend them.
2299        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2300        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2301        // balance map type.
2302        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2303            .iter()
2304            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2305            .collect();
2306        if !opening_balance_jes.is_empty() {
2307            debug!(
2308                "Prepending {} opening balance JEs to entries",
2309                opening_balance_jes.len()
2310            );
2311        }
2312
2313        // Phase 4: Journal Entries
2314        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2315
2316        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2317        // starts from the correct initial state.
2318        if !opening_balance_jes.is_empty() {
2319            let mut combined = opening_balance_jes;
2320            combined.extend(entries);
2321            entries = combined;
2322        }
2323
2324        // Phase 4c: Append FA acquisition journal entries to main entries
2325        if !fa_journal_entries.is_empty() {
2326            debug!(
2327                "Appending {} FA acquisition JEs to main entries",
2328                fa_journal_entries.len()
2329            );
2330            entries.extend(fa_journal_entries);
2331        }
2332
2333        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2334        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2335
2336        // Get current degradation actions for optional phases
2337        let actions = self.get_degradation_actions();
2338
2339        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2340        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2341
2342        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2343        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2344        if !sourcing.contracts.is_empty() {
2345            let mut linked_count = 0usize;
2346            // Collect (vendor_id, po_id) pairs from P2P chains
2347            let po_vendor_pairs: Vec<(String, String)> = document_flows
2348                .p2p_chains
2349                .iter()
2350                .map(|chain| {
2351                    (
2352                        chain.purchase_order.vendor_id.clone(),
2353                        chain.purchase_order.header.document_id.clone(),
2354                    )
2355                })
2356                .collect();
2357
2358            for chain in &mut document_flows.p2p_chains {
2359                if chain.purchase_order.contract_id.is_none() {
2360                    if let Some(contract) = sourcing
2361                        .contracts
2362                        .iter()
2363                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2364                    {
2365                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2366                        linked_count += 1;
2367                    }
2368                }
2369            }
2370
2371            // Populate reverse FK: purchase_order_ids on each contract
2372            for contract in &mut sourcing.contracts {
2373                let po_ids: Vec<String> = po_vendor_pairs
2374                    .iter()
2375                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2376                    .map(|(_, po_id)| po_id.clone())
2377                    .collect();
2378                if !po_ids.is_empty() {
2379                    contract.purchase_order_ids = po_ids;
2380                }
2381            }
2382
2383            if linked_count > 0 {
2384                debug!(
2385                    "Linked {} purchase orders to S2C contracts by vendor match",
2386                    linked_count
2387                );
2388            }
2389        }
2390
2391        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2392        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2393
2394        // Phase 5c: Append IC journal entries to main entries
2395        if !intercompany.seller_journal_entries.is_empty()
2396            || !intercompany.buyer_journal_entries.is_empty()
2397        {
2398            let ic_je_count = intercompany.seller_journal_entries.len()
2399                + intercompany.buyer_journal_entries.len();
2400            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2401            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2402            debug!(
2403                "Appended {} IC journal entries to main entries",
2404                ic_je_count
2405            );
2406        }
2407
2408        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2409        if !intercompany.elimination_entries.is_empty() {
2410            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2411                &intercompany.elimination_entries,
2412            );
2413            if !elim_jes.is_empty() {
2414                debug!(
2415                    "Appended {} elimination journal entries to main entries",
2416                    elim_jes.len()
2417                );
2418                // IC elimination net-zero assertion (v2.5 hardening)
2419                let elim_debit: rust_decimal::Decimal =
2420                    elim_jes.iter().map(|je| je.total_debit()).sum();
2421                let elim_credit: rust_decimal::Decimal =
2422                    elim_jes.iter().map(|je| je.total_credit()).sum();
2423                let elim_diff = (elim_debit - elim_credit).abs();
2424                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2425                if elim_diff > tolerance {
2426                    return Err(datasynth_core::error::SynthError::generation(format!(
2427                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2428                        elim_debit, elim_credit, elim_diff, tolerance
2429                    )));
2430                }
2431                debug!(
2432                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2433                    elim_debit, elim_credit, elim_diff
2434                );
2435                entries.extend(elim_jes);
2436            }
2437        }
2438
2439        // Phase 5e: Wire IC source documents into document flow snapshot
2440        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2441            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2442                document_flows
2443                    .customer_invoices
2444                    .extend(ic_docs.seller_invoices.iter().cloned());
2445                document_flows
2446                    .purchase_orders
2447                    .extend(ic_docs.buyer_orders.iter().cloned());
2448                document_flows
2449                    .goods_receipts
2450                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2451                document_flows
2452                    .vendor_invoices
2453                    .extend(ic_docs.buyer_invoices.iter().cloned());
2454                debug!(
2455                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2456                    ic_docs.seller_invoices.len(),
2457                    ic_docs.buyer_orders.len(),
2458                    ic_docs.buyer_goods_receipts.len(),
2459                    ic_docs.buyer_invoices.len(),
2460                );
2461            }
2462        }
2463
2464        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2465        let hr = self.phase_hr_data(&mut stats)?;
2466
2467        // Phase 6b: Generate JEs from payroll runs
2468        if !hr.payroll_runs.is_empty() {
2469            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2470            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2471            entries.extend(payroll_jes);
2472        }
2473
2474        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2475        if !hr.pension_journal_entries.is_empty() {
2476            debug!(
2477                "Generated {} JEs from pension plans",
2478                hr.pension_journal_entries.len()
2479            );
2480            entries.extend(hr.pension_journal_entries.iter().cloned());
2481        }
2482
2483        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2484        if !hr.stock_comp_journal_entries.is_empty() {
2485            debug!(
2486                "Generated {} JEs from stock-based compensation",
2487                hr.stock_comp_journal_entries.len()
2488            );
2489            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2490        }
2491
2492        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2493        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2494
2495        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2496        if !manufacturing_snap.production_orders.is_empty() {
2497            let currency = self
2498                .config
2499                .companies
2500                .first()
2501                .map(|c| c.currency.as_str())
2502                .unwrap_or("USD");
2503            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2504                &manufacturing_snap.production_orders,
2505                &manufacturing_snap.quality_inspections,
2506                currency,
2507            );
2508            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2509            entries.extend(mfg_jes);
2510        }
2511
2512        // Phase 7a-warranty: Generate warranty provisions per company
2513        if !manufacturing_snap.quality_inspections.is_empty() {
2514            let framework = match self.config.accounting_standards.framework {
2515                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2516                _ => "US_GAAP",
2517            };
2518            for company in &self.config.companies {
2519                let company_orders: Vec<_> = manufacturing_snap
2520                    .production_orders
2521                    .iter()
2522                    .filter(|o| o.company_code == company.code)
2523                    .cloned()
2524                    .collect();
2525                let company_inspections: Vec<_> = manufacturing_snap
2526                    .quality_inspections
2527                    .iter()
2528                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2529                    .cloned()
2530                    .collect();
2531                if company_inspections.is_empty() {
2532                    continue;
2533                }
2534                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2535                let warranty_result = warranty_gen.generate(
2536                    &company.code,
2537                    &company_orders,
2538                    &company_inspections,
2539                    &company.currency,
2540                    framework,
2541                );
2542                if !warranty_result.journal_entries.is_empty() {
2543                    debug!(
2544                        "Generated {} warranty provision JEs for {}",
2545                        warranty_result.journal_entries.len(),
2546                        company.code
2547                    );
2548                    entries.extend(warranty_result.journal_entries);
2549                }
2550            }
2551        }
2552
2553        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2554        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2555        {
2556            let cogs_currency = self
2557                .config
2558                .companies
2559                .first()
2560                .map(|c| c.currency.as_str())
2561                .unwrap_or("USD");
2562            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2563                &document_flows.deliveries,
2564                &manufacturing_snap.production_orders,
2565                cogs_currency,
2566            );
2567            if !cogs_jes.is_empty() {
2568                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2569                entries.extend(cogs_jes);
2570            }
2571        }
2572
2573        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2574        //
2575        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2576        // subledger inventory positions.  Here we reconcile them so that position balances
2577        // reflect the actual stock movements within the generation period.
2578        if !manufacturing_snap.inventory_movements.is_empty()
2579            && !subledger.inventory_positions.is_empty()
2580        {
2581            use datasynth_core::models::MovementType as MfgMovementType;
2582            let mut receipt_count = 0usize;
2583            let mut issue_count = 0usize;
2584            for movement in &manufacturing_snap.inventory_movements {
2585                // Find a matching position by material code and company
2586                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2587                    p.material_id == movement.material_code
2588                        && p.company_code == movement.entity_code
2589                }) {
2590                    match movement.movement_type {
2591                        MfgMovementType::GoodsReceipt => {
2592                            // Increase stock and update weighted-average cost
2593                            pos.add_quantity(
2594                                movement.quantity,
2595                                movement.value,
2596                                movement.movement_date,
2597                            );
2598                            receipt_count += 1;
2599                        }
2600                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2601                            // Decrease stock (best-effort; silently skip if insufficient)
2602                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2603                            issue_count += 1;
2604                        }
2605                        _ => {}
2606                    }
2607                }
2608            }
2609            debug!(
2610                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2611                manufacturing_snap.inventory_movements.len(),
2612                receipt_count,
2613                issue_count,
2614            );
2615        }
2616
2617        // Update final entry/line-item stats after all JE-generating phases
2618        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2619        if !entries.is_empty() {
2620            stats.total_entries = entries.len() as u64;
2621            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2622            debug!(
2623                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2624                stats.total_entries, stats.total_line_items
2625            );
2626        }
2627
2628        // Phase 7b: Apply internal controls to journal entries
2629        if self.config.internal_controls.enabled && !entries.is_empty() {
2630            info!("Phase 7b: Applying internal controls to journal entries");
2631            let control_config = ControlGeneratorConfig {
2632                exception_rate: self.config.internal_controls.exception_rate,
2633                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2634                enable_sox_marking: true,
2635                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2636                    self.config.internal_controls.sox_materiality_threshold,
2637                )
2638                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2639                ..Default::default()
2640            };
2641            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2642            for entry in &mut entries {
2643                control_gen.apply_controls(entry, &coa);
2644            }
2645            let with_controls = entries
2646                .iter()
2647                .filter(|e| !e.header.control_ids.is_empty())
2648                .count();
2649            info!(
2650                "Applied controls to {} entries ({} with control IDs assigned)",
2651                entries.len(),
2652                with_controls
2653            );
2654        }
2655
2656        // Phase 7c: Extract SoD violations from annotated journal entries.
2657        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2658        // Here we materialise those flags into standalone SodViolation records.
2659        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2660            .iter()
2661            .filter(|e| e.header.sod_violation)
2662            .filter_map(|e| {
2663                e.header.sod_conflict_type.map(|ct| {
2664                    use datasynth_core::models::{RiskLevel, SodViolation};
2665                    let severity = match ct {
2666                        datasynth_core::models::SodConflictType::PaymentReleaser
2667                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2668                            RiskLevel::Critical
2669                        }
2670                        datasynth_core::models::SodConflictType::PreparerApprover
2671                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2672                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2673                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2674                            RiskLevel::High
2675                        }
2676                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2677                            RiskLevel::Medium
2678                        }
2679                    };
2680                    let action = format!(
2681                        "SoD conflict {:?} on entry {} ({})",
2682                        ct, e.header.document_id, e.header.company_code
2683                    );
2684                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2685                })
2686            })
2687            .collect();
2688        if !sod_violations.is_empty() {
2689            info!(
2690                "Phase 7c: Extracted {} SoD violations from {} entries",
2691                sod_violations.len(),
2692                entries.len()
2693            );
2694        }
2695
2696        // Emit journal entries to stream sink (after all JE-generating phases)
2697        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2698
2699        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2700        //
2701        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2702        // document-level fraud are exempt from subsequent line-level flag
2703        // overwrites, and so downstream consumers see a coherent picture.
2704        //
2705        // Gated by `fraud.enabled` AND `fraud.document_fraud_rate` — a rate of `None` or `<= 0.0` is a no-op.
2706        {
2707            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2708            if self.config.fraud.enabled && doc_rate > 0.0 {
2709                use datasynth_core::fraud_propagation::{
2710                    inject_document_fraud, propagate_documents_to_entries,
2711                };
2712                use datasynth_core::utils::weighted_select;
2713                use datasynth_core::FraudType;
2714                use rand_chacha::rand_core::SeedableRng;
2715
2716                let dist = &self.config.fraud.fraud_type_distribution;
2717                let fraud_type_weights: [(FraudType, f64); 8] = [ // (fraud type, configured weight) pairs fed to `weighted_select`
2718                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2719                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2720                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2721                    (
2722                        FraudType::ImproperCapitalization,
2723                        dist.expense_capitalization,
2724                    ),
2725                    (FraudType::SplitTransaction, dist.split_transaction),
2726                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2727                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2728                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2729                ];
2730                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum(); // total of the eight configured weights
2731                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType { // chooses the fraud type for one tagged document
2732                    if weights_sum <= 0.0 {
2733                        FraudType::FictitiousEntry // fallback when the configured weights sum to zero or less
2734                    } else {
2735                        *weighted_select(rng, &fraud_type_weights)
2736                    }
2737                };
2738
2739                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100); // phase-specific RNG; NOTE(review): plain `+` overflows for seeds within 5100 of the integer max — confirm acceptable
2740                let mut doc_tagged = 0usize; // running total of documents tagged across all collections
2741                macro_rules! inject_into { // runs the injector over the headers of one document collection
2742                    ($collection:expr) => {{
2743                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2744                            $collection.iter_mut().map(|d| &mut d.header).collect();
2745                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick); // return value is added to the tagged-document count
2746                    }};
2747                }
2748                inject_into!(document_flows.purchase_orders); // every call shares `rng`; presumably the call order therefore influences which documents get tagged
2749                inject_into!(document_flows.goods_receipts);
2750                inject_into!(document_flows.vendor_invoices);
2751                inject_into!(document_flows.payments);
2752                inject_into!(document_flows.sales_orders);
2753                inject_into!(document_flows.deliveries);
2754                inject_into!(document_flows.customer_invoices);
2755                if doc_tagged > 0 {
2756                    info!(
2757                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2758                    );
2759                }
2760
2761                if self.config.fraud.propagate_to_lines && doc_tagged > 0 { // cascade only when requested and at least one document was tagged
2762                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> = // owned (cloned) copies of every document header, all collections
2763                        Vec::new();
2764                    headers.extend(
2765                        document_flows
2766                            .purchase_orders
2767                            .iter()
2768                            .map(|d| d.header.clone()),
2769                    );
2770                    headers.extend(
2771                        document_flows
2772                            .goods_receipts
2773                            .iter()
2774                            .map(|d| d.header.clone()),
2775                    );
2776                    headers.extend(
2777                        document_flows
2778                            .vendor_invoices
2779                            .iter()
2780                            .map(|d| d.header.clone()),
2781                    );
2782                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2783                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2784                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2785                    headers.extend(
2786                        document_flows
2787                            .customer_invoices
2788                            .iter()
2789                            .map(|d| d.header.clone()),
2790                    );
2791                    let propagated = propagate_documents_to_entries(&headers, &mut entries); // mutates `entries`; the returned count is only used for logging
2792                    if propagated > 0 {
2793                        info!(
2794                            "Propagated document-level fraud to {propagated} derived journal entries"
2795                        );
2796                    }
2797                }
2798            }
2799        }
2800
2801        // Phase 8: Anomaly Injection (runs on the JEs generated so far; the ECL, provision, treasury and tax JEs merged further below are not injected)
2802        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2803
2804        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2805        // through the anomaly injector.
2806        //
2807        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2808        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2809        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2810        //   - Any external mutation that sets is_fraud after the fact
2811        //
2812        // The anomaly injector already applies the same bias inline when it
2813        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2814        // so gating this sweep on `!is_anomaly` avoids double-application.
2815        //
2816        // Without this sweep, fraud entries from these paths show 0 lift on
2817        // the canonical forensic signals (is_round_1000, is_off_hours,
2818        // is_weekend, is_post_close), which is exactly what the SDK-side
2819        // evaluator caught in v3.1 — fraud features had worse lift than
2820        // baseline. See DS-3.1 post-deploy feedback.
2821        {
2822            use datasynth_core::fraud_bias::{
2823                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
2824            };
2825            use rand_chacha::rand_core::SeedableRng;
2826            let cfg = FraudBehavioralBiasConfig::default(); // NOTE(review): built from `Default`, not read from `self.config` — confirm this is intended
2827            if cfg.enabled {
2828                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100); // phase-specific RNG derived from the run seed
2829                let mut swept = 0usize; // number of entries the bias was applied to (logging only)
2830                for entry in entries.iter_mut() {
2831                    if entry.header.is_fraud && !entry.header.is_anomaly { // fraud-flagged but not anomaly-flagged
2832                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2833                        swept += 1;
2834                    }
2835                }
2836                if swept > 0 {
2837                    info!(
2838                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2839                         (doc-propagated + je_generator intrinsic fraud)"
2840                    );
2841                }
2842            }
2843        }
2844
2845        // Emit anomaly labels to stream sink
2846        self.emit_phase_items(
2847            "anomaly_injection",
2848            "LabeledAnomaly",
2849            &anomaly_labels.labels,
2850        );
2851
2852        // Propagate fraud labels from journal entries to source documents.
2853        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2854        // instead of tracing through document_references.json.
2855        //
2856        // Gated by `fraud.propagate_to_document` (default true) — disable when
2857        // downstream consumers want document fraud flags to reflect only
2858        // document-level injection, not line-level.
2859        if self.config.fraud.propagate_to_document {
2860            use std::collections::HashMap; // same type as the file-level `use std::collections::HashMap` import
2861            // Build a map from lookup key (reference / bare document id / JE id) -> fraud_type, from fraudulent JEs.
2862            //
2863            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2864            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2865            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2866            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2867            // we register BOTH the prefixed form (raw reference) AND the bare form
2868            // (post-colon portion) in the map. Also register the JE's document_id
2869            // UUID so documents that set `journal_entry_id` match via that path.
2870            //
2871            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2872            // looked up "foo", silently producing 0 propagations.
2873            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2874            for je in &entries {
2875                if je.header.is_fraud {
2876                    if let Some(ref fraud_type) = je.header.fraud_type { // fraud JEs without a fraud_type contribute nothing
2877                        if let Some(ref reference) = je.header.reference {
2878                            // Register the full reference ("GR:PO-2024-000001")
2879                            fraud_map.insert(reference.clone(), *fraud_type); // `insert` overwrites: the last fraud JE seen for a key decides its type
2880                            // Also register the bare document ID ("PO-2024-000001")
2881                            // by stripping the "PREFIX:" if present (split at the FIRST ':').
2882                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2883                                if !bare.is_empty() {
2884                                    fraud_map.insert(bare.to_string(), *fraud_type);
2885                                }
2886                            }
2887                        }
2888                        // Also tag via journal_entry_id on document headers
2889                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2890                    }
2891                }
2892            }
2893            if !fraud_map.is_empty() { // skip the document passes when no fraud JE carried a fraud_type
2894                let mut propagated = 0usize;
2895                // Use DocumentHeader::propagate_fraud method for each doc type
2896                macro_rules! propagate_to {
2897                    ($collection:expr) => {
2898                        for doc in &mut $collection {
2899                            if doc.header.propagate_fraud(&fraud_map) { // NOTE(review): assumes `true` means the header was actually tagged — confirm
2900                                propagated += 1;
2901                            }
2902                        }
2903                    };
2904                }
2905                propagate_to!(document_flows.purchase_orders);
2906                propagate_to!(document_flows.goods_receipts);
2907                propagate_to!(document_flows.vendor_invoices);
2908                propagate_to!(document_flows.payments);
2909                propagate_to!(document_flows.sales_orders);
2910                propagate_to!(document_flows.deliveries);
2911                propagate_to!(document_flows.customer_invoices);
2912                if propagated > 0 {
2913                    info!(
2914                        "Propagated fraud labels to {} document flow records",
2915                        propagated
2916                    );
2917                }
2918            }
2919        }
2920
2921        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
2922        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
2923
2924        // Emit red flags to stream sink
2925        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
2926
2927        // Phase 26b: Collusion Ring Generation (after red flags)
2928        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
2929
2930        // Emit collusion rings to stream sink
2931        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
2932
2933        // Phase 9: Balance Validation (covers the JEs generated so far, including payroll, manufacturing, IC; JEs merged by later phases are not validated here)
2934        let balance_validation = self.phase_balance_validation(&entries)?;
2935
2936        // Phase 9a: COA coverage — every gl_account in JEs must exist in the
2937        // chart of accounts. Soft warning by default; hard fail when the
2938        // user passes --validate-coa-coverage / sets the strict flag.
2939        self.validate_coa_coverage(&entries, coa.as_ref())?;
2940
2941        // Phase 9b: GL-to-Subledger Reconciliation
2942        let subledger_reconciliation =
2943            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
2944
2945        // Phase 10: Data Quality Injection
2946        let (data_quality_stats, quality_issues) =
2947            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
2948
2949        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
2950        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
2951
2952        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity). Checks only the JEs present in `entries` at this point.
2953        {
2954            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2955
2956            // Assert 1: Every JE that is neither fraud- nor anomaly-flagged must individually balance
2957            // (debits = credits within `tolerance`). Flagged JEs are skipped since they may be intentionally unbalanced.
2958            let mut unbalanced_clean = 0usize;
2959            for je in &entries {
2960                if je.header.is_fraud || je.header.is_anomaly {
2961                    continue;
2962                }
2963                let diff = (je.total_debit() - je.total_credit()).abs();
2964                if diff > tolerance {
2965                    unbalanced_clean += 1;
2966                    if unbalanced_clean <= 3 { // log only the first three offenders
2967                        warn!(
2968                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
2969                            je.header.document_id,
2970                            je.total_debit(),
2971                            je.total_credit(),
2972                            diff
2973                        );
2974                    }
2975                }
2976            }
2977            if unbalanced_clean > 0 { // hard failure: aborts generation with a SynthError
2978                return Err(datasynth_core::error::SynthError::generation(format!(
2979                    "{} non-anomaly JEs are unbalanced (debits != credits). \
2980                     First few logged above. Tolerance={}",
2981                    unbalanced_clean, tolerance
2982                )));
2983            }
2984            debug!(
2985                "Phase 10c: All {} non-anomaly JEs individually balanced",
2986                entries
2987                    .iter()
2988                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
2989                    .count()
2990            );
2991
2992            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity (warn-only, all JEs included)
2993            let company_codes: Vec<String> = self
2994                .config
2995                .companies
2996                .iter()
2997                .map(|c| c.code.clone())
2998                .collect();
2999            for company_code in &company_codes {
3000                let mut assets = rust_decimal::Decimal::ZERO;
3001                let mut liab_equity = rust_decimal::Decimal::ZERO;
3002
3003                for entry in &entries {
3004                    if entry.header.company_code != *company_code {
3005                        continue;
3006                    }
3007                    for line in &entry.lines {
3008                        let acct = &line.gl_account;
3009                        let net = line.debit_amount - line.credit_amount; // debit-positive net of the line
3010                        // Asset accounts (1xxx): normal debit balance
3011                        if acct.starts_with('1') {
3012                            assets += net;
3013                        }
3014                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3015                        else if acct.starts_with('2') || acct.starts_with('3') {
3016                            liab_equity -= net; // credit-normal, so negate debit-net
3017                        }
3018                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3019                        // so they net to zero after closing entries
3020                    }
3021                }
3022
3023                let bs_diff = (assets - liab_equity).abs();
3024                if bs_diff > tolerance {
3025                    warn!(
3026                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3027                         revenue/expense closing entries may not fully offset",
3028                        company_code, assets, liab_equity, bs_diff
3029                    );
3030                    // Warn rather than error: multi-period datasets may have timing
3031                    // differences from accruals/deferrals that resolve in later periods.
3032                    // The per-JE balance check (Assert 1) is the hard gate.
3033                } else {
3034                    debug!(
3035                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3036                        company_code, assets, liab_equity, bs_diff
3037                    );
3038                }
3039            }
3040
3041            info!("Phase 10c: All generation-time accounting assertions passed"); // NOTE: also logged when Assert 2 above only warned
3042        }
3043
3044        // Phase 11: Audit Data
3045        let audit = self.phase_audit_data(&entries, &mut stats)?;
3046
3047        // Phase 12: Banking KYC/AML Data
3048        let mut banking = self.phase_banking_data(&mut stats)?;
3049
3050        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3051        // Creates coherence between the accounting layer (payments, JEs) and the
3052        // banking layer (bank transactions). A vendor invoice payment now appears
3053        // on both sides with cross-references and fraud labels propagated.
3054        if self.phase_config.generate_banking // banking enabled, and both payments and bank accounts exist
3055            && !document_flows.payments.is_empty()
3056            && !banking.accounts.is_empty()
3057        {
3058            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3059            if bridge_rate > 0.0 { // a zero or negative rate disables bridging entirely
3060                let mut bridge =
3061                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3062                        self.seed,
3063                    );
3064                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3065                    &document_flows.payments,
3066                    &banking.customers,
3067                    &banking.accounts,
3068                    bridge_rate,
3069                );
3070                info!(
3071                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3072                    bridge_stats.bridged_count,
3073                    bridge_stats.transactions_emitted,
3074                    bridge_stats.fraud_propagated,
3075                );
3076                let bridged_count = bridged_txns.len(); // captured before the Vec is moved into `banking.transactions`
3077                banking.transactions.extend(bridged_txns);
3078
3079                // Re-run velocity computation so bridged txns also get features
3080                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3081                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3082                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3083                        &mut banking.transactions, // runs over the full list (native + bridged)
3084                    );
3085                }
3086
3087                // Recompute suspicious count after bridging
3088                banking.suspicious_count = banking
3089                    .transactions
3090                    .iter()
3091                    .filter(|t| t.is_suspicious)
3092                    .count();
3093                stats.banking_transaction_count = banking.transactions.len(); // keep run statistics in sync with the extended list
3094                stats.banking_suspicious_count = banking.suspicious_count;
3095            }
3096        }
3097
3098        // Phase 13: Graph Export
3099        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3100
3101        // Phase 14: LLM Enrichment
3102        self.phase_llm_enrichment(&mut stats);
3103
3104        // Phase 15: Diffusion Enhancement
3105        self.phase_diffusion_enhancement(&entries, &mut stats);
3106
3107        // Phase 16: Causal Overlay
3108        self.phase_causal_overlay(&mut stats);
3109
3110        // Phase 17: Bank Reconciliation + Financial Statements
3111        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3112        // provision data (from accounting_standards / tax snapshots) can be wired in.
3113        let mut financial_reporting = self.phase_financial_reporting(
3114            &document_flows,
3115            &entries,
3116            &coa,
3117            &hr,
3118            &audit,
3119            &mut stats,
3120        )?;
3121
3122        // BS coherence check on the consolidated statements: assets = liabilities + equity (warn-only, never fails the run)
3123        {
3124            use datasynth_core::models::StatementType;
3125            for stmt in &financial_reporting.consolidated_statements {
3126                if stmt.statement_type == StatementType::BalanceSheet { // other statement types are ignored
3127                    let total_assets: rust_decimal::Decimal = stmt
3128                        .line_items
3129                        .iter()
3130                        .filter(|li| li.section.to_uppercase().contains("ASSET")) // case-insensitive match on the section name
3131                        .map(|li| li.amount)
3132                        .sum();
3133                    let total_le: rust_decimal::Decimal = stmt
3134                        .line_items
3135                        .iter()
3136                        .filter(|li| !li.section.to_uppercase().contains("ASSET")) // every non-asset section is treated as L+E
3137                        .map(|li| li.amount)
3138                        .sum();
3139                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) { // tolerance of 1 (scale 0), coarser than the 0.01 used in Phase 10c
3140                        warn!(
3141                            "BS equation imbalance: assets={}, L+E={}",
3142                            total_assets, total_le
3143                        );
3144                    }
3145                }
3146            }
3147        }
3148
3149        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3150        let accounting_standards =
3151            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3152
3153        // Phase 18a: Merge ECL journal entries into main GL
3154        if !accounting_standards.ecl_journal_entries.is_empty() {
3155            debug!(
3156                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3157                accounting_standards.ecl_journal_entries.len()
3158            );
3159            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3160        }
3161
3162        // Phase 18a: Merge provision journal entries into main GL
3163        if !accounting_standards.provision_journal_entries.is_empty() {
3164            debug!(
3165                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3166                accounting_standards.provision_journal_entries.len()
3167            );
3168            entries.extend(
3169                accounting_standards
3170                    .provision_journal_entries
3171                    .iter()
3172                    .cloned(),
3173            );
3174        }
3175
3176        // Phase 18b: OCPM Events (after all process data is available)
3177        let mut ocpm = self.phase_ocpm_events(
3178            &document_flows,
3179            &sourcing,
3180            &hr,
3181            &manufacturing_snap,
3182            &banking,
3183            &audit,
3184            &financial_reporting,
3185            &mut stats,
3186        )?;
3187
3188        // Emit OCPM events to stream sink
3189        if let Some(ref event_log) = ocpm.event_log {
3190            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3191        }
3192
3193        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3194        if let Some(ref event_log) = ocpm.event_log {
3195            // Build reverse index: document_ref → indices of the matching events in `event_log.events`
3196            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3197                std::collections::HashMap::new();
3198            for (idx, event) in event_log.events.iter().enumerate() {
3199                if let Some(ref doc_ref) = event.document_ref { // events without a document_ref are not indexed
3200                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3201                }
3202            }
3203
3204            if !doc_index.is_empty() {
3205                let mut annotated = 0usize; // counts annotated JEs, not events
3206                for entry in &mut entries {
3207                    let doc_id_str = entry.header.document_id.to_string();
3208                    // Collect matching event indices from document_id and reference
3209                    let mut matched_indices: Vec<usize> = Vec::new();
3210                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3211                        matched_indices.extend(indices);
3212                    }
3213                    if let Some(ref reference) = entry.header.reference {
3214                        let bare_ref = reference // text after the first ':' ("PREFIX:DOC_ID" → "DOC_ID"), or the whole reference if there is no ':'
3215                            .find(':')
3216                            .map(|i| &reference[i + 1..])
3217                            .unwrap_or(reference.as_str());
3218                        if let Some(indices) = doc_index.get(bare_ref) {
3219                            for &idx in indices {
3220                                if !matched_indices.contains(&idx) { // avoid listing an event twice when both lookups hit it
3221                                    matched_indices.push(idx);
3222                                }
3223                            }
3224                        }
3225                    }
3226                    // Apply matches to JE header
3227                    if !matched_indices.is_empty() {
3228                        for &idx in &matched_indices {
3229                            let event = &event_log.events[idx]; // idx came from enumerate() over this same Vec
3230                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3231                                entry.header.ocpm_event_ids.push(event.event_id);
3232                            }
3233                            for obj_ref in &event.object_refs {
3234                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3235                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3236                                }
3237                            }
3238                            if entry.header.ocpm_case_id.is_none() { // keep an existing case id; otherwise take this event's (which may itself be None)
3239                                entry.header.ocpm_case_id = event.case_id;
3240                            }
3241                        }
3242                        annotated += 1;
3243                    }
3244                }
3245                debug!(
3246                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3247                    annotated
3248                );
3249            }
3250        }
3251
3252        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3253        // IC eliminations, opening balances, standards-driven entries) so
3254        // every JournalEntry carries at least one `ocpm_event_ids` link.
3255        if let Some(ref mut event_log) = ocpm.event_log { // Phases 18d, 18e and 18f all run only when an event log exists
3256            let synthesized =
3257                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3258            if synthesized > 0 {
3259                info!(
3260                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3261                );
3262            }
3263
3264            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3265            // events and their owning CaseTrace. Without this, every exported
3266            // OCEL event has `is_anomaly = false` even when the underlying JE
3267            // was flagged.
3268            let anomaly_events =
3269                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3270            if anomaly_events > 0 {
3271                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3272            }
3273
3274            // Phase 18f: Inject process-variant imperfections (rework, skipped
3275            // steps, out-of-order events) so conformance checkers see
3276            // realistic variant counts and fitness < 1.0. Uses the P2P
3277            // process rates as the single source of truth.
3278            let p2p_cfg = &self.config.ocpm.p2p_process;
3279            let any_imperfection = p2p_cfg.rework_probability > 0.0 // true if at least one of the three rates is positive
3280                || p2p_cfg.skip_step_probability > 0.0
3281                || p2p_cfg.out_of_order_probability > 0.0;
3282            if any_imperfection {
3283                use rand_chacha::rand_core::SeedableRng;
3284                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3285                    rework_rate: p2p_cfg.rework_probability,
3286                    skip_rate: p2p_cfg.skip_step_probability,
3287                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3288                };
3289                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200); // phase-specific RNG derived from the run seed
3290                let stats = // NOTE: shadows the run-level `stats` for the rest of this `if` block
3291                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3292                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3293                    info!(
3294                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3295                        stats.rework, stats.skipped, stats.out_of_order
3296                    );
3297                }
3298            }
3299        }
3300
3301        // Phase 19: Sales Quotes, Management KPIs, Budgets
3302        let sales_kpi_budgets =
3303            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &mut stats)?;
3304
3305        // Phase 22: Treasury Data Generation
3306        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3307        // are included in the pre-tax income used by phase_tax_generation.
3308        let treasury =
3309            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3310
3311        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3312        if !treasury.journal_entries.is_empty() {
3313            debug!(
3314                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3315                treasury.journal_entries.len()
3316            );
3317            entries.extend(treasury.journal_entries.iter().cloned());
3318        }
3319
3320        // Phase 20: Tax Generation
3321        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3322
3323        // Phase 20 JEs: Merge tax posting journal entries into main GL
3324        if !tax.tax_posting_journal_entries.is_empty() {
3325            debug!(
3326                "Merging {} tax posting JEs into GL",
3327                tax.tax_posting_journal_entries.len()
3328            );
3329            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3330        }
3331
3332        // Phase 20b: FINAL fraud behavioral bias sweep.
3333        //
3334        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3335        // period close) extend `entries` with new journal entries that may
3336        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3337        // already-fraudulent transactions). Those late additions miss the
3338        // Phase 8b sweep and ship without bias applied — which is exactly
3339        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3340        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3341        //
3342        // Running the sweep one more time here guarantees every is_fraud
3343        // entry — regardless of which phase added it — has bias applied.
3344        // `!is_anomaly` gates out anomaly-injector entries (which already
3345        // got biased inline); the sweep is otherwise idempotent-ish:
3346        // weekend / off_hours re-fire to another valid weekend / off-hour,
3347        // post_close is guarded by `!is_post_close`, and round-dollar
3348        // rescaling on an already-round amount is a no-op (ratio = 1).
3349        {
3350            use datasynth_core::fraud_bias::{
3351                apply_fraud_behavioral_bias, FraudBehavioralBiasConfig,
3352            };
3353            use rand_chacha::rand_core::SeedableRng;
3354            let cfg = FraudBehavioralBiasConfig::default();
3355            if cfg.enabled {
3356                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3357                let mut swept = 0usize;
3358                for entry in entries.iter_mut() {
3359                    if entry.header.is_fraud && !entry.header.is_anomaly {
3360                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3361                        swept += 1;
3362                    }
3363                }
3364                if swept > 0 {
3365                    info!(
3366                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3367                         non-anomaly fraud entries (covers late-added JEs from \
3368                         ECL / provisions / treasury / tax / period-close)"
3369                    );
3370                }
3371            }
3372        }
3373
3374        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3375        // Build supplementary cash flow items from upstream JE data (depreciation,
3376        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3377        {
3378            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3379
3380            let framework_str = {
3381                use datasynth_config::schema::AccountingFrameworkConfig;
3382                match self
3383                    .config
3384                    .accounting_standards
3385                    .framework
3386                    .unwrap_or_default()
3387                {
3388                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3389                        "IFRS"
3390                    }
3391                    _ => "US_GAAP",
3392                }
3393            };
3394
3395            // Sum depreciation debits (account 6000) from close JEs
3396            let depreciation_total: rust_decimal::Decimal = entries
3397                .iter()
3398                .filter(|je| je.header.document_type == "CL")
3399                .flat_map(|je| je.lines.iter())
3400                .filter(|l| l.gl_account.starts_with("6000"))
3401                .map(|l| l.debit_amount)
3402                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3403
3404            // Sum interest expense debits (account 7100)
3405            let interest_paid: rust_decimal::Decimal = entries
3406                .iter()
3407                .flat_map(|je| je.lines.iter())
3408                .filter(|l| l.gl_account.starts_with("7100"))
3409                .map(|l| l.debit_amount)
3410                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3411
3412            // Sum tax expense debits (account 8000)
3413            let tax_paid: rust_decimal::Decimal = entries
3414                .iter()
3415                .flat_map(|je| je.lines.iter())
3416                .filter(|l| l.gl_account.starts_with("8000"))
3417                .map(|l| l.debit_amount)
3418                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3419
3420            // Sum capex debits on fixed assets (account 1500)
3421            let capex: rust_decimal::Decimal = entries
3422                .iter()
3423                .flat_map(|je| je.lines.iter())
3424                .filter(|l| l.gl_account.starts_with("1500"))
3425                .map(|l| l.debit_amount)
3426                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3427
3428            // Dividends paid: sum debits on dividends payable (account 2170) from payment JEs
3429            let dividends_paid: rust_decimal::Decimal = entries
3430                .iter()
3431                .flat_map(|je| je.lines.iter())
3432                .filter(|l| l.gl_account == "2170")
3433                .map(|l| l.debit_amount)
3434                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3435
3436            let cf_data = CashFlowSourceData {
3437                depreciation_total,
3438                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3439                delta_ar: rust_decimal::Decimal::ZERO,
3440                delta_ap: rust_decimal::Decimal::ZERO,
3441                delta_inventory: rust_decimal::Decimal::ZERO,
3442                capex,
3443                debt_issuance: rust_decimal::Decimal::ZERO,
3444                debt_repayment: rust_decimal::Decimal::ZERO,
3445                interest_paid,
3446                tax_paid,
3447                dividends_paid,
3448                framework: framework_str.to_string(),
3449            };
3450
3451            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3452            if !enhanced_cf_items.is_empty() {
3453                // Merge into ALL cash flow statements (standalone + consolidated)
3454                use datasynth_core::models::StatementType;
3455                let merge_count = enhanced_cf_items.len();
3456                for stmt in financial_reporting
3457                    .financial_statements
3458                    .iter_mut()
3459                    .chain(financial_reporting.consolidated_statements.iter_mut())
3460                    .chain(
3461                        financial_reporting
3462                            .standalone_statements
3463                            .values_mut()
3464                            .flat_map(|v| v.iter_mut()),
3465                    )
3466                {
3467                    if stmt.statement_type == StatementType::CashFlowStatement {
3468                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3469                    }
3470                }
3471                info!(
3472                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3473                    merge_count
3474                );
3475            }
3476        }
3477
3478        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3479        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3480        self.generate_notes_to_financial_statements(
3481            &mut financial_reporting,
3482            &accounting_standards,
3483            &tax,
3484            &hr,
3485            &audit,
3486            &treasury,
3487        );
3488
3489        // Phase 20b: Supplement segment reports from real JEs (v2.4)
3490        // When we have 2+ companies, derive segment data from actual journal entries
3491        // to complement or replace the FS-generator-based segments.
3492        if self.config.companies.len() >= 2 && !entries.is_empty() {
3493            let companies: Vec<(String, String)> = self
3494                .config
3495                .companies
3496                .iter()
3497                .map(|c| (c.code.clone(), c.name.clone()))
3498                .collect();
3499            let ic_elim: rust_decimal::Decimal =
3500                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3501            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3502                .unwrap_or(NaiveDate::MIN);
3503            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3504            let period_label = format!(
3505                "{}-{:02}",
3506                end_date.year(),
3507                (end_date - chrono::Days::new(1)).month()
3508            );
3509
3510            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3511            let (je_segments, je_recon) =
3512                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3513            if !je_segments.is_empty() {
3514                info!(
3515                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3516                    je_segments.len(),
3517                    ic_elim,
3518                );
3519                // Replace if existing segment_reports were empty; otherwise supplement
3520                if financial_reporting.segment_reports.is_empty() {
3521                    financial_reporting.segment_reports = je_segments;
3522                    financial_reporting.segment_reconciliations = vec![je_recon];
3523                } else {
3524                    financial_reporting.segment_reports.extend(je_segments);
3525                    financial_reporting.segment_reconciliations.push(je_recon);
3526                }
3527            }
3528        }
3529
3530        // Phase 21: ESG Data Generation
3531        let esg_snap =
3532            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3533
3534        // Phase 23: Project Accounting Data Generation
3535        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3536
3537        // Phase 24: Process Evolution + Organizational Events
3538        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3539
3540        // Phase 24b: Disruption Events
3541        let disruption_events = self.phase_disruption_events(&mut stats)?;
3542
3543        // Phase 27: Bi-Temporal Vendor Version Chains
3544        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3545
3546        // Phase 28: Entity Relationship Graph + Cross-Process Links
3547        let (entity_relationship_graph, cross_process_links) =
3548            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3549
3550        // Phase 29: Industry-specific GL accounts
3551        let industry_output = self.phase_industry_data(&mut stats);
3552
3553        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3554        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3555
3556        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3557        //
3558        // The neural / hybrid diffusion path was a documented L2 stub
3559        // in v3.x; actual neural-network training requires ML
3560        // infrastructure (PyTorch / candle bindings, GPU access,
3561        // training loops) that was never wired through the
3562        // orchestrator. Rather than keep a silently-no-op block that
3563        // misleads users into thinking neural training happens, v4.0
3564        // acknowledges the config — exposing stats so downstream
3565        // tooling can see the request — but emits a clear warning
3566        // when a non-statistical backend is requested. The statistical
3567        // diffusion backend continues to run via
3568        // `phase_diffusion_enhancement`.
3569        //
3570        // Users who need real neural diffusion: track the roadmap item
3571        // in the v4.x backlog and consider contributing the backend
3572        // (the `DiffusionBackend` trait is the integration point).
3573        if self.config.diffusion.enabled
3574            && (self.config.diffusion.backend == "neural"
3575                || self.config.diffusion.backend == "hybrid")
3576        {
3577            let neural = &self.config.diffusion.neural;
3578            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3579            stats.neural_hybrid_weight = Some(weight);
3580            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3581            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3582            warn!(
3583                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3584                 the neural/hybrid training path is not yet shipped. Config \
3585                 is captured in stats (weight={weight:.2}, strategy={}, \
3586                 columns={}) but no neural training runs. Statistical \
3587                 diffusion (backend='statistical') continues to work.",
3588                self.config.diffusion.backend,
3589                neural.hybrid_strategy,
3590                neural.neural_columns.len(),
3591            );
3592        }
3593
3594        // Phase 19b: Hypergraph Export (after all data is available)
3595        self.phase_hypergraph_export(
3596            &coa,
3597            &entries,
3598            &document_flows,
3599            &sourcing,
3600            &hr,
3601            &manufacturing_snap,
3602            &banking,
3603            &audit,
3604            &financial_reporting,
3605            &ocpm,
3606            &compliance_regulations,
3607            &mut stats,
3608        )?;
3609
3610        // Phase 10c: Additional graph builders (approval, entity, banking)
3611        // These run after all data is available since they need banking/IC data.
3612        if self.phase_config.generate_graph_export {
3613            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3614        }
3615
3616        // Log informational messages for config sections not yet fully wired
3617        if self.config.streaming.enabled {
3618            info!("Note: streaming config is enabled but batch mode does not use it");
3619        }
3620        if self.config.vendor_network.enabled {
3621            debug!("Vendor network config available; relationship graph generation is partial");
3622        }
3623        if self.config.customer_segmentation.enabled {
3624            debug!("Customer segmentation config available; segment-aware generation is partial");
3625        }
3626
3627        // Log final resource statistics
3628        let resource_stats = self.resource_guard.stats();
3629        info!(
3630            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3631            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3632            resource_stats.disk.estimated_bytes_written,
3633            resource_stats.degradation_level
3634        );
3635
3636        // Flush any remaining stream sink data
3637        if let Some(ref sink) = self.phase_sink {
3638            if let Err(e) = sink.flush() {
3639                warn!("Stream sink flush failed: {e}");
3640            }
3641        }
3642
3643        // Build data lineage graph
3644        let lineage = self.build_lineage_graph();
3645
3646        // Evaluate quality gates if enabled in config
3647        let gate_result = if self.config.quality_gates.enabled {
3648            let profile_name = &self.config.quality_gates.profile;
3649            match datasynth_eval::gates::get_profile(profile_name) {
3650                Some(profile) => {
3651                    // Build an evaluation populated with actual generation metrics.
3652                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3653
3654                    // Populate balance sheet evaluation from balance validation results
3655                    if balance_validation.validated {
3656                        eval.coherence.balance =
3657                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3658                                equation_balanced: balance_validation.is_balanced,
3659                                max_imbalance: (balance_validation.total_debits
3660                                    - balance_validation.total_credits)
3661                                    .abs(),
3662                                periods_evaluated: 1,
3663                                periods_imbalanced: if balance_validation.is_balanced {
3664                                    0
3665                                } else {
3666                                    1
3667                                },
3668                                period_results: Vec::new(),
3669                                companies_evaluated: self.config.companies.len(),
3670                            });
3671                    }
3672
3673                    // Set coherence passes based on balance validation
3674                    eval.coherence.passes = balance_validation.is_balanced;
3675                    if !balance_validation.is_balanced {
3676                        eval.coherence
3677                            .failures
3678                            .push("Balance sheet equation not satisfied".to_string());
3679                    }
3680
3681                    // Set statistical score based on entry count (basic sanity)
3682                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3683                    eval.statistical.passes = !entries.is_empty();
3684
3685                    // Set quality score from data quality stats
3686                    eval.quality.overall_score = 0.9; // Default high for generated data
3687                    eval.quality.passes = true;
3688
3689                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3690                    info!(
3691                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3692                        profile_name, result.gates_passed, result.gates_total, result.summary
3693                    );
3694                    Some(result)
3695                }
3696                None => {
3697                    warn!(
3698                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3699                        profile_name
3700                    );
3701                    None
3702                }
3703            }
3704        } else {
3705            None
3706        };
3707
3708        // Generate internal controls if enabled
3709        let internal_controls = if self.config.internal_controls.enabled {
3710            InternalControl::standard_controls()
3711        } else {
3712            Vec::new()
3713        };
3714
3715        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3716        // phases (including fraud-bias sweep at Phase 20b) so derived
3717        // outputs reflect final data.
3718        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3719
3720        // v3.5.1: statistical validation over the final amount
3721        // distribution. Runs *after* all JE-adding phases so the report
3722        // reflects everything the user will see in the output. Returns
3723        // `None` unless `distributions.validation.enabled = true`.
3724        let statistical_validation = self.phase_statistical_validation(&entries)?;
3725
3726        // v4.1.3+: interconnectivity snapshot — tier assignments,
3727        // value-segment labels, industry-specific metadata. Runs after
3728        // master data is settled so it can index stable IDs.
3729        let interconnectivity = self.phase_interconnectivity();
3730
3731        Ok(EnhancedGenerationResult {
3732            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3733            master_data: std::mem::take(&mut self.master_data),
3734            document_flows,
3735            subledger,
3736            ocpm,
3737            audit,
3738            banking,
3739            graph_export,
3740            sourcing,
3741            financial_reporting,
3742            hr,
3743            accounting_standards,
3744            manufacturing: manufacturing_snap,
3745            sales_kpi_budgets,
3746            tax,
3747            esg: esg_snap,
3748            treasury,
3749            project_accounting,
3750            process_evolution,
3751            organizational_events,
3752            disruption_events,
3753            intercompany,
3754            journal_entries: entries,
3755            anomaly_labels,
3756            balance_validation,
3757            data_quality_stats,
3758            quality_issues,
3759            statistics: stats,
3760            lineage: Some(lineage),
3761            gate_result,
3762            internal_controls,
3763            sod_violations,
3764            opening_balances,
3765            subledger_reconciliation,
3766            counterfactual_pairs,
3767            red_flags,
3768            collusion_rings,
3769            temporal_vendor_chains,
3770            entity_relationship_graph,
3771            cross_process_links,
3772            industry_output,
3773            compliance_regulations,
3774            analytics_metadata,
3775            statistical_validation,
3776            interconnectivity,
3777        })
3778    }
3779
3780    /// v4.1.3+: populate the interconnectivity snapshot from
3781    /// previously-inert schema sections. Empty when all sections are
3782    /// disabled.
3783    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3784        use rand::{RngExt, SeedableRng};
3785        use rand_chacha::ChaCha8Rng;
3786
3787        let mut snap = InterconnectivitySnapshot::default();
3788        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3789
3790        // --- Vendor network ---
3791        let vn = &self.config.vendor_network;
3792        if vn.enabled {
3793            let total = self.master_data.vendors.len();
3794            if total > 0 {
3795                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3796                let remaining_after_t1 = total.saturating_sub(tier1_count);
3797                let depth = vn.depth.clamp(1, 3);
3798                let tier2_count = if depth >= 2 {
3799                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3800                    (tier1_count * avg).min(remaining_after_t1)
3801                } else {
3802                    0
3803                };
3804                let tier3_count = total
3805                    .saturating_sub(tier1_count)
3806                    .saturating_sub(tier2_count);
3807
3808                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3809                    let tier = if idx < tier1_count {
3810                        1
3811                    } else if idx < tier1_count + tier2_count {
3812                        2
3813                    } else {
3814                        3
3815                    };
3816                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3817
3818                    // Cluster assignment via configured ratios.
3819                    let cl = &vn.clusters;
3820                    let roll: f64 = rng.random();
3821                    let cluster = if roll < cl.reliable_strategic {
3822                        "reliable_strategic"
3823                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3824                        "standard_operational"
3825                    } else if roll
3826                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3827                    {
3828                        "transactional"
3829                    } else {
3830                        "problematic"
3831                    };
3832                    snap.vendor_clusters
3833                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3834                }
3835                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3836            }
3837        }
3838
3839        // --- Customer segmentation ---
3840        let cs = &self.config.customer_segmentation;
3841        if cs.enabled {
3842            let seg = &cs.value_segments;
3843            for customer in &self.master_data.customers {
3844                let roll: f64 = rng.random();
3845                let value_segment = if roll < seg.enterprise.customer_share {
3846                    "enterprise"
3847                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3848                    "mid_market"
3849                } else if roll
3850                    < seg.enterprise.customer_share
3851                        + seg.mid_market.customer_share
3852                        + seg.smb.customer_share
3853                {
3854                    "smb"
3855                } else {
3856                    "consumer"
3857                };
3858                snap.customer_value_segments
3859                    .push((customer.customer_id.clone(), value_segment.to_string()));
3860
3861                let roll2: f64 = rng.random();
3862                let life = &cs.lifecycle;
3863                let lifecycle = if roll2 < life.prospect_rate {
3864                    "prospect"
3865                } else if roll2 < life.prospect_rate + life.new_rate {
3866                    "new"
3867                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3868                    "growth"
3869                } else if roll2
3870                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3871                {
3872                    "mature"
3873                } else if roll2
3874                    < life.prospect_rate
3875                        + life.new_rate
3876                        + life.growth_rate
3877                        + life.mature_rate
3878                        + life.at_risk_rate
3879                {
3880                    "at_risk"
3881                } else if roll2
3882                    < life.prospect_rate
3883                        + life.new_rate
3884                        + life.growth_rate
3885                        + life.mature_rate
3886                        + life.at_risk_rate
3887                        + life.churned_rate
3888                {
3889                    "churned"
3890                } else {
3891                    "won_back"
3892                };
3893                snap.customer_lifecycle_stages
3894                    .push((customer.customer_id.clone(), lifecycle.to_string()));
3895            }
3896        }
3897
3898        // --- Industry-specific metadata (minimal) ---
3899        let is = &self.config.industry_specific;
3900        if is.enabled {
3901            snap.industry_metadata.push(format!(
3902                "industry_specific.enabled=true (industry={:?})",
3903                self.config.global.industry
3904            ));
3905        }
3906
3907        snap
3908    }
3909
3910    // ========================================================================
3911    // Generation Phase Methods
3912    // ========================================================================
3913
3914    /// Phase 1: Generate Chart of Accounts and update statistics.
3915    fn phase_chart_of_accounts(
3916        &mut self,
3917        stats: &mut EnhancedGenerationStatistics,
3918    ) -> SynthResult<Arc<ChartOfAccounts>> {
3919        info!("Phase 1: Generating Chart of Accounts");
3920        let coa = self.generate_coa()?;
3921        stats.accounts_count = coa.account_count();
3922        info!(
3923            "Chart of Accounts generated: {} accounts",
3924            stats.accounts_count
3925        );
3926        self.check_resources_with_log("post-coa")?;
3927        Ok(coa)
3928    }
3929
3930    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
3931    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
3932        if self.phase_config.generate_master_data {
3933            info!("Phase 2: Generating Master Data");
3934            self.generate_master_data()?;
3935            stats.vendor_count = self.master_data.vendors.len();
3936            stats.customer_count = self.master_data.customers.len();
3937            stats.material_count = self.master_data.materials.len();
3938            stats.asset_count = self.master_data.assets.len();
3939            stats.employee_count = self.master_data.employees.len();
3940            info!(
3941                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
3942                stats.vendor_count, stats.customer_count, stats.material_count,
3943                stats.asset_count, stats.employee_count
3944            );
3945            self.check_resources_with_log("post-master-data")?;
3946        } else {
3947            debug!("Phase 2: Skipped (master data generation disabled)");
3948        }
3949        Ok(())
3950    }
3951
3952    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
3953    fn phase_document_flows(
3954        &mut self,
3955        stats: &mut EnhancedGenerationStatistics,
3956    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
3957        let mut document_flows = DocumentFlowSnapshot::default();
3958        let mut subledger = SubledgerSnapshot::default();
3959        // Dunning JEs (interest + charges) accumulated here and merged into the
3960        // main FA-JE list below so they appear in the GL.
3961        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
3962
3963        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
3964            info!("Phase 3: Generating Document Flows");
3965            self.generate_document_flows(&mut document_flows)?;
3966            stats.p2p_chain_count = document_flows.p2p_chains.len();
3967            stats.o2c_chain_count = document_flows.o2c_chains.len();
3968            info!(
3969                "Document flows generated: {} P2P chains, {} O2C chains",
3970                stats.p2p_chain_count, stats.o2c_chain_count
3971            );
3972
3973            // Phase 3b: Link document flows to subledgers (for data coherence)
3974            debug!("Phase 3b: Linking document flows to subledgers");
3975            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
3976            stats.ap_invoice_count = subledger.ap_invoices.len();
3977            stats.ar_invoice_count = subledger.ar_invoices.len();
3978            debug!(
3979                "Subledgers linked: {} AP invoices, {} AR invoices",
3980                stats.ap_invoice_count, stats.ar_invoice_count
3981            );
3982
3983            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
3984            // Without this step the subledger is systematically overstated because
3985            // amount_remaining is set at invoice creation and never reduced by
3986            // the payments that were generated in the document-flow phase.
3987            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
3988            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
3989            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
3990            debug!("Payment settlements applied to AP and AR subledgers");
3991
3992            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
3993            // The as-of date is the last day of the configured period.
3994            if let Ok(start_date) =
3995                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3996            {
3997                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
3998                    - chrono::Days::new(1);
3999                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4000                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
4001                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
4002                // derived from JE-level aggregation and will typically differ. This is a known
4003                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
4004                // generated independently. A future reconciliation phase should align them by
4005                // using subledger totals as the authoritative source for BS Receivables.
4006                for company in &self.config.companies {
4007                    let ar_report = ARAgingReport::from_invoices(
4008                        company.code.clone(),
4009                        &subledger.ar_invoices,
4010                        as_of_date,
4011                    );
4012                    subledger.ar_aging_reports.push(ar_report);
4013
4014                    let ap_report = APAgingReport::from_invoices(
4015                        company.code.clone(),
4016                        &subledger.ap_invoices,
4017                        as_of_date,
4018                    );
4019                    subledger.ap_aging_reports.push(ap_report);
4020                }
4021                debug!(
4022                    "AR/AP aging reports built: {} AR, {} AP",
4023                    subledger.ar_aging_reports.len(),
4024                    subledger.ap_aging_reports.len()
4025                );
4026
4027                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4028                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4029                {
4030                    use datasynth_generators::DunningGenerator;
4031                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4032                    for company in &self.config.companies {
4033                        let currency = company.currency.as_str();
4034                        // Collect mutable references to AR invoices for this company
4035                        // (dunning generator updates dunning_info on invoices in-place).
4036                        let mut company_invoices: Vec<
4037                            datasynth_core::models::subledger::ar::ARInvoice,
4038                        > = subledger
4039                            .ar_invoices
4040                            .iter()
4041                            .filter(|inv| inv.company_code == company.code)
4042                            .cloned()
4043                            .collect();
4044
4045                        if company_invoices.is_empty() {
4046                            continue;
4047                        }
4048
4049                        let result = dunning_gen.execute_dunning_run(
4050                            &company.code,
4051                            as_of_date,
4052                            &mut company_invoices,
4053                            currency,
4054                        );
4055
4056                        // Write back updated dunning info to the main AR invoice list
4057                        for updated in &company_invoices {
4058                            if let Some(orig) = subledger
4059                                .ar_invoices
4060                                .iter_mut()
4061                                .find(|i| i.invoice_number == updated.invoice_number)
4062                            {
4063                                orig.dunning_info = updated.dunning_info.clone();
4064                            }
4065                        }
4066
4067                        subledger.dunning_runs.push(result.dunning_run);
4068                        subledger.dunning_letters.extend(result.letters);
4069                        // Dunning JEs (interest + charges) collected into local buffer.
4070                        dunning_journal_entries.extend(result.journal_entries);
4071                    }
4072                    debug!(
4073                        "Dunning runs complete: {} runs, {} letters",
4074                        subledger.dunning_runs.len(),
4075                        subledger.dunning_letters.len()
4076                    );
4077                }
4078            }
4079
4080            self.check_resources_with_log("post-document-flows")?;
4081        } else {
4082            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4083        }
4084
4085        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4086        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4087        if !self.master_data.assets.is_empty() {
4088            debug!("Generating FA subledger records");
4089            let company_code = self
4090                .config
4091                .companies
4092                .first()
4093                .map(|c| c.code.as_str())
4094                .unwrap_or("1000");
4095            let currency = self
4096                .config
4097                .companies
4098                .first()
4099                .map(|c| c.currency.as_str())
4100                .unwrap_or("USD");
4101
4102            let mut fa_gen = datasynth_generators::FAGenerator::new(
4103                datasynth_generators::FAGeneratorConfig::default(),
4104                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4105            );
4106
4107            for asset in &self.master_data.assets {
4108                let (record, je) = fa_gen.generate_asset_acquisition(
4109                    company_code,
4110                    &format!("{:?}", asset.asset_class),
4111                    &asset.description,
4112                    asset.acquisition_date,
4113                    currency,
4114                    asset.cost_center.as_deref(),
4115                );
4116                subledger.fa_records.push(record);
4117                fa_journal_entries.push(je);
4118            }
4119
4120            stats.fa_subledger_count = subledger.fa_records.len();
4121            debug!(
4122                "FA subledger records generated: {} (with {} acquisition JEs)",
4123                stats.fa_subledger_count,
4124                fa_journal_entries.len()
4125            );
4126        }
4127
4128        // Generate Inventory subledger records from master data materials
4129        if !self.master_data.materials.is_empty() {
4130            debug!("Generating Inventory subledger records");
4131            let first_company = self.config.companies.first();
4132            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4133            let inv_currency = first_company
4134                .map(|c| c.currency.clone())
4135                .unwrap_or_else(|| "USD".to_string());
4136
4137            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4138                datasynth_generators::InventoryGeneratorConfig::default(),
4139                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4140                inv_currency.clone(),
4141            );
4142
4143            for (i, material) in self.master_data.materials.iter().enumerate() {
4144                let plant = format!("PLANT{:02}", (i % 3) + 1);
4145                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4146                let initial_qty = rust_decimal::Decimal::from(
4147                    material
4148                        .safety_stock
4149                        .to_string()
4150                        .parse::<i64>()
4151                        .unwrap_or(100),
4152                );
4153
4154                let position = inv_gen.generate_position(
4155                    company_code,
4156                    &plant,
4157                    &storage_loc,
4158                    &material.material_id,
4159                    &material.description,
4160                    initial_qty,
4161                    Some(material.standard_cost),
4162                    &inv_currency,
4163                );
4164                subledger.inventory_positions.push(position);
4165            }
4166
4167            stats.inventory_subledger_count = subledger.inventory_positions.len();
4168            debug!(
4169                "Inventory subledger records generated: {}",
4170                stats.inventory_subledger_count
4171            );
4172        }
4173
4174        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4175        if !subledger.fa_records.is_empty() {
4176            if let Ok(start_date) =
4177                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4178            {
4179                let company_code = self
4180                    .config
4181                    .companies
4182                    .first()
4183                    .map(|c| c.code.as_str())
4184                    .unwrap_or("1000");
4185                let fiscal_year = start_date.year();
4186                let start_period = start_date.month();
4187                let end_period =
4188                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4189
4190                let depr_cfg = FaDepreciationScheduleConfig {
4191                    fiscal_year,
4192                    start_period,
4193                    end_period,
4194                    seed_offset: 800,
4195                };
4196                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4197                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4198                let run_count = runs.len();
4199                subledger.depreciation_runs = runs;
4200                debug!(
4201                    "Depreciation runs generated: {} runs for {} periods",
4202                    run_count, self.config.global.period_months
4203                );
4204            }
4205        }
4206
4207        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4208        if !subledger.inventory_positions.is_empty() {
4209            if let Ok(start_date) =
4210                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4211            {
4212                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4213                    - chrono::Days::new(1);
4214
4215                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4216                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4217
4218                for company in &self.config.companies {
4219                    let result = inv_val_gen.generate(
4220                        &company.code,
4221                        &subledger.inventory_positions,
4222                        as_of_date,
4223                    );
4224                    subledger.inventory_valuations.push(result);
4225                }
4226                debug!(
4227                    "Inventory valuations generated: {} company reports",
4228                    subledger.inventory_valuations.len()
4229                );
4230            }
4231        }
4232
4233        Ok((document_flows, subledger, fa_journal_entries))
4234    }
4235
4236    /// Phase 3c: Generate OCPM events from document flows.
4237    #[allow(clippy::too_many_arguments)]
4238    fn phase_ocpm_events(
4239        &mut self,
4240        document_flows: &DocumentFlowSnapshot,
4241        sourcing: &SourcingSnapshot,
4242        hr: &HrSnapshot,
4243        manufacturing: &ManufacturingSnapshot,
4244        banking: &BankingSnapshot,
4245        audit: &AuditSnapshot,
4246        financial_reporting: &FinancialReportingSnapshot,
4247        stats: &mut EnhancedGenerationStatistics,
4248    ) -> SynthResult<OcpmSnapshot> {
4249        let degradation = self.check_resources()?;
4250        if degradation >= DegradationLevel::Reduced {
4251            debug!(
4252                "Phase skipped due to resource pressure (degradation: {:?})",
4253                degradation
4254            );
4255            return Ok(OcpmSnapshot::default());
4256        }
4257        if self.phase_config.generate_ocpm_events {
4258            info!("Phase 3c: Generating OCPM Events");
4259            let ocpm_snapshot = self.generate_ocpm_events(
4260                document_flows,
4261                sourcing,
4262                hr,
4263                manufacturing,
4264                banking,
4265                audit,
4266                financial_reporting,
4267            )?;
4268            stats.ocpm_event_count = ocpm_snapshot.event_count;
4269            stats.ocpm_object_count = ocpm_snapshot.object_count;
4270            stats.ocpm_case_count = ocpm_snapshot.case_count;
4271            info!(
4272                "OCPM events generated: {} events, {} objects, {} cases",
4273                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4274            );
4275            self.check_resources_with_log("post-ocpm")?;
4276            Ok(ocpm_snapshot)
4277        } else {
4278            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4279            Ok(OcpmSnapshot::default())
4280        }
4281    }
4282
4283    /// Phase 4: Generate journal entries from document flows and standalone generation.
4284    fn phase_journal_entries(
4285        &mut self,
4286        coa: &Arc<ChartOfAccounts>,
4287        document_flows: &DocumentFlowSnapshot,
4288        _stats: &mut EnhancedGenerationStatistics,
4289    ) -> SynthResult<Vec<JournalEntry>> {
4290        let mut entries = Vec::new();
4291
4292        // Phase 4a: Generate JEs from document flows (for data coherence)
4293        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4294            debug!("Phase 4a: Generating JEs from document flows");
4295            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4296            debug!("Generated {} JEs from document flows", flow_entries.len());
4297            entries.extend(flow_entries);
4298        }
4299
4300        // Phase 4b: Generate standalone journal entries
4301        if self.phase_config.generate_journal_entries {
4302            info!("Phase 4: Generating Journal Entries");
4303            let je_entries = self.generate_journal_entries(coa)?;
4304            info!("Generated {} standalone journal entries", je_entries.len());
4305            entries.extend(je_entries);
4306        } else {
4307            debug!("Phase 4: Skipped (journal entry generation disabled)");
4308        }
4309
4310        // Phase 4c (shard mode): inject pre-built IC journal entries from
4311        // `ShardContext`. When running standalone (no group engine), this
4312        // is a no-op. See crate::shard_context::ShardContext for rationale.
4313        if let Some(ctx) = &self.shard_context {
4314            if !ctx.extra_journal_entries.is_empty() {
4315                debug!(
4316                    "Phase 4c: appending {} shard-mode IC journal entries",
4317                    ctx.extra_journal_entries.len()
4318                );
4319                entries.extend(ctx.extra_journal_entries.iter().cloned());
4320            }
4321        }
4322
4323        if !entries.is_empty() {
4324            // Note: stats.total_entries/total_line_items are set in generate()
4325            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4326            self.check_resources_with_log("post-journal-entries")?;
4327        }
4328
4329        Ok(entries)
4330    }
4331
4332    /// Phase 5: Inject anomalies into journal entries.
4333    fn phase_anomaly_injection(
4334        &mut self,
4335        entries: &mut [JournalEntry],
4336        actions: &DegradationActions,
4337        stats: &mut EnhancedGenerationStatistics,
4338    ) -> SynthResult<AnomalyLabels> {
4339        if self.phase_config.inject_anomalies
4340            && !entries.is_empty()
4341            && !actions.skip_anomaly_injection
4342        {
4343            info!("Phase 5: Injecting Anomalies");
4344            let result = self.inject_anomalies(entries)?;
4345            stats.anomalies_injected = result.labels.len();
4346            info!("Injected {} anomalies", stats.anomalies_injected);
4347            self.check_resources_with_log("post-anomaly-injection")?;
4348            Ok(result)
4349        } else if actions.skip_anomaly_injection {
4350            warn!("Phase 5: Skipped due to resource degradation");
4351            Ok(AnomalyLabels::default())
4352        } else {
4353            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4354            Ok(AnomalyLabels::default())
4355        }
4356    }
4357
4358    /// Phase 6: Validate balance sheet equation on journal entries.
4359    fn phase_balance_validation(
4360        &mut self,
4361        entries: &[JournalEntry],
4362    ) -> SynthResult<BalanceValidationResult> {
4363        if self.phase_config.validate_balances && !entries.is_empty() {
4364            debug!("Phase 6: Validating Balances");
4365            let balance_validation = self.validate_journal_entries(entries)?;
4366            if balance_validation.is_balanced {
4367                debug!("Balance validation passed");
4368            } else {
4369                warn!(
4370                    "Balance validation found {} errors",
4371                    balance_validation.validation_errors.len()
4372                );
4373            }
4374            Ok(balance_validation)
4375        } else {
4376            Ok(BalanceValidationResult::default())
4377        }
4378    }
4379
4380    /// Validate that every `gl_account` referenced in `entries` exists in the
4381    /// chart of accounts.
4382    ///
4383    /// Always emits a warn-level log when the COA is missing accounts; in
4384    /// strict mode (`phase_config.validate_coa_coverage_strict`) returns
4385    /// `SynthError::generation` so the caller can fail fast.
4386    fn validate_coa_coverage(
4387        &self,
4388        entries: &[JournalEntry],
4389        coa: &ChartOfAccounts,
4390    ) -> SynthResult<()> {
4391        if entries.is_empty() {
4392            return Ok(());
4393        }
4394        let coa_set: std::collections::HashSet<&str> = coa
4395            .accounts
4396            .iter()
4397            .map(|a| a.account_number.as_str())
4398            .collect();
4399        let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4400        for je in entries {
4401            for line in je.lines.iter() {
4402                if !coa_set.contains(line.gl_account.as_str()) {
4403                    missing.insert(line.gl_account.clone());
4404                }
4405            }
4406        }
4407        if missing.is_empty() {
4408            debug!("COA coverage validation passed");
4409            return Ok(());
4410        }
4411        let msg = format!(
4412            "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4413            missing.len(),
4414            missing.iter().take(10).collect::<Vec<_>>()
4415        );
4416        if self.phase_config.validate_coa_coverage_strict {
4417            Err(SynthError::generation(msg))
4418        } else {
4419            warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4420            Ok(())
4421        }
4422    }
4423
4424    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4425    fn phase_data_quality_injection(
4426        &mut self,
4427        entries: &mut [JournalEntry],
4428        actions: &DegradationActions,
4429        stats: &mut EnhancedGenerationStatistics,
4430    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4431        if self.phase_config.inject_data_quality
4432            && !entries.is_empty()
4433            && !actions.skip_data_quality
4434        {
4435            info!("Phase 7: Injecting Data Quality Variations");
4436            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4437            stats.data_quality_issues = dq_stats.records_with_issues;
4438            info!("Injected {} data quality issues", stats.data_quality_issues);
4439            self.check_resources_with_log("post-data-quality")?;
4440            Ok((dq_stats, quality_issues))
4441        } else if actions.skip_data_quality {
4442            warn!("Phase 7: Skipped due to resource degradation");
4443            // v4.4.1: report the denominator (entries seen) even when
4444            // injection is skipped, so downstream consumers can tell
4445            // "skipped, 0/N" apart from "ran but found nothing".
4446            Ok((stats_with_denominator(entries.len()), Vec::new()))
4447        } else {
4448            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4449            Ok((stats_with_denominator(entries.len()), Vec::new()))
4450        }
4451    }
4452
4453    /// Phase 10b: Generate period-close journal entries.
4454    ///
4455    /// Generates:
4456    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4457    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4458    ///    for the configured period.
4459    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4460    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4461    ///    earnings via the Income Summary (3600) clearing account.
4462    fn phase_period_close(
4463        &mut self,
4464        entries: &mut Vec<JournalEntry>,
4465        subledger: &SubledgerSnapshot,
4466        stats: &mut EnhancedGenerationStatistics,
4467    ) -> SynthResult<()> {
4468        if !self.phase_config.generate_period_close || entries.is_empty() {
4469            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4470            return Ok(());
4471        }
4472
4473        info!("Phase 10b: Generating period-close journal entries");
4474
4475        use datasynth_core::accounts::{
4476            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4477        };
4478        use rust_decimal::Decimal;
4479
4480        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4481            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4482        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4483        // Posting date for close entries is the last day of the period
4484        let close_date = end_date - chrono::Days::new(1);
4485
4486        // Statutory tax rate (21% — configurable rates come in later tiers)
4487        let tax_rate = Decimal::new(21, 2); // 0.21
4488
4489        // Collect company codes from config
4490        let company_codes: Vec<String> = self
4491            .config
4492            .companies
4493            .iter()
4494            .map(|c| c.code.clone())
4495            .collect();
4496
4497        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4498        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4499        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4500
4501        // --- Depreciation JEs (per asset) ---
4502        // Compute period depreciation for each active fixed asset using straight-line method.
4503        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4504        let period_months = self.config.global.period_months;
4505        for asset in &subledger.fa_records {
4506            // Skip assets that are inactive / fully depreciated / non-depreciable
4507            use datasynth_core::models::subledger::fa::AssetStatus;
4508            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4509                continue;
4510            }
4511            let useful_life_months = asset.useful_life_months();
4512            if useful_life_months == 0 {
4513                // Land or CIP — not depreciated
4514                continue;
4515            }
4516            let salvage_value = asset.salvage_value();
4517            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4518            if depreciable_base == Decimal::ZERO {
4519                continue;
4520            }
4521            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4522                * Decimal::from(period_months))
4523            .round_dp(2);
4524            if period_depr <= Decimal::ZERO {
4525                continue;
4526            }
4527
4528            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4529            depr_header.document_type = "CL".to_string();
4530            depr_header.header_text = Some(format!(
4531                "Depreciation - {} {}",
4532                asset.asset_number, asset.description
4533            ));
4534            depr_header.created_by = "CLOSE_ENGINE".to_string();
4535            depr_header.source = TransactionSource::Automated;
4536            depr_header.business_process = Some(BusinessProcess::R2R);
4537
4538            let doc_id = depr_header.document_id;
4539            let mut depr_je = JournalEntry::new(depr_header);
4540
4541            // DR Depreciation Expense (6000)
4542            depr_je.add_line(JournalEntryLine::debit(
4543                doc_id,
4544                1,
4545                expense_accounts::DEPRECIATION.to_string(),
4546                period_depr,
4547            ));
4548            // CR Accumulated Depreciation (1510)
4549            depr_je.add_line(JournalEntryLine::credit(
4550                doc_id,
4551                2,
4552                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4553                period_depr,
4554            ));
4555
4556            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4557            close_jes.push(depr_je);
4558        }
4559
4560        if !subledger.fa_records.is_empty() {
4561            debug!(
4562                "Generated {} depreciation JEs from {} FA records",
4563                close_jes.len(),
4564                subledger.fa_records.len()
4565            );
4566        }
4567
4568        // --- Accrual entries (standard period-end accruals per company) ---
4569        // Generate standard accrued expense entries (utilities, rent, interest) using
4570        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4571        {
4572            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4573            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4574            // v3.4.3: snap reversal dates to business days. No-op when
4575            // temporal_patterns.business_days is disabled.
4576            if let Some(ctx) = &self.temporal_context {
4577                accrual_gen.set_temporal_context(Arc::clone(ctx));
4578            }
4579
4580            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4581            let accrual_items: &[(&str, &str, &str)] = &[
4582                ("Accrued Utilities", "6200", "2100"),
4583                ("Accrued Rent", "6300", "2100"),
4584                ("Accrued Interest", "6100", "2150"),
4585            ];
4586
4587            for company_code in &company_codes {
4588                // Estimate company revenue from existing JEs
4589                let company_revenue: Decimal = entries
4590                    .iter()
4591                    .filter(|e| e.header.company_code == *company_code)
4592                    .flat_map(|e| e.lines.iter())
4593                    .filter(|l| l.gl_account.starts_with('4'))
4594                    .map(|l| l.credit_amount - l.debit_amount)
4595                    .fold(Decimal::ZERO, |acc, v| acc + v);
4596
4597                if company_revenue <= Decimal::ZERO {
4598                    continue;
4599                }
4600
4601                // Use 0.5% of period revenue per accrual item as a proxy
4602                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4603                if accrual_base <= Decimal::ZERO {
4604                    continue;
4605                }
4606
4607                for (description, expense_acct, liability_acct) in accrual_items {
4608                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4609                        company_code,
4610                        description,
4611                        accrual_base,
4612                        expense_acct,
4613                        liability_acct,
4614                        close_date,
4615                        None,
4616                    );
4617                    close_jes.push(accrual_je);
4618                    if let Some(rev_je) = reversal_je {
4619                        close_jes.push(rev_je);
4620                    }
4621                }
4622            }
4623
4624            debug!(
4625                "Generated accrual entries for {} companies",
4626                company_codes.len()
4627            );
4628        }
4629
4630        for company_code in &company_codes {
4631            // Calculate net income for this company from existing JEs:
4632            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4633            // Revenue (4xxx): credit-normal, so net = credits - debits
4634            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4635            let mut total_revenue = Decimal::ZERO;
4636            let mut total_expenses = Decimal::ZERO;
4637
4638            for entry in entries.iter() {
4639                if entry.header.company_code != *company_code {
4640                    continue;
4641                }
4642                for line in &entry.lines {
4643                    let category = AccountCategory::from_account(&line.gl_account);
4644                    match category {
4645                        AccountCategory::Revenue => {
4646                            // Revenue is credit-normal: net revenue = credits - debits
4647                            total_revenue += line.credit_amount - line.debit_amount;
4648                        }
4649                        AccountCategory::Cogs
4650                        | AccountCategory::OperatingExpense
4651                        | AccountCategory::OtherIncomeExpense
4652                        | AccountCategory::Tax => {
4653                            // Expenses are debit-normal: net expense = debits - credits
4654                            total_expenses += line.debit_amount - line.credit_amount;
4655                        }
4656                        _ => {}
4657                    }
4658                }
4659            }
4660
4661            let pre_tax_income = total_revenue - total_expenses;
4662
4663            // Skip if no income statement activity
4664            if pre_tax_income == Decimal::ZERO {
4665                debug!(
4666                    "Company {}: no pre-tax income, skipping period close",
4667                    company_code
4668                );
4669                continue;
4670            }
4671
4672            // --- Tax provision / DTA JE ---
4673            if pre_tax_income > Decimal::ZERO {
4674                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4675                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4676
4677                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4678                tax_header.document_type = "CL".to_string();
4679                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4680                tax_header.created_by = "CLOSE_ENGINE".to_string();
4681                tax_header.source = TransactionSource::Automated;
4682                tax_header.business_process = Some(BusinessProcess::R2R);
4683
4684                let doc_id = tax_header.document_id;
4685                let mut tax_je = JournalEntry::new(tax_header);
4686
4687                // DR Tax Expense (8000)
4688                tax_je.add_line(JournalEntryLine::debit(
4689                    doc_id,
4690                    1,
4691                    tax_accounts::TAX_EXPENSE.to_string(),
4692                    tax_amount,
4693                ));
4694                // CR Income Tax Payable (2130)
4695                tax_je.add_line(JournalEntryLine::credit(
4696                    doc_id,
4697                    2,
4698                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4699                    tax_amount,
4700                ));
4701
4702                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4703                close_jes.push(tax_je);
4704            } else {
4705                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4706                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4707                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4708                if dta_amount > Decimal::ZERO {
4709                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4710                    dta_header.document_type = "CL".to_string();
4711                    dta_header.header_text =
4712                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4713                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4714                    dta_header.source = TransactionSource::Automated;
4715                    dta_header.business_process = Some(BusinessProcess::R2R);
4716
4717                    let doc_id = dta_header.document_id;
4718                    let mut dta_je = JournalEntry::new(dta_header);
4719
4720                    // DR Deferred Tax Asset (1600)
4721                    dta_je.add_line(JournalEntryLine::debit(
4722                        doc_id,
4723                        1,
4724                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4725                        dta_amount,
4726                    ));
4727                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4728                    // reflecting the benefit of the future deductible temporary difference.
4729                    dta_je.add_line(JournalEntryLine::credit(
4730                        doc_id,
4731                        2,
4732                        tax_accounts::TAX_EXPENSE.to_string(),
4733                        dta_amount,
4734                    ));
4735
4736                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4737                    close_jes.push(dta_je);
4738                    debug!(
4739                        "Company {}: loss year — recognised DTA of {}",
4740                        company_code, dta_amount
4741                    );
4742                }
4743            }
4744
4745            // --- Dividend JEs (v2.4) ---
4746            // If the entity is profitable after tax, declare a 10% dividend payout.
4747            // This runs AFTER tax provision so the dividend is based on post-tax income
4748            // but BEFORE the retained earnings close so the RE transfer reflects the
4749            // reduced balance.
4750            let tax_provision = if pre_tax_income > Decimal::ZERO {
4751                (pre_tax_income * tax_rate).round_dp(2)
4752            } else {
4753                Decimal::ZERO
4754            };
4755            let net_income = pre_tax_income - tax_provision;
4756
4757            if net_income > Decimal::ZERO {
4758                use datasynth_generators::DividendGenerator;
4759                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
4760                let mut div_gen = DividendGenerator::new(self.seed + 460);
4761                let currency_str = self
4762                    .config
4763                    .companies
4764                    .iter()
4765                    .find(|c| c.code == *company_code)
4766                    .map(|c| c.currency.as_str())
4767                    .unwrap_or("USD");
4768                let div_result = div_gen.generate(
4769                    company_code,
4770                    close_date,
4771                    Decimal::new(1, 0), // $1 per share placeholder
4772                    dividend_amount,
4773                    currency_str,
4774                );
4775                let div_je_count = div_result.journal_entries.len();
4776                close_jes.extend(div_result.journal_entries);
4777                debug!(
4778                    "Company {}: declared dividend of {} ({} JEs)",
4779                    company_code, dividend_amount, div_je_count
4780                );
4781            }
4782
4783            // --- Income statement closing JE ---
4784            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
4785            // For a loss year the DTA JE above already recognises the deferred benefit; here we
4786            // close the pre-tax loss into Retained Earnings as-is.
4787            if net_income != Decimal::ZERO {
4788                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
4789                close_header.document_type = "CL".to_string();
4790                close_header.header_text =
4791                    Some(format!("Income statement close - {}", company_code));
4792                close_header.created_by = "CLOSE_ENGINE".to_string();
4793                close_header.source = TransactionSource::Automated;
4794                close_header.business_process = Some(BusinessProcess::R2R);
4795
4796                let doc_id = close_header.document_id;
4797                let mut close_je = JournalEntry::new(close_header);
4798
4799                let abs_net_income = net_income.abs();
4800
4801                if net_income > Decimal::ZERO {
4802                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
4803                    close_je.add_line(JournalEntryLine::debit(
4804                        doc_id,
4805                        1,
4806                        equity_accounts::INCOME_SUMMARY.to_string(),
4807                        abs_net_income,
4808                    ));
4809                    close_je.add_line(JournalEntryLine::credit(
4810                        doc_id,
4811                        2,
4812                        equity_accounts::RETAINED_EARNINGS.to_string(),
4813                        abs_net_income,
4814                    ));
4815                } else {
4816                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
4817                    close_je.add_line(JournalEntryLine::debit(
4818                        doc_id,
4819                        1,
4820                        equity_accounts::RETAINED_EARNINGS.to_string(),
4821                        abs_net_income,
4822                    ));
4823                    close_je.add_line(JournalEntryLine::credit(
4824                        doc_id,
4825                        2,
4826                        equity_accounts::INCOME_SUMMARY.to_string(),
4827                        abs_net_income,
4828                    ));
4829                }
4830
4831                debug_assert!(
4832                    close_je.is_balanced(),
4833                    "Income statement closing JE must be balanced"
4834                );
4835                close_jes.push(close_je);
4836            }
4837        }
4838
4839        let close_count = close_jes.len();
4840        if close_count > 0 {
4841            info!("Generated {} period-close journal entries", close_count);
4842            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
4843            entries.extend(close_jes);
4844            stats.period_close_je_count = close_count;
4845
4846            // Update total entry/line-item stats
4847            stats.total_entries = entries.len() as u64;
4848            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
4849        } else {
4850            debug!("No period-close entries generated (no income statement activity)");
4851        }
4852
4853        Ok(())
4854    }
4855
4856    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
4857    fn phase_audit_data(
4858        &mut self,
4859        entries: &[JournalEntry],
4860        stats: &mut EnhancedGenerationStatistics,
4861    ) -> SynthResult<AuditSnapshot> {
4862        if self.phase_config.generate_audit {
4863            info!("Phase 8: Generating Audit Data");
4864            let audit_snapshot = self.generate_audit_data(entries)?;
4865            stats.audit_engagement_count = audit_snapshot.engagements.len();
4866            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
4867            stats.audit_evidence_count = audit_snapshot.evidence.len();
4868            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
4869            stats.audit_finding_count = audit_snapshot.findings.len();
4870            stats.audit_judgment_count = audit_snapshot.judgments.len();
4871            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
4872            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
4873            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
4874            stats.audit_sample_count = audit_snapshot.samples.len();
4875            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
4876            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
4877            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
4878            stats.audit_related_party_count = audit_snapshot.related_parties.len();
4879            stats.audit_related_party_transaction_count =
4880                audit_snapshot.related_party_transactions.len();
4881            info!(
4882                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
4883                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
4884                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
4885                 {} RP transactions",
4886                stats.audit_engagement_count,
4887                stats.audit_workpaper_count,
4888                stats.audit_evidence_count,
4889                stats.audit_risk_count,
4890                stats.audit_finding_count,
4891                stats.audit_judgment_count,
4892                stats.audit_confirmation_count,
4893                stats.audit_procedure_step_count,
4894                stats.audit_sample_count,
4895                stats.audit_analytical_result_count,
4896                stats.audit_ia_function_count,
4897                stats.audit_ia_report_count,
4898                stats.audit_related_party_count,
4899                stats.audit_related_party_transaction_count,
4900            );
4901            self.check_resources_with_log("post-audit")?;
4902            Ok(audit_snapshot)
4903        } else {
4904            debug!("Phase 8: Skipped (audit generation disabled)");
4905            Ok(AuditSnapshot::default())
4906        }
4907    }
4908
4909    /// Phase 9: Generate banking KYC/AML data.
4910    fn phase_banking_data(
4911        &mut self,
4912        stats: &mut EnhancedGenerationStatistics,
4913    ) -> SynthResult<BankingSnapshot> {
4914        if self.phase_config.generate_banking {
4915            info!("Phase 9: Generating Banking KYC/AML Data");
4916            let banking_snapshot = self.generate_banking_data()?;
4917            stats.banking_customer_count = banking_snapshot.customers.len();
4918            stats.banking_account_count = banking_snapshot.accounts.len();
4919            stats.banking_transaction_count = banking_snapshot.transactions.len();
4920            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
4921            info!(
4922                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
4923                stats.banking_customer_count, stats.banking_account_count,
4924                stats.banking_transaction_count, stats.banking_suspicious_count
4925            );
4926            self.check_resources_with_log("post-banking")?;
4927            Ok(banking_snapshot)
4928        } else {
4929            debug!("Phase 9: Skipped (banking generation disabled)");
4930            Ok(BankingSnapshot::default())
4931        }
4932    }
4933
4934    /// Phase 10: Export accounting network graphs for ML training.
4935    fn phase_graph_export(
4936        &mut self,
4937        entries: &[JournalEntry],
4938        coa: &Arc<ChartOfAccounts>,
4939        stats: &mut EnhancedGenerationStatistics,
4940    ) -> SynthResult<GraphExportSnapshot> {
4941        if self.phase_config.generate_graph_export && !entries.is_empty() {
4942            info!("Phase 10: Exporting Accounting Network Graphs");
4943            match self.export_graphs(entries, coa, stats) {
4944                Ok(snapshot) => {
4945                    info!(
4946                        "Graph export complete: {} graphs ({} nodes, {} edges)",
4947                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
4948                    );
4949                    Ok(snapshot)
4950                }
4951                Err(e) => {
4952                    warn!("Phase 10: Graph export failed: {}", e);
4953                    Ok(GraphExportSnapshot::default())
4954                }
4955            }
4956        } else {
4957            debug!("Phase 10: Skipped (graph export disabled or no entries)");
4958            Ok(GraphExportSnapshot::default())
4959        }
4960    }
4961
4962    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
4963    #[allow(clippy::too_many_arguments)]
4964    fn phase_hypergraph_export(
4965        &self,
4966        coa: &Arc<ChartOfAccounts>,
4967        entries: &[JournalEntry],
4968        document_flows: &DocumentFlowSnapshot,
4969        sourcing: &SourcingSnapshot,
4970        hr: &HrSnapshot,
4971        manufacturing: &ManufacturingSnapshot,
4972        banking: &BankingSnapshot,
4973        audit: &AuditSnapshot,
4974        financial_reporting: &FinancialReportingSnapshot,
4975        ocpm: &OcpmSnapshot,
4976        compliance: &ComplianceRegulationsSnapshot,
4977        stats: &mut EnhancedGenerationStatistics,
4978    ) -> SynthResult<()> {
4979        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
4980            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
4981            match self.export_hypergraph(
4982                coa,
4983                entries,
4984                document_flows,
4985                sourcing,
4986                hr,
4987                manufacturing,
4988                banking,
4989                audit,
4990                financial_reporting,
4991                ocpm,
4992                compliance,
4993                stats,
4994            ) {
4995                Ok(info) => {
4996                    info!(
4997                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
4998                        info.node_count, info.edge_count, info.hyperedge_count
4999                    );
5000                }
5001                Err(e) => {
5002                    warn!("Phase 10b: Hypergraph export failed: {}", e);
5003                }
5004            }
5005        } else {
5006            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5007        }
5008        Ok(())
5009    }
5010
5011    /// Phase 11: LLM Enrichment.
5012    ///
5013    /// Uses an LLM provider (mock by default) to enrich vendor names with
5014    /// realistic, context-aware names. This phase is non-blocking: failures
5015    /// log a warning but do not stop the generation pipeline.
5016    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5017        if !self.config.llm.enabled {
5018            debug!("Phase 11: Skipped (LLM enrichment disabled)");
5019            return;
5020        }
5021
5022        info!("Phase 11: Starting LLM Enrichment");
5023        let start = std::time::Instant::now();
5024
5025        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5026            // Select provider: use HttpLlmProvider when a non-mock provider is configured
5027            // and the corresponding API key environment variable is present.
5028            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5029                let schema_provider = &self.config.llm.provider;
5030                let api_key_env = match schema_provider.as_str() {
5031                    "openai" => Some("OPENAI_API_KEY"),
5032                    "anthropic" => Some("ANTHROPIC_API_KEY"),
5033                    "custom" => Some("LLM_API_KEY"),
5034                    _ => None,
5035                };
5036                if let Some(key_env) = api_key_env {
5037                    if std::env::var(key_env).is_ok() {
5038                        let llm_config = datasynth_core::llm::LlmConfig {
5039                            model: self.config.llm.model.clone(),
5040                            api_key_env: key_env.to_string(),
5041                            ..datasynth_core::llm::LlmConfig::default()
5042                        };
5043                        match HttpLlmProvider::new(llm_config) {
5044                            Ok(p) => Arc::new(p),
5045                            Err(e) => {
5046                                warn!(
5047                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
5048                                    e
5049                                );
5050                                Arc::new(MockLlmProvider::new(self.seed))
5051                            }
5052                        }
5053                    } else {
5054                        Arc::new(MockLlmProvider::new(self.seed))
5055                    }
5056                } else {
5057                    Arc::new(MockLlmProvider::new(self.seed))
5058                }
5059            };
5060            // v4.1.1+: multi-category enrichment. Vendors remain the
5061            // default path; customers and materials opt in via
5062            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5063            let industry = format!("{:?}", self.config.global.industry);
5064
5065            let vendor_enricher =
5066                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5067            let max_vendors = self
5068                .config
5069                .llm
5070                .max_vendor_enrichments
5071                .min(self.master_data.vendors.len());
5072            let mut vendors_enriched = 0usize;
5073            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5074                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5075                    Ok(name) => {
5076                        vendor.name = name;
5077                        vendors_enriched += 1;
5078                    }
5079                    Err(e) => warn!(
5080                        "LLM vendor enrichment failed for {}: {}",
5081                        vendor.vendor_id, e
5082                    ),
5083                }
5084            }
5085
5086            let mut customers_enriched = 0usize;
5087            if self.config.llm.enrich_customers {
5088                let customer_enricher =
5089                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5090                        &provider,
5091                    ));
5092                let max_customers = self
5093                    .config
5094                    .llm
5095                    .max_customer_enrichments
5096                    .min(self.master_data.customers.len());
5097                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5098                    match customer_enricher.enrich_customer_name(
5099                        &industry,
5100                        "general",
5101                        &customer.country,
5102                    ) {
5103                        Ok(name) => {
5104                            customer.name = name;
5105                            customers_enriched += 1;
5106                        }
5107                        Err(e) => warn!(
5108                            "LLM customer enrichment failed for {}: {}",
5109                            customer.customer_id, e
5110                        ),
5111                    }
5112                }
5113            }
5114
5115            let mut materials_enriched = 0usize;
5116            if self.config.llm.enrich_materials {
5117                let material_enricher =
5118                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5119                        &provider,
5120                    ));
5121                let max_materials = self
5122                    .config
5123                    .llm
5124                    .max_material_enrichments
5125                    .min(self.master_data.materials.len());
5126                for material in self.master_data.materials.iter_mut().take(max_materials) {
5127                    let material_type = format!("{:?}", material.material_type);
5128                    match material_enricher.enrich_material_description(&material_type, &industry) {
5129                        Ok(desc) => {
5130                            material.description = desc;
5131                            materials_enriched += 1;
5132                        }
5133                        Err(e) => warn!(
5134                            "LLM material enrichment failed for {}: {}",
5135                            material.material_id, e
5136                        ),
5137                    }
5138                }
5139            }
5140
5141            (vendors_enriched, customers_enriched, materials_enriched)
5142        }));
5143
5144        match result {
5145            Ok((v, c, m)) => {
5146                stats.llm_vendors_enriched = v;
5147                stats.llm_customers_enriched = c;
5148                stats.llm_materials_enriched = m;
5149                let elapsed = start.elapsed();
5150                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5151                info!(
5152                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5153                    v, c, m, stats.llm_enrichment_ms
5154                );
5155            }
5156            Err(_) => {
5157                let elapsed = start.elapsed();
5158                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5159                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5160            }
5161        }
5162    }
5163
5164    /// Phase 12: Diffusion Enhancement.
5165    ///
5166    /// Generates a sample set matching distribution properties from the
5167    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5168    /// - `"statistical"` (default) — moment-matching backend, always fast.
5169    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5170    ///   the `neural` Cargo feature; falls back to statistical when the
5171    ///   feature isn't compiled in, with a loud warning.
5172    ///
5173    /// This phase is non-blocking: failures log a warning but do not
5174    /// stop the pipeline.
5175    fn phase_diffusion_enhancement(
5176        &self,
5177        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5178        stats: &mut EnhancedGenerationStatistics,
5179    ) {
5180        if !self.config.diffusion.enabled {
5181            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5182            return;
5183        }
5184
5185        info!("Phase 12: Starting Diffusion Enhancement");
5186        let start = std::time::Instant::now();
5187
5188        let backend_choice = self.config.diffusion.backend.as_str();
5189        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5190
5191        if use_neural {
5192            #[cfg(feature = "neural")]
5193            {
5194                match self.run_neural_diffusion_phase(entries) {
5195                    Ok(sample_count) => {
5196                        stats.diffusion_samples_generated = sample_count;
5197                        let elapsed = start.elapsed();
5198                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5199                        info!(
5200                            "Phase 12 complete ({}): {} samples in {}ms",
5201                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5202                        );
5203                        return;
5204                    }
5205                    Err(e) => {
5206                        warn!(
5207                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5208                        );
5209                        // Fall through to statistical path below.
5210                    }
5211                }
5212            }
5213            #[cfg(not(feature = "neural"))]
5214            {
5215                warn!(
5216                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5217                     not compiled in — falling back to statistical. Rebuild with \
5218                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5219                    backend_choice
5220                );
5221            }
5222        } else if !matches!(backend_choice, "statistical" | "") {
5223            warn!(
5224                "Phase 12: unknown backend '{}', falling back to statistical",
5225                backend_choice
5226            );
5227        }
5228
5229        // Statistical path (default + fallback).
5230        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5231            let means = vec![5000.0, 3.0, 2.0];
5232            let stds = vec![2000.0, 1.5, 1.0];
5233
5234            let diffusion_config = DiffusionConfig {
5235                n_steps: self.config.diffusion.n_steps,
5236                seed: self.seed,
5237                ..Default::default()
5238            };
5239
5240            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5241            let n_samples = self.config.diffusion.sample_size;
5242            let n_features = 3;
5243            backend.generate(n_samples, n_features, self.seed).len()
5244        }));
5245
5246        match result {
5247            Ok(sample_count) => {
5248                stats.diffusion_samples_generated = sample_count;
5249                let elapsed = start.elapsed();
5250                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5251                info!(
5252                    "Phase 12 complete (statistical): {} samples in {}ms",
5253                    sample_count, stats.diffusion_enhancement_ms
5254                );
5255            }
5256            Err(_) => {
5257                let elapsed = start.elapsed();
5258                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5259                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5260            }
5261        }
5262    }
5263
5264    /// Neural-backend execution — either load a pre-trained checkpoint
5265    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5266    /// from the first batch of JE amounts. Returns the sample count
5267    /// produced; any error bubbles up to the statistical fallback.
5268    #[cfg(feature = "neural")]
5269    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5270        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5271
5272        if entries.is_empty() {
5273            return Err(SynthError::generation(
5274                "neural diffusion: no journal entries available as training data",
5275            ));
5276        }
5277
5278        let training_data: Vec<Vec<f64>> = entries
5279            .iter()
5280            .take(5000)
5281            .map(|je| {
5282                let total_amount: f64 = je
5283                    .lines
5284                    .iter()
5285                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5286                    .map(|l| {
5287                        use rust_decimal::prelude::ToPrimitive;
5288                        l.debit_amount.to_f64().unwrap_or(0.0)
5289                    })
5290                    .sum();
5291                let line_count = je.lines.len() as f64;
5292                // Use the approval-workflow depth as the third feature
5293                // (proxy for complexity / risk). `None` → 1.
5294                let approval_level = je
5295                    .header
5296                    .approval_workflow
5297                    .as_ref()
5298                    .map(|w| w.required_levels as f64)
5299                    .unwrap_or(1.0);
5300                vec![total_amount, line_count, approval_level]
5301            })
5302            .collect();
5303
5304        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5305
5306        let cfg = &self.config.diffusion;
5307        let neural_cfg = &cfg.neural;
5308
5309        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5310            neural_cfg.checkpoint_path.as_ref()
5311        {
5312            let path = std::path::Path::new(ckpt_path);
5313            info!(
5314                "  Neural diffusion: loading checkpoint from {}",
5315                path.display()
5316            );
5317            NeuralDiffusionBackend::load(path)
5318                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5319        } else {
5320            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5321            info!(
5322                "  Neural diffusion: training score network on {} rows × {} features, \
5323                     {} epochs, hidden_dims={:?}",
5324                training_data.len(),
5325                n_features,
5326                neural_cfg.training_epochs,
5327                neural_cfg.hidden_dims
5328            );
5329            let training_config = NeuralTrainingConfig {
5330                n_steps: cfg.n_steps,
5331                schedule: cfg.schedule.clone(),
5332                hidden_dims: neural_cfg.hidden_dims.clone(),
5333                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5334                learning_rate: neural_cfg.learning_rate,
5335                epochs: neural_cfg.training_epochs,
5336                batch_size: neural_cfg.batch_size,
5337            };
5338            let (backend, report) =
5339                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5340                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5341            info!(
5342                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5343                report.epochs_completed, report.final_loss
5344            );
5345            backend
5346        };
5347
5348        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5349        Ok(samples.len())
5350    }
5351
5352    /// Phase 13: Causal Overlay.
5353    ///
5354    /// Builds a structural causal model from a built-in template (e.g.,
5355    /// fraud_detection) and generates causal samples. Optionally validates
5356    /// that the output respects the causal structure. This phase is
5357    /// non-blocking: failures log a warning but do not stop the pipeline.
5358    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5359        if !self.config.causal.enabled {
5360            debug!("Phase 13: Skipped (causal generation disabled)");
5361            return;
5362        }
5363
5364        info!("Phase 13: Starting Causal Overlay");
5365        let start = std::time::Instant::now();
5366
5367        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5368            // Select template based on config
5369            let graph = match self.config.causal.template.as_str() {
5370                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5371                _ => CausalGraph::fraud_detection_template(),
5372            };
5373
5374            let scm = StructuralCausalModel::new(graph.clone())
5375                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5376
5377            let n_samples = self.config.causal.sample_size;
5378            let samples = scm
5379                .generate(n_samples, self.seed)
5380                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5381
5382            // Optionally validate causal structure
5383            let validation_passed = if self.config.causal.validate {
5384                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5385                if report.valid {
5386                    info!(
5387                        "Causal validation passed: all {} checks OK",
5388                        report.checks.len()
5389                    );
5390                } else {
5391                    warn!(
5392                        "Causal validation: {} violations detected: {:?}",
5393                        report.violations.len(),
5394                        report.violations
5395                    );
5396                }
5397                Some(report.valid)
5398            } else {
5399                None
5400            };
5401
5402            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5403        }));
5404
5405        match result {
5406            Ok(Ok((sample_count, validation_passed))) => {
5407                stats.causal_samples_generated = sample_count;
5408                stats.causal_validation_passed = validation_passed;
5409                let elapsed = start.elapsed();
5410                stats.causal_generation_ms = elapsed.as_millis() as u64;
5411                info!(
5412                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5413                    sample_count, stats.causal_generation_ms, validation_passed,
5414                );
5415            }
5416            Ok(Err(e)) => {
5417                let elapsed = start.elapsed();
5418                stats.causal_generation_ms = elapsed.as_millis() as u64;
5419                warn!("Phase 13: Causal generation failed: {}", e);
5420            }
5421            Err(_) => {
5422                let elapsed = start.elapsed();
5423                stats.causal_generation_ms = elapsed.as_millis() as u64;
5424                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5425            }
5426        }
5427    }
5428
5429    /// Phase 14: Generate S2C sourcing data.
5430    fn phase_sourcing_data(
5431        &mut self,
5432        stats: &mut EnhancedGenerationStatistics,
5433    ) -> SynthResult<SourcingSnapshot> {
5434        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5435            debug!("Phase 14: Skipped (sourcing generation disabled)");
5436            return Ok(SourcingSnapshot::default());
5437        }
5438        let degradation = self.check_resources()?;
5439        if degradation >= DegradationLevel::Reduced {
5440            debug!(
5441                "Phase skipped due to resource pressure (degradation: {:?})",
5442                degradation
5443            );
5444            return Ok(SourcingSnapshot::default());
5445        }
5446
5447        info!("Phase 14: Generating S2C Sourcing Data");
5448        let seed = self.seed;
5449
5450        // Gather vendor data from master data
5451        let vendor_ids: Vec<String> = self
5452            .master_data
5453            .vendors
5454            .iter()
5455            .map(|v| v.vendor_id.clone())
5456            .collect();
5457        if vendor_ids.is_empty() {
5458            debug!("Phase 14: Skipped (no vendors available)");
5459            return Ok(SourcingSnapshot::default());
5460        }
5461
5462        let categories: Vec<(String, String)> = vec![
5463            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5464            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5465            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5466            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5467            ("CAT-LOG".to_string(), "Logistics".to_string()),
5468        ];
5469        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5470            .iter()
5471            .map(|(id, name)| {
5472                (
5473                    id.clone(),
5474                    name.clone(),
5475                    rust_decimal::Decimal::from(100_000),
5476                )
5477            })
5478            .collect();
5479
5480        let company_code = self
5481            .config
5482            .companies
5483            .first()
5484            .map(|c| c.code.as_str())
5485            .unwrap_or("1000");
5486        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5487            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5488        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5489        let fiscal_year = start_date.year() as u16;
5490        let owner_ids: Vec<String> = self
5491            .master_data
5492            .employees
5493            .iter()
5494            .take(5)
5495            .map(|e| e.employee_id.clone())
5496            .collect();
5497        let owner_id = owner_ids
5498            .first()
5499            .map(std::string::String::as_str)
5500            .unwrap_or("BUYER-001");
5501
5502        // Step 1: Spend Analysis
5503        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5504        let spend_analyses =
5505            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5506
5507        // Step 2: Sourcing Projects
5508        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5509        let sourcing_projects = if owner_ids.is_empty() {
5510            Vec::new()
5511        } else {
5512            project_gen.generate(
5513                company_code,
5514                &categories_with_spend,
5515                &owner_ids,
5516                start_date,
5517                self.config.global.period_months,
5518            )
5519        };
5520        stats.sourcing_project_count = sourcing_projects.len();
5521
5522        // Step 3: Qualifications
5523        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5524        let mut qual_gen = QualificationGenerator::new(seed + 2);
5525        let qualifications = qual_gen.generate(
5526            company_code,
5527            &qual_vendor_ids,
5528            sourcing_projects.first().map(|p| p.project_id.as_str()),
5529            owner_id,
5530            start_date,
5531        );
5532
5533        // Step 4: RFx Events
5534        let mut rfx_gen = RfxGenerator::new(seed + 3);
5535        let rfx_events: Vec<RfxEvent> = sourcing_projects
5536            .iter()
5537            .map(|proj| {
5538                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5539                rfx_gen.generate(
5540                    company_code,
5541                    &proj.project_id,
5542                    &proj.category_id,
5543                    &qualified_vids,
5544                    owner_id,
5545                    start_date,
5546                    50000.0,
5547                )
5548            })
5549            .collect();
5550        stats.rfx_event_count = rfx_events.len();
5551
5552        // Step 5: Bids
5553        let mut bid_gen = BidGenerator::new(seed + 4);
5554        let mut all_bids = Vec::new();
5555        for rfx in &rfx_events {
5556            let bidder_count = vendor_ids.len().clamp(2, 5);
5557            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5558            let bids = bid_gen.generate(rfx, &responding, start_date);
5559            all_bids.extend(bids);
5560        }
5561        stats.bid_count = all_bids.len();
5562
5563        // Step 6: Bid Evaluations
5564        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5565        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5566            .iter()
5567            .map(|rfx| {
5568                let rfx_bids: Vec<SupplierBid> = all_bids
5569                    .iter()
5570                    .filter(|b| b.rfx_id == rfx.rfx_id)
5571                    .cloned()
5572                    .collect();
5573                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5574            })
5575            .collect();
5576
5577        // Step 7: Contracts from winning bids
5578        let mut contract_gen = ContractGenerator::new(seed + 6);
5579        let contracts: Vec<ProcurementContract> = bid_evaluations
5580            .iter()
5581            .zip(rfx_events.iter())
5582            .filter_map(|(eval, rfx)| {
5583                eval.ranked_bids.first().and_then(|winner| {
5584                    all_bids
5585                        .iter()
5586                        .find(|b| b.bid_id == winner.bid_id)
5587                        .map(|winning_bid| {
5588                            contract_gen.generate_from_bid(
5589                                winning_bid,
5590                                Some(&rfx.sourcing_project_id),
5591                                &rfx.category_id,
5592                                owner_id,
5593                                start_date,
5594                            )
5595                        })
5596                })
5597            })
5598            .collect();
5599        stats.contract_count = contracts.len();
5600
5601        // Step 8: Catalog Items
5602        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5603        let catalog_items = catalog_gen.generate(&contracts);
5604        stats.catalog_item_count = catalog_items.len();
5605
5606        // Step 9: Scorecards
5607        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5608        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5609            .iter()
5610            .fold(
5611                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5612                |mut acc, c| {
5613                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5614                    acc
5615                },
5616            )
5617            .into_iter()
5618            .collect();
5619        let scorecards = scorecard_gen.generate(
5620            company_code,
5621            &vendor_contracts,
5622            start_date,
5623            end_date,
5624            owner_id,
5625        );
5626        stats.scorecard_count = scorecards.len();
5627
5628        // Back-populate cross-references on sourcing projects (Task 35)
5629        // Link each project to its RFx events, contracts, and spend analyses
5630        let mut sourcing_projects = sourcing_projects;
5631        for project in &mut sourcing_projects {
5632            // Link RFx events generated for this project
5633            project.rfx_ids = rfx_events
5634                .iter()
5635                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5636                .map(|rfx| rfx.rfx_id.clone())
5637                .collect();
5638
5639            // Link contract awarded from this project's RFx
5640            project.contract_id = contracts
5641                .iter()
5642                .find(|c| {
5643                    c.sourcing_project_id
5644                        .as_deref()
5645                        .is_some_and(|sp| sp == project.project_id)
5646                })
5647                .map(|c| c.contract_id.clone());
5648
5649            // Link spend analysis for matching category (use category_id as the reference)
5650            project.spend_analysis_id = spend_analyses
5651                .iter()
5652                .find(|sa| sa.category_id == project.category_id)
5653                .map(|sa| sa.category_id.clone());
5654        }
5655
5656        info!(
5657            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5658            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5659            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5660        );
5661        self.check_resources_with_log("post-sourcing")?;
5662
5663        Ok(SourcingSnapshot {
5664            spend_analyses,
5665            sourcing_projects,
5666            qualifications,
5667            rfx_events,
5668            bids: all_bids,
5669            bid_evaluations,
5670            contracts,
5671            catalog_items,
5672            scorecards,
5673        })
5674    }
5675
5676    /// Build a [`GroupStructure`] from the current company configuration.
5677    ///
5678    /// The first company in the configuration is treated as the ultimate parent.
5679    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5680    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5681    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5682        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5683
5684        let parent_code = self
5685            .config
5686            .companies
5687            .first()
5688            .map(|c| c.code.clone())
5689            .unwrap_or_else(|| "PARENT".to_string());
5690
5691        let mut group = GroupStructure::new(parent_code);
5692
5693        for company in self.config.companies.iter().skip(1) {
5694            let sub =
5695                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5696            group.add_subsidiary(sub);
5697        }
5698
5699        group
5700    }
5701
5702    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5703    fn phase_intercompany(
5704        &mut self,
5705        journal_entries: &[JournalEntry],
5706        stats: &mut EnhancedGenerationStatistics,
5707    ) -> SynthResult<IntercompanySnapshot> {
5708        // Skip if intercompany is disabled in config
5709        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5710            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5711            return Ok(IntercompanySnapshot::default());
5712        }
5713
5714        // Intercompany requires at least 2 companies
5715        if self.config.companies.len() < 2 {
5716            debug!(
5717                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5718                self.config.companies.len()
5719            );
5720            return Ok(IntercompanySnapshot::default());
5721        }
5722
5723        info!("Phase 14b: Generating Intercompany Transactions");
5724
5725        // Build the group structure early — used by ISA 600 component auditor scope
5726        // and consolidated financial statement generators downstream.
5727        let group_structure = self.build_group_structure();
5728        debug!(
5729            "Group structure built: parent={}, subsidiaries={}",
5730            group_structure.parent_entity,
5731            group_structure.subsidiaries.len()
5732        );
5733
5734        let seed = self.seed;
5735        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5736            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5737        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5738
5739        // Build ownership structure from company configs
5740        // First company is treated as the parent, remaining are subsidiaries
5741        let parent_code = self.config.companies[0].code.clone();
5742        let mut ownership_structure =
5743            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5744
5745        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5746            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5747                format!("REL{:03}", i + 1),
5748                parent_code.clone(),
5749                company.code.clone(),
5750                rust_decimal::Decimal::from(100), // Default 100% ownership
5751                start_date,
5752            );
5753            ownership_structure.add_relationship(relationship);
5754        }
5755
5756        // Convert config transfer pricing method to core model enum
5757        let tp_method = match self.config.intercompany.transfer_pricing_method {
5758            datasynth_config::schema::TransferPricingMethod::CostPlus => {
5759                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
5760            }
5761            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
5762                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
5763            }
5764            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
5765                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
5766            }
5767            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
5768                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
5769            }
5770            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
5771                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
5772            }
5773        };
5774
5775        // Build IC generator config from schema config
5776        let ic_currency = self
5777            .config
5778            .companies
5779            .first()
5780            .map(|c| c.currency.clone())
5781            .unwrap_or_else(|| "USD".to_string());
5782        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
5783            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
5784            transfer_pricing_method: tp_method,
5785            markup_percent: rust_decimal::Decimal::from_f64_retain(
5786                self.config.intercompany.markup_percent,
5787            )
5788            .unwrap_or(rust_decimal::Decimal::from(5)),
5789            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
5790            default_currency: ic_currency,
5791            ..Default::default()
5792        };
5793
5794        // Create IC generator
5795        let mut ic_generator = datasynth_generators::ICGenerator::new(
5796            ic_gen_config,
5797            ownership_structure.clone(),
5798            seed + 50,
5799        );
5800
5801        // Generate IC transactions for the period
5802        // Use ~3 transactions per day as a reasonable default
5803        let transactions_per_day = 3;
5804        let matched_pairs = ic_generator.generate_transactions_for_period(
5805            start_date,
5806            end_date,
5807            transactions_per_day,
5808        );
5809
5810        // Generate IC source P2P/O2C documents
5811        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
5812        debug!(
5813            "Generated {} IC seller invoices, {} IC buyer POs",
5814            ic_doc_chains.seller_invoices.len(),
5815            ic_doc_chains.buyer_orders.len()
5816        );
5817
5818        // Generate journal entries from matched pairs
5819        let mut seller_entries = Vec::new();
5820        let mut buyer_entries = Vec::new();
5821        let fiscal_year = start_date.year();
5822
5823        for pair in &matched_pairs {
5824            let fiscal_period = pair.posting_date.month();
5825            let (seller_je, buyer_je) =
5826                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
5827            seller_entries.push(seller_je);
5828            buyer_entries.push(buyer_je);
5829        }
5830
5831        // Run matching engine
5832        let matching_config = datasynth_generators::ICMatchingConfig {
5833            base_currency: self
5834                .config
5835                .companies
5836                .first()
5837                .map(|c| c.currency.clone())
5838                .unwrap_or_else(|| "USD".to_string()),
5839            ..Default::default()
5840        };
5841        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
5842        matching_engine.load_matched_pairs(&matched_pairs);
5843        let matching_result = matching_engine.run_matching(end_date);
5844
5845        // Generate elimination entries if configured
5846        let mut elimination_entries = Vec::new();
5847        if self.config.intercompany.generate_eliminations {
5848            let elim_config = datasynth_generators::EliminationConfig {
5849                consolidation_entity: "GROUP".to_string(),
5850                base_currency: self
5851                    .config
5852                    .companies
5853                    .first()
5854                    .map(|c| c.currency.clone())
5855                    .unwrap_or_else(|| "USD".to_string()),
5856                ..Default::default()
5857            };
5858
5859            let mut elim_generator =
5860                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
5861
5862            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
5863            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
5864                matching_result
5865                    .matched_balances
5866                    .iter()
5867                    .chain(matching_result.unmatched_balances.iter())
5868                    .cloned()
5869                    .collect();
5870
5871            // Build investment and equity maps from the group structure so that the
5872            // elimination generator can produce equity-investment elimination entries
5873            // (parent's investment in subsidiary vs. subsidiary's equity capital).
5874            //
5875            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
5876            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
5877            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
5878            //
5879            // Net assets are derived from the journal entries using account-range heuristics:
5880            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
5881            // no JE data is available (IC phase runs early in the generation pipeline).
5882            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
5883                std::collections::HashMap::new();
5884            let mut equity_amounts: std::collections::HashMap<
5885                String,
5886                std::collections::HashMap<String, rust_decimal::Decimal>,
5887            > = std::collections::HashMap::new();
5888            {
5889                use rust_decimal::Decimal;
5890                let hundred = Decimal::from(100u32);
5891                let ten_pct = Decimal::new(10, 2); // 0.10
5892                let thirty_pct = Decimal::new(30, 2); // 0.30
5893                let sixty_pct = Decimal::new(60, 2); // 0.60
5894                let parent_code = &group_structure.parent_entity;
5895                for sub in &group_structure.subsidiaries {
5896                    let net_assets = {
5897                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5898                        if na > Decimal::ZERO {
5899                            na
5900                        } else {
5901                            Decimal::from(1_000_000u64)
5902                        }
5903                    };
5904                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
5905                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
5906                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
5907
5908                    // Split subsidiary equity into conventional components:
5909                    // 10 % share capital / 30 % APIC / 60 % retained earnings
5910                    let mut eq_map = std::collections::HashMap::new();
5911                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
5912                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
5913                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
5914                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
5915                }
5916            }
5917
5918            let journal = elim_generator.generate_eliminations(
5919                &fiscal_period,
5920                end_date,
5921                &all_balances,
5922                &matched_pairs,
5923                &investment_amounts,
5924                &equity_amounts,
5925            );
5926
5927            elimination_entries = journal.entries.clone();
5928        }
5929
5930        let matched_pair_count = matched_pairs.len();
5931        let elimination_entry_count = elimination_entries.len();
5932        let match_rate = matching_result.match_rate;
5933
5934        stats.ic_matched_pair_count = matched_pair_count;
5935        stats.ic_elimination_count = elimination_entry_count;
5936        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
5937
5938        info!(
5939            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
5940            matched_pair_count,
5941            stats.ic_transaction_count,
5942            seller_entries.len(),
5943            buyer_entries.len(),
5944            elimination_entry_count,
5945            match_rate * 100.0
5946        );
5947        self.check_resources_with_log("post-intercompany")?;
5948
5949        // ----------------------------------------------------------------
5950        // NCI measurements: derive from group structure ownership percentages
5951        // ----------------------------------------------------------------
5952        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
5953            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
5954            use rust_decimal::Decimal;
5955
5956            let eight_pct = Decimal::new(8, 2); // 0.08
5957
5958            group_structure
5959                .subsidiaries
5960                .iter()
5961                .filter(|sub| {
5962                    sub.nci_percentage > Decimal::ZERO
5963                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
5964                })
5965                .map(|sub| {
5966                    // Compute net assets from actual journal entries for this subsidiary.
5967                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
5968                    // IC phase runs before the main JE batch has been populated).
5969                    let net_assets_from_jes =
5970                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
5971
5972                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
5973                        net_assets_from_jes.round_dp(2)
5974                    } else {
5975                        // Fallback: use a plausible base amount
5976                        Decimal::from(1_000_000u64)
5977                    };
5978
5979                    // Net income approximated as 8% of net assets
5980                    let net_income = (net_assets * eight_pct).round_dp(2);
5981
5982                    NciMeasurement::compute(
5983                        sub.entity_code.clone(),
5984                        sub.nci_percentage,
5985                        net_assets,
5986                        net_income,
5987                    )
5988                })
5989                .collect()
5990        };
5991
5992        if !nci_measurements.is_empty() {
5993            info!(
5994                "NCI measurements: {} subsidiaries with non-controlling interests",
5995                nci_measurements.len()
5996            );
5997        }
5998
5999        Ok(IntercompanySnapshot {
6000            group_structure: Some(group_structure),
6001            matched_pairs,
6002            seller_journal_entries: seller_entries,
6003            buyer_journal_entries: buyer_entries,
6004            elimination_entries,
6005            nci_measurements,
6006            ic_document_chains: Some(ic_doc_chains),
6007            matched_pair_count,
6008            elimination_entry_count,
6009            match_rate,
6010        })
6011    }
6012
6013    /// Phase 15: Generate bank reconciliations and financial statements.
6014    fn phase_financial_reporting(
6015        &mut self,
6016        document_flows: &DocumentFlowSnapshot,
6017        journal_entries: &[JournalEntry],
6018        coa: &Arc<ChartOfAccounts>,
6019        _hr: &HrSnapshot,
6020        _audit: &AuditSnapshot,
6021        stats: &mut EnhancedGenerationStatistics,
6022    ) -> SynthResult<FinancialReportingSnapshot> {
6023        let fs_enabled = self.phase_config.generate_financial_statements
6024            || self.config.financial_reporting.enabled;
6025        let br_enabled = self.phase_config.generate_bank_reconciliation;
6026
6027        if !fs_enabled && !br_enabled {
6028            debug!("Phase 15: Skipped (financial reporting disabled)");
6029            return Ok(FinancialReportingSnapshot::default());
6030        }
6031
6032        info!("Phase 15: Generating Financial Reporting Data");
6033
6034        let seed = self.seed;
6035        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6036            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6037
6038        let mut financial_statements = Vec::new();
6039        let mut bank_reconciliations = Vec::new();
6040        let mut trial_balances = Vec::new();
6041        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6042        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6043            Vec::new();
6044        // Standalone statements keyed by entity code
6045        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6046            std::collections::HashMap::new();
6047        // Consolidated statements (one per period)
6048        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6049        // Consolidation schedules (one per period)
6050        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6051
6052        // Generate financial statements from JE-derived trial balances.
6053        //
6054        // When journal entries are available, we use cumulative trial balances for
6055        // balance sheet accounts and current-period trial balances for income
6056        // statement accounts. We also track prior-period trial balances so the
6057        // generator can produce comparative amounts, and we build a proper
6058        // cash flow statement from working capital changes rather than random data.
6059        if fs_enabled {
6060            let has_journal_entries = !journal_entries.is_empty();
6061
6062            // Use FinancialStatementGenerator for balance sheet and income statement,
6063            // but build cash flow ourselves from TB data when JEs are available.
6064            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6065            // Separate generator for consolidated statements (different seed offset)
6066            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6067
6068            // Collect elimination JEs once (reused across periods)
6069            let elimination_entries: Vec<&JournalEntry> = journal_entries
6070                .iter()
6071                .filter(|je| je.header.is_elimination)
6072                .collect();
6073
6074            // Generate one set of statements per period, per entity
6075            for period in 0..self.config.global.period_months {
6076                let period_start = start_date + chrono::Months::new(period);
6077                let period_end =
6078                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6079                let fiscal_year = period_end.year() as u16;
6080                let fiscal_period = period_end.month() as u8;
6081                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6082
6083                // Build per-entity trial balances for this period (non-elimination JEs)
6084                // We accumulate them for the consolidation step.
6085                let mut entity_tb_map: std::collections::HashMap<
6086                    String,
6087                    std::collections::HashMap<String, rust_decimal::Decimal>,
6088                > = std::collections::HashMap::new();
6089
6090                // --- Standalone: one set of statements per company ---
6091                for (company_idx, company) in self.config.companies.iter().enumerate() {
6092                    let company_code = company.code.as_str();
6093                    let currency = company.currency.as_str();
6094                    // Use a unique seed offset per company to keep statements deterministic
6095                    // and distinct across companies
6096                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6097                    let mut company_fs_gen =
6098                        FinancialStatementGenerator::new(seed + company_seed_offset);
6099
6100                    if has_journal_entries {
6101                        let tb_entries = Self::build_cumulative_trial_balance(
6102                            journal_entries,
6103                            coa,
6104                            company_code,
6105                            start_date,
6106                            period_end,
6107                            fiscal_year,
6108                            fiscal_period,
6109                        );
6110
6111                        // Accumulate per-entity category balances for consolidation
6112                        let entity_cat_map =
6113                            entity_tb_map.entry(company_code.to_string()).or_default();
6114                        for tb_entry in &tb_entries {
6115                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6116                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6117                        }
6118
6119                        let stmts = company_fs_gen.generate(
6120                            company_code,
6121                            currency,
6122                            &tb_entries,
6123                            period_start,
6124                            period_end,
6125                            fiscal_year,
6126                            fiscal_period,
6127                            None,
6128                            "SYS-AUTOCLOSE",
6129                        );
6130
6131                        let mut entity_stmts = Vec::new();
6132                        for stmt in stmts {
6133                            if stmt.statement_type == StatementType::CashFlowStatement {
6134                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6135                                let cf_items = Self::build_cash_flow_from_trial_balances(
6136                                    &tb_entries,
6137                                    None,
6138                                    net_income,
6139                                );
6140                                entity_stmts.push(FinancialStatement {
6141                                    cash_flow_items: cf_items,
6142                                    ..stmt
6143                                });
6144                            } else {
6145                                entity_stmts.push(stmt);
6146                            }
6147                        }
6148
6149                        // Add to the flat financial_statements list (used by KPI/budget)
6150                        financial_statements.extend(entity_stmts.clone());
6151
6152                        // Store standalone per-entity
6153                        standalone_statements
6154                            .entry(company_code.to_string())
6155                            .or_default()
6156                            .extend(entity_stmts);
6157
6158                        // Only store trial balance for the first company in the period
6159                        // to avoid duplicates in the trial_balances list
6160                        if company_idx == 0 {
6161                            trial_balances.push(PeriodTrialBalance {
6162                                fiscal_year,
6163                                fiscal_period,
6164                                period_start,
6165                                period_end,
6166                                entries: tb_entries,
6167                            });
6168                        }
6169                    } else {
6170                        // Fallback: no JEs available
6171                        let tb_entries = Self::build_trial_balance_from_entries(
6172                            journal_entries,
6173                            coa,
6174                            company_code,
6175                            fiscal_year,
6176                            fiscal_period,
6177                        );
6178
6179                        let stmts = company_fs_gen.generate(
6180                            company_code,
6181                            currency,
6182                            &tb_entries,
6183                            period_start,
6184                            period_end,
6185                            fiscal_year,
6186                            fiscal_period,
6187                            None,
6188                            "SYS-AUTOCLOSE",
6189                        );
6190                        financial_statements.extend(stmts.clone());
6191                        standalone_statements
6192                            .entry(company_code.to_string())
6193                            .or_default()
6194                            .extend(stmts);
6195
6196                        if company_idx == 0 && !tb_entries.is_empty() {
6197                            trial_balances.push(PeriodTrialBalance {
6198                                fiscal_year,
6199                                fiscal_period,
6200                                period_start,
6201                                period_end,
6202                                entries: tb_entries,
6203                            });
6204                        }
6205                    }
6206                }
6207
6208                // --- Consolidated: aggregate all entities + apply eliminations ---
6209                // Use the primary (first) company's currency for the consolidated statement
6210                let group_currency = self
6211                    .config
6212                    .companies
6213                    .first()
6214                    .map(|c| c.currency.as_str())
6215                    .unwrap_or("USD");
6216
6217                // Build owned elimination entries for this period
6218                let period_eliminations: Vec<JournalEntry> = elimination_entries
6219                    .iter()
6220                    .filter(|je| {
6221                        je.header.fiscal_year == fiscal_year
6222                            && je.header.fiscal_period == fiscal_period
6223                    })
6224                    .map(|je| (*je).clone())
6225                    .collect();
6226
6227                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6228                    &entity_tb_map,
6229                    &period_eliminations,
6230                    &period_label,
6231                );
6232
6233                // Build a pseudo trial balance from consolidated line items for the
6234                // FinancialStatementGenerator to use (only for cash flow direction).
6235                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6236                    .line_items
6237                    .iter()
6238                    .map(|li| {
6239                        let net = li.post_elimination_total;
6240                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6241                            (net, rust_decimal::Decimal::ZERO)
6242                        } else {
6243                            (rust_decimal::Decimal::ZERO, -net)
6244                        };
6245                        datasynth_generators::TrialBalanceEntry {
6246                            account_code: li.account_category.clone(),
6247                            account_name: li.account_category.clone(),
6248                            category: li.account_category.clone(),
6249                            debit_balance: debit,
6250                            credit_balance: credit,
6251                        }
6252                    })
6253                    .collect();
6254
6255                let mut cons_stmts = cons_gen.generate(
6256                    "GROUP",
6257                    group_currency,
6258                    &cons_tb,
6259                    period_start,
6260                    period_end,
6261                    fiscal_year,
6262                    fiscal_period,
6263                    None,
6264                    "SYS-AUTOCLOSE",
6265                );
6266
6267                // Split consolidated line items by statement type.
6268                // The consolidation generator returns BS items first, then IS items,
6269                // identified by their CONS- prefix and category.
6270                let bs_categories: &[&str] = &[
6271                    "CASH",
6272                    "RECEIVABLES",
6273                    "INVENTORY",
6274                    "FIXEDASSETS",
6275                    "PAYABLES",
6276                    "ACCRUEDLIABILITIES",
6277                    "LONGTERMDEBT",
6278                    "EQUITY",
6279                ];
6280                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6281                    cons_line_items.into_iter().partition(|li| {
6282                        let upper = li.label.to_uppercase();
6283                        bs_categories.iter().any(|c| upper == *c)
6284                    });
6285
6286                for stmt in &mut cons_stmts {
6287                    stmt.is_consolidated = true;
6288                    match stmt.statement_type {
6289                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6290                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6291                        _ => {} // CF and equity change statements keep generator output
6292                    }
6293                }
6294
6295                consolidated_statements.extend(cons_stmts);
6296                consolidation_schedules.push(schedule);
6297            }
6298
6299            // Backward compat: if only 1 company, use existing code path logic
6300            // (prior_cumulative_tb for comparative amounts). Already handled above;
6301            // the prior_ref is omitted to keep this change minimal.
6302            let _ = &mut fs_gen; // suppress unused warning
6303
6304            stats.financial_statement_count = financial_statements.len();
6305            info!(
6306                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6307                stats.financial_statement_count,
6308                consolidated_statements.len(),
6309                has_journal_entries
6310            );
6311
6312            // ----------------------------------------------------------------
6313            // IFRS 8 / ASC 280: Operating Segment Reporting
6314            // ----------------------------------------------------------------
6315            // Build entity seeds from the company configuration.
6316            let entity_seeds: Vec<SegmentSeed> = self
6317                .config
6318                .companies
6319                .iter()
6320                .map(|c| SegmentSeed {
6321                    code: c.code.clone(),
6322                    name: c.name.clone(),
6323                    currency: c.currency.clone(),
6324                })
6325                .collect();
6326
6327            let mut seg_gen = SegmentGenerator::new(seed + 30);
6328
6329            // Generate one set of segment reports per period.
6330            // We extract consolidated revenue / profit / assets from the consolidated
6331            // financial statements produced above, falling back to simple sums when
6332            // no consolidated statements were generated (single-entity path).
6333            for period in 0..self.config.global.period_months {
6334                let period_end =
6335                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6336                let fiscal_year = period_end.year() as u16;
6337                let fiscal_period = period_end.month() as u8;
6338                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6339
6340                use datasynth_core::models::StatementType;
6341
6342                // Try to find consolidated income statement for this period
6343                let cons_is = consolidated_statements.iter().find(|s| {
6344                    s.fiscal_year == fiscal_year
6345                        && s.fiscal_period == fiscal_period
6346                        && s.statement_type == StatementType::IncomeStatement
6347                });
6348                let cons_bs = consolidated_statements.iter().find(|s| {
6349                    s.fiscal_year == fiscal_year
6350                        && s.fiscal_period == fiscal_period
6351                        && s.statement_type == StatementType::BalanceSheet
6352                });
6353
6354                // If consolidated statements not available fall back to the flat list
6355                let is_stmt = cons_is.or_else(|| {
6356                    financial_statements.iter().find(|s| {
6357                        s.fiscal_year == fiscal_year
6358                            && s.fiscal_period == fiscal_period
6359                            && s.statement_type == StatementType::IncomeStatement
6360                    })
6361                });
6362                let bs_stmt = cons_bs.or_else(|| {
6363                    financial_statements.iter().find(|s| {
6364                        s.fiscal_year == fiscal_year
6365                            && s.fiscal_period == fiscal_period
6366                            && s.statement_type == StatementType::BalanceSheet
6367                    })
6368                });
6369
6370                let consolidated_revenue = is_stmt
6371                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6372                    .map(|li| -li.amount) // revenue is stored as negative in IS
6373                    .unwrap_or(rust_decimal::Decimal::ZERO);
6374
6375                let consolidated_profit = is_stmt
6376                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6377                    .map(|li| li.amount)
6378                    .unwrap_or(rust_decimal::Decimal::ZERO);
6379
6380                let consolidated_assets = bs_stmt
6381                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6382                    .map(|li| li.amount)
6383                    .unwrap_or(rust_decimal::Decimal::ZERO);
6384
6385                // Skip periods where we have no financial data
6386                if consolidated_revenue == rust_decimal::Decimal::ZERO
6387                    && consolidated_assets == rust_decimal::Decimal::ZERO
6388                {
6389                    continue;
6390                }
6391
6392                let group_code = self
6393                    .config
6394                    .companies
6395                    .first()
6396                    .map(|c| c.code.as_str())
6397                    .unwrap_or("GROUP");
6398
6399                // Compute period depreciation from JEs with document type "CL" hitting account
6400                // 6000 (depreciation expense).  These are generated by phase_period_close.
6401                let total_depr: rust_decimal::Decimal = journal_entries
6402                    .iter()
6403                    .filter(|je| je.header.document_type == "CL")
6404                    .flat_map(|je| je.lines.iter())
6405                    .filter(|l| l.gl_account.starts_with("6000"))
6406                    .map(|l| l.debit_amount)
6407                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6408                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6409                    Some(total_depr)
6410                } else {
6411                    None
6412                };
6413
6414                let (segs, recon) = seg_gen.generate(
6415                    group_code,
6416                    &period_label,
6417                    consolidated_revenue,
6418                    consolidated_profit,
6419                    consolidated_assets,
6420                    &entity_seeds,
6421                    depr_param,
6422                );
6423                segment_reports.extend(segs);
6424                segment_reconciliations.push(recon);
6425            }
6426
6427            info!(
6428                "Segment reports generated: {} segments, {} reconciliations",
6429                segment_reports.len(),
6430                segment_reconciliations.len()
6431            );
6432        }
6433
6434        // Generate bank reconciliations from payment data
6435        if br_enabled && !document_flows.payments.is_empty() {
6436            let employee_ids: Vec<String> = self
6437                .master_data
6438                .employees
6439                .iter()
6440                .map(|e| e.employee_id.clone())
6441                .collect();
6442            let mut br_gen =
6443                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6444
6445            // Group payments by company code and period
6446            for company in &self.config.companies {
6447                let company_payments: Vec<PaymentReference> = document_flows
6448                    .payments
6449                    .iter()
6450                    .filter(|p| p.header.company_code == company.code)
6451                    .map(|p| PaymentReference {
6452                        id: p.header.document_id.clone(),
6453                        amount: if p.is_vendor { p.amount } else { -p.amount },
6454                        date: p.header.document_date,
6455                        reference: p
6456                            .check_number
6457                            .clone()
6458                            .or_else(|| p.wire_reference.clone())
6459                            .unwrap_or_else(|| p.header.document_id.clone()),
6460                    })
6461                    .collect();
6462
6463                if company_payments.is_empty() {
6464                    continue;
6465                }
6466
6467                let bank_account_id = format!("{}-MAIN", company.code);
6468
6469                // Generate one reconciliation per period
6470                for period in 0..self.config.global.period_months {
6471                    let period_start = start_date + chrono::Months::new(period);
6472                    let period_end =
6473                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6474
6475                    let period_payments: Vec<PaymentReference> = company_payments
6476                        .iter()
6477                        .filter(|p| p.date >= period_start && p.date <= period_end)
6478                        .cloned()
6479                        .collect();
6480
6481                    let recon = br_gen.generate(
6482                        &company.code,
6483                        &bank_account_id,
6484                        period_start,
6485                        period_end,
6486                        &company.currency,
6487                        &period_payments,
6488                    );
6489                    bank_reconciliations.push(recon);
6490                }
6491            }
6492            info!(
6493                "Bank reconciliations generated: {} reconciliations",
6494                bank_reconciliations.len()
6495            );
6496        }
6497
6498        stats.bank_reconciliation_count = bank_reconciliations.len();
6499        self.check_resources_with_log("post-financial-reporting")?;
6500
6501        if !trial_balances.is_empty() {
6502            info!(
6503                "Period-close trial balances captured: {} periods",
6504                trial_balances.len()
6505            );
6506        }
6507
6508        // Notes to financial statements are generated in a separate post-processing step
6509        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6510        // phases have completed, so that deferred tax and provision data can be wired in.
6511        let notes_to_financial_statements = Vec::new();
6512
6513        Ok(FinancialReportingSnapshot {
6514            financial_statements,
6515            standalone_statements,
6516            consolidated_statements,
6517            consolidation_schedules,
6518            bank_reconciliations,
6519            trial_balances,
6520            segment_reports,
6521            segment_reconciliations,
6522            notes_to_financial_statements,
6523        })
6524    }
6525
6526    /// Populate notes to financial statements using fully-resolved snapshots.
6527    ///
6528    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6529    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6530    /// can be wired into the notes context.  The method mutates
6531    /// `financial_reporting.notes_to_financial_statements` in-place.
6532    fn generate_notes_to_financial_statements(
6533        &self,
6534        financial_reporting: &mut FinancialReportingSnapshot,
6535        accounting_standards: &AccountingStandardsSnapshot,
6536        tax: &TaxSnapshot,
6537        hr: &HrSnapshot,
6538        audit: &AuditSnapshot,
6539        treasury: &TreasurySnapshot,
6540    ) {
6541        use datasynth_config::schema::AccountingFrameworkConfig;
6542        use datasynth_core::models::StatementType;
6543        use datasynth_generators::period_close::notes_generator::{
6544            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6545        };
6546
6547        let seed = self.seed;
6548        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6549        {
6550            Ok(d) => d,
6551            Err(_) => return,
6552        };
6553
6554        let mut notes_gen = NotesGenerator::new(seed + 4235);
6555
6556        for company in &self.config.companies {
6557            let last_period_end = start_date
6558                + chrono::Months::new(self.config.global.period_months)
6559                - chrono::Days::new(1);
6560            let fiscal_year = last_period_end.year() as u16;
6561
6562            // Extract relevant amounts from the already-generated financial statements
6563            let entity_is = financial_reporting
6564                .standalone_statements
6565                .get(&company.code)
6566                .and_then(|stmts| {
6567                    stmts.iter().find(|s| {
6568                        s.fiscal_year == fiscal_year
6569                            && s.statement_type == StatementType::IncomeStatement
6570                    })
6571                });
6572            let entity_bs = financial_reporting
6573                .standalone_statements
6574                .get(&company.code)
6575                .and_then(|stmts| {
6576                    stmts.iter().find(|s| {
6577                        s.fiscal_year == fiscal_year
6578                            && s.statement_type == StatementType::BalanceSheet
6579                    })
6580                });
6581
6582            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6583            let revenue_amount = entity_is
6584                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6585                .map(|li| li.amount);
6586            let ppe_gross = entity_bs
6587                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6588                .map(|li| li.amount);
6589
6590            let framework = match self
6591                .config
6592                .accounting_standards
6593                .framework
6594                .unwrap_or_default()
6595            {
6596                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6597                    "IFRS".to_string()
6598                }
6599                _ => "US GAAP".to_string(),
6600            };
6601
6602            // ---- Deferred tax (IAS 12 / ASC 740) ----
6603            // Sum closing DTA and DTL from rollforward entries for this entity.
6604            let (entity_dta, entity_dtl) = {
6605                let mut dta = rust_decimal::Decimal::ZERO;
6606                let mut dtl = rust_decimal::Decimal::ZERO;
6607                for rf in &tax.deferred_tax.rollforwards {
6608                    if rf.entity_code == company.code {
6609                        dta += rf.closing_dta;
6610                        dtl += rf.closing_dtl;
6611                    }
6612                }
6613                (
6614                    if dta > rust_decimal::Decimal::ZERO {
6615                        Some(dta)
6616                    } else {
6617                        None
6618                    },
6619                    if dtl > rust_decimal::Decimal::ZERO {
6620                        Some(dtl)
6621                    } else {
6622                        None
6623                    },
6624                )
6625            };
6626
6627            // ---- Provisions (IAS 37 / ASC 450) ----
6628            // Filter provisions to this entity; sum best_estimate amounts.
6629            let entity_provisions: Vec<_> = accounting_standards
6630                .provisions
6631                .iter()
6632                .filter(|p| p.entity_code == company.code)
6633                .collect();
6634            let provision_count = entity_provisions.len();
6635            let total_provisions = if provision_count > 0 {
6636                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6637            } else {
6638                None
6639            };
6640
6641            // ---- Pension data from HR snapshot ----
6642            let entity_pension_plan_count = hr
6643                .pension_plans
6644                .iter()
6645                .filter(|p| p.entity_code == company.code)
6646                .count();
6647            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6648                let sum: rust_decimal::Decimal = hr
6649                    .pension_disclosures
6650                    .iter()
6651                    .filter(|d| {
6652                        hr.pension_plans
6653                            .iter()
6654                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6655                    })
6656                    .map(|d| d.net_pension_liability)
6657                    .sum();
6658                let plan_assets_sum: rust_decimal::Decimal = hr
6659                    .pension_plan_assets
6660                    .iter()
6661                    .filter(|a| {
6662                        hr.pension_plans
6663                            .iter()
6664                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6665                    })
6666                    .map(|a| a.fair_value_closing)
6667                    .sum();
6668                if entity_pension_plan_count > 0 {
6669                    Some(sum + plan_assets_sum)
6670                } else {
6671                    None
6672                }
6673            };
6674            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6675                let sum: rust_decimal::Decimal = hr
6676                    .pension_plan_assets
6677                    .iter()
6678                    .filter(|a| {
6679                        hr.pension_plans
6680                            .iter()
6681                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6682                    })
6683                    .map(|a| a.fair_value_closing)
6684                    .sum();
6685                if entity_pension_plan_count > 0 {
6686                    Some(sum)
6687                } else {
6688                    None
6689                }
6690            };
6691
6692            // ---- Audit data: related parties + subsequent events ----
6693            // Audit snapshot covers all entities; use total counts (common case = single entity).
6694            let rp_count = audit.related_party_transactions.len();
6695            let se_count = audit.subsequent_events.len();
6696            let adjusting_count = audit
6697                .subsequent_events
6698                .iter()
6699                .filter(|e| {
6700                    matches!(
6701                        e.classification,
6702                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6703                    )
6704                })
6705                .count();
6706
6707            let ctx = NotesGeneratorContext {
6708                entity_code: company.code.clone(),
6709                framework,
6710                period: format!("FY{}", fiscal_year),
6711                period_end: last_period_end,
6712                currency: company.currency.clone(),
6713                revenue_amount,
6714                total_ppe_gross: ppe_gross,
6715                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6716                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6717                deferred_tax_asset: entity_dta,
6718                deferred_tax_liability: entity_dtl,
6719                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6720                provision_count,
6721                total_provisions,
6722                // Pension data from HR snapshot
6723                pension_plan_count: entity_pension_plan_count,
6724                total_dbo: entity_total_dbo,
6725                total_plan_assets: entity_total_plan_assets,
6726                // Audit data
6727                related_party_transaction_count: rp_count,
6728                subsequent_event_count: se_count,
6729                adjusting_event_count: adjusting_count,
6730                ..NotesGeneratorContext::default()
6731            };
6732
6733            let entity_notes = notes_gen.generate(&ctx);
6734            let standard_note_count = entity_notes.len() as u32;
6735            info!(
6736                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
6737                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
6738            );
6739            financial_reporting
6740                .notes_to_financial_statements
6741                .extend(entity_notes);
6742
6743            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
6744            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
6745                .debt_instruments
6746                .iter()
6747                .filter(|d| d.entity_id == company.code)
6748                .map(|d| {
6749                    (
6750                        format!("{:?}", d.instrument_type),
6751                        d.principal,
6752                        d.maturity_date.to_string(),
6753                    )
6754                })
6755                .collect();
6756
6757            let hedge_count = treasury.hedge_relationships.len();
6758            let effective_hedges = treasury
6759                .hedge_relationships
6760                .iter()
6761                .filter(|h| h.is_effective)
6762                .count();
6763            let total_notional: rust_decimal::Decimal = treasury
6764                .hedging_instruments
6765                .iter()
6766                .map(|h| h.notional_amount)
6767                .sum();
6768            let total_fair_value: rust_decimal::Decimal = treasury
6769                .hedging_instruments
6770                .iter()
6771                .map(|h| h.fair_value)
6772                .sum();
6773
6774            // Join provision_movements with provisions to get entity/type info
6775            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
6776                .provisions
6777                .iter()
6778                .filter(|p| p.entity_code == company.code)
6779                .map(|p| p.id.as_str())
6780                .collect();
6781            let provision_movements: Vec<(
6782                String,
6783                rust_decimal::Decimal,
6784                rust_decimal::Decimal,
6785                rust_decimal::Decimal,
6786            )> = accounting_standards
6787                .provision_movements
6788                .iter()
6789                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
6790                .map(|m| {
6791                    let prov_type = accounting_standards
6792                        .provisions
6793                        .iter()
6794                        .find(|p| p.id == m.provision_id)
6795                        .map(|p| format!("{:?}", p.provision_type))
6796                        .unwrap_or_else(|| "Unknown".to_string());
6797                    (prov_type, m.opening, m.additions, m.closing)
6798                })
6799                .collect();
6800
6801            let enhanced_ctx = EnhancedNotesContext {
6802                entity_code: company.code.clone(),
6803                period: format!("FY{}", fiscal_year),
6804                currency: company.currency.clone(),
6805                // Inventory breakdown: best-effort using zero (would need balance tracker)
6806                finished_goods_value: rust_decimal::Decimal::ZERO,
6807                wip_value: rust_decimal::Decimal::ZERO,
6808                raw_materials_value: rust_decimal::Decimal::ZERO,
6809                debt_instruments,
6810                hedge_count,
6811                effective_hedges,
6812                total_notional,
6813                total_fair_value,
6814                provision_movements,
6815            };
6816
6817            let enhanced_notes =
6818                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
6819            if !enhanced_notes.is_empty() {
6820                info!(
6821                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
6822                    company.code,
6823                    enhanced_notes.len(),
6824                    enhanced_ctx.debt_instruments.len(),
6825                    hedge_count,
6826                    enhanced_ctx.provision_movements.len(),
6827                );
6828                financial_reporting
6829                    .notes_to_financial_statements
6830                    .extend(enhanced_notes);
6831            }
6832        }
6833    }
6834
6835    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
6836    ///
6837    /// This ensures the trial balance is coherent with the JEs: every debit and credit
6838    /// posted in the journal entries flows through to the trial balance, using the real
6839    /// GL account numbers from the CoA.
6840    fn build_trial_balance_from_entries(
6841        journal_entries: &[JournalEntry],
6842        coa: &ChartOfAccounts,
6843        company_code: &str,
6844        fiscal_year: u16,
6845        fiscal_period: u8,
6846    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6847        use rust_decimal::Decimal;
6848
6849        // Accumulate total debits and credits per GL account
6850        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
6851        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
6852
6853        for je in journal_entries {
6854            // Filter to matching company, fiscal year, and period
6855            if je.header.company_code != company_code
6856                || je.header.fiscal_year != fiscal_year
6857                || je.header.fiscal_period != fiscal_period
6858            {
6859                continue;
6860            }
6861
6862            for line in &je.lines {
6863                let acct = &line.gl_account;
6864                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
6865                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
6866            }
6867        }
6868
6869        // Build a TrialBalanceEntry for each account that had activity
6870        let mut all_accounts: Vec<&String> = account_debits
6871            .keys()
6872            .chain(account_credits.keys())
6873            .collect::<std::collections::HashSet<_>>()
6874            .into_iter()
6875            .collect();
6876        all_accounts.sort();
6877
6878        let mut entries = Vec::new();
6879
6880        for acct_number in all_accounts {
6881            let debit = account_debits
6882                .get(acct_number)
6883                .copied()
6884                .unwrap_or(Decimal::ZERO);
6885            let credit = account_credits
6886                .get(acct_number)
6887                .copied()
6888                .unwrap_or(Decimal::ZERO);
6889
6890            if debit.is_zero() && credit.is_zero() {
6891                continue;
6892            }
6893
6894            // Look up account name from CoA, fall back to "Account {code}"
6895            let account_name = coa
6896                .get_account(acct_number)
6897                .map(|gl| gl.short_description.clone())
6898                .unwrap_or_else(|| format!("Account {acct_number}"));
6899
6900            // Map account code prefix to the category strings expected by
6901            // FinancialStatementGenerator (Cash, Receivables, Inventory,
6902            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
6903            // OperatingExpenses).
6904            let category = Self::category_from_account_code(acct_number);
6905
6906            entries.push(datasynth_generators::TrialBalanceEntry {
6907                account_code: acct_number.clone(),
6908                account_name,
6909                category,
6910                debit_balance: debit,
6911                credit_balance: credit,
6912            });
6913        }
6914
6915        entries
6916    }
6917
6918    /// Build a cumulative trial balance by aggregating all JEs from the start up to
6919    /// (and including) the given period end date.
6920    ///
6921    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
6922    /// while income statement accounts (revenue, expenses) show only the current period.
6923    /// The two are merged into a single Vec for the FinancialStatementGenerator.
6924    fn build_cumulative_trial_balance(
6925        journal_entries: &[JournalEntry],
6926        coa: &ChartOfAccounts,
6927        company_code: &str,
6928        start_date: NaiveDate,
6929        period_end: NaiveDate,
6930        fiscal_year: u16,
6931        fiscal_period: u8,
6932    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
6933        use rust_decimal::Decimal;
6934
6935        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
6936        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
6937        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
6938
6939        // Accumulate debits/credits for income statement accounts (current period only)
6940        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
6941        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
6942
6943        for je in journal_entries {
6944            if je.header.company_code != company_code {
6945                continue;
6946            }
6947
6948            for line in &je.lines {
6949                let acct = &line.gl_account;
6950                let category = Self::category_from_account_code(acct);
6951                let is_bs_account = matches!(
6952                    category.as_str(),
6953                    "Cash"
6954                        | "Receivables"
6955                        | "Inventory"
6956                        | "FixedAssets"
6957                        | "Payables"
6958                        | "AccruedLiabilities"
6959                        | "LongTermDebt"
6960                        | "Equity"
6961                );
6962
6963                if is_bs_account {
6964                    // Balance sheet: accumulate from start through period_end
6965                    if je.header.document_date <= period_end
6966                        && je.header.document_date >= start_date
6967                    {
6968                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6969                            line.debit_amount;
6970                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6971                            line.credit_amount;
6972                    }
6973                } else {
6974                    // Income statement: current period only
6975                    if je.header.fiscal_year == fiscal_year
6976                        && je.header.fiscal_period == fiscal_period
6977                    {
6978                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6979                            line.debit_amount;
6980                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
6981                            line.credit_amount;
6982                    }
6983                }
6984            }
6985        }
6986
6987        // Merge all accounts
6988        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
6989        all_accounts.extend(bs_debits.keys().cloned());
6990        all_accounts.extend(bs_credits.keys().cloned());
6991        all_accounts.extend(is_debits.keys().cloned());
6992        all_accounts.extend(is_credits.keys().cloned());
6993
6994        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
6995        sorted_accounts.sort();
6996
6997        let mut entries = Vec::new();
6998
6999        for acct_number in &sorted_accounts {
7000            let category = Self::category_from_account_code(acct_number);
7001            let is_bs_account = matches!(
7002                category.as_str(),
7003                "Cash"
7004                    | "Receivables"
7005                    | "Inventory"
7006                    | "FixedAssets"
7007                    | "Payables"
7008                    | "AccruedLiabilities"
7009                    | "LongTermDebt"
7010                    | "Equity"
7011            );
7012
7013            let (debit, credit) = if is_bs_account {
7014                (
7015                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7016                    bs_credits
7017                        .get(acct_number)
7018                        .copied()
7019                        .unwrap_or(Decimal::ZERO),
7020                )
7021            } else {
7022                (
7023                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7024                    is_credits
7025                        .get(acct_number)
7026                        .copied()
7027                        .unwrap_or(Decimal::ZERO),
7028                )
7029            };
7030
7031            if debit.is_zero() && credit.is_zero() {
7032                continue;
7033            }
7034
7035            let account_name = coa
7036                .get_account(acct_number)
7037                .map(|gl| gl.short_description.clone())
7038                .unwrap_or_else(|| format!("Account {acct_number}"));
7039
7040            entries.push(datasynth_generators::TrialBalanceEntry {
7041                account_code: acct_number.clone(),
7042                account_name,
7043                category,
7044                debit_balance: debit,
7045                credit_balance: credit,
7046            });
7047        }
7048
7049        entries
7050    }
7051
7052    /// Build a JE-derived cash flow statement using the indirect method.
7053    ///
7054    /// Compares current and prior cumulative trial balances to derive working capital
7055    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7056    fn build_cash_flow_from_trial_balances(
7057        current_tb: &[datasynth_generators::TrialBalanceEntry],
7058        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7059        net_income: rust_decimal::Decimal,
7060    ) -> Vec<CashFlowItem> {
7061        use rust_decimal::Decimal;
7062
7063        // Helper: aggregate a TB by category and return net (debit - credit)
7064        let aggregate =
7065            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7066                let mut map: HashMap<String, Decimal> = HashMap::new();
7067                for entry in tb {
7068                    let net = entry.debit_balance - entry.credit_balance;
7069                    *map.entry(entry.category.clone()).or_default() += net;
7070                }
7071                map
7072            };
7073
7074        let current = aggregate(current_tb);
7075        let prior = prior_tb.map(aggregate);
7076
7077        // Get balance for a category, defaulting to zero
7078        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7079            *map.get(key).unwrap_or(&Decimal::ZERO)
7080        };
7081
7082        // Compute change: current - prior (or current if no prior)
7083        let change = |key: &str| -> Decimal {
7084            let curr = get(&current, key);
7085            match &prior {
7086                Some(p) => curr - get(p, key),
7087                None => curr,
7088            }
7089        };
7090
7091        // Operating activities (indirect method)
7092        // Depreciation add-back: approximate from FixedAssets decrease
7093        let fixed_asset_change = change("FixedAssets");
7094        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7095            -fixed_asset_change
7096        } else {
7097            Decimal::ZERO
7098        };
7099
7100        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7101        let ar_change = change("Receivables");
7102        let inventory_change = change("Inventory");
7103        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7104        let ap_change = change("Payables");
7105        let accrued_change = change("AccruedLiabilities");
7106
7107        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7108            + (-ap_change)
7109            + (-accrued_change);
7110
7111        // Investing activities
7112        let capex = if fixed_asset_change > Decimal::ZERO {
7113            -fixed_asset_change
7114        } else {
7115            Decimal::ZERO
7116        };
7117        let investing_cf = capex;
7118
7119        // Financing activities
7120        let debt_change = -change("LongTermDebt");
7121        let equity_change = -change("Equity");
7122        let financing_cf = debt_change + equity_change;
7123
7124        let net_change = operating_cf + investing_cf + financing_cf;
7125
7126        vec![
7127            CashFlowItem {
7128                item_code: "CF-NI".to_string(),
7129                label: "Net Income".to_string(),
7130                category: CashFlowCategory::Operating,
7131                amount: net_income,
7132                amount_prior: None,
7133                sort_order: 1,
7134                is_total: false,
7135            },
7136            CashFlowItem {
7137                item_code: "CF-DEP".to_string(),
7138                label: "Depreciation & Amortization".to_string(),
7139                category: CashFlowCategory::Operating,
7140                amount: depreciation_addback,
7141                amount_prior: None,
7142                sort_order: 2,
7143                is_total: false,
7144            },
7145            CashFlowItem {
7146                item_code: "CF-AR".to_string(),
7147                label: "Change in Accounts Receivable".to_string(),
7148                category: CashFlowCategory::Operating,
7149                amount: -ar_change,
7150                amount_prior: None,
7151                sort_order: 3,
7152                is_total: false,
7153            },
7154            CashFlowItem {
7155                item_code: "CF-AP".to_string(),
7156                label: "Change in Accounts Payable".to_string(),
7157                category: CashFlowCategory::Operating,
7158                amount: -ap_change,
7159                amount_prior: None,
7160                sort_order: 4,
7161                is_total: false,
7162            },
7163            CashFlowItem {
7164                item_code: "CF-INV".to_string(),
7165                label: "Change in Inventory".to_string(),
7166                category: CashFlowCategory::Operating,
7167                amount: -inventory_change,
7168                amount_prior: None,
7169                sort_order: 5,
7170                is_total: false,
7171            },
7172            CashFlowItem {
7173                item_code: "CF-OP".to_string(),
7174                label: "Net Cash from Operating Activities".to_string(),
7175                category: CashFlowCategory::Operating,
7176                amount: operating_cf,
7177                amount_prior: None,
7178                sort_order: 6,
7179                is_total: true,
7180            },
7181            CashFlowItem {
7182                item_code: "CF-CAPEX".to_string(),
7183                label: "Capital Expenditures".to_string(),
7184                category: CashFlowCategory::Investing,
7185                amount: capex,
7186                amount_prior: None,
7187                sort_order: 7,
7188                is_total: false,
7189            },
7190            CashFlowItem {
7191                item_code: "CF-INV-T".to_string(),
7192                label: "Net Cash from Investing Activities".to_string(),
7193                category: CashFlowCategory::Investing,
7194                amount: investing_cf,
7195                amount_prior: None,
7196                sort_order: 8,
7197                is_total: true,
7198            },
7199            CashFlowItem {
7200                item_code: "CF-DEBT".to_string(),
7201                label: "Net Borrowings / (Repayments)".to_string(),
7202                category: CashFlowCategory::Financing,
7203                amount: debt_change,
7204                amount_prior: None,
7205                sort_order: 9,
7206                is_total: false,
7207            },
7208            CashFlowItem {
7209                item_code: "CF-EQ".to_string(),
7210                label: "Equity Changes".to_string(),
7211                category: CashFlowCategory::Financing,
7212                amount: equity_change,
7213                amount_prior: None,
7214                sort_order: 10,
7215                is_total: false,
7216            },
7217            CashFlowItem {
7218                item_code: "CF-FIN-T".to_string(),
7219                label: "Net Cash from Financing Activities".to_string(),
7220                category: CashFlowCategory::Financing,
7221                amount: financing_cf,
7222                amount_prior: None,
7223                sort_order: 11,
7224                is_total: true,
7225            },
7226            CashFlowItem {
7227                item_code: "CF-NET".to_string(),
7228                label: "Net Change in Cash".to_string(),
7229                category: CashFlowCategory::Operating,
7230                amount: net_change,
7231                amount_prior: None,
7232                sort_order: 12,
7233                is_total: true,
7234            },
7235        ]
7236    }
7237
7238    /// Calculate net income from a set of trial balance entries.
7239    ///
7240    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7241    fn calculate_net_income_from_tb(
7242        tb: &[datasynth_generators::TrialBalanceEntry],
7243    ) -> rust_decimal::Decimal {
7244        use rust_decimal::Decimal;
7245
7246        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7247        for entry in tb {
7248            let net = entry.debit_balance - entry.credit_balance;
7249            *aggregated.entry(entry.category.clone()).or_default() += net;
7250        }
7251
7252        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7253        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7254        let opex = *aggregated
7255            .get("OperatingExpenses")
7256            .unwrap_or(&Decimal::ZERO);
7257        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7258        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7259
7260        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7261        // other_income is typically negative (credit), other_expenses is typically positive
7262        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7263        let tax_rate = Decimal::new(25, 2); // 0.25
7264        let tax = operating_income * tax_rate;
7265        operating_income - tax
7266    }
7267
7268    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7269    ///
7270    /// Uses the first two digits of the account code to classify into the categories
7271    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7272    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7273    /// OperatingExpenses, OtherIncome, OtherExpenses.
7274    fn category_from_account_code(code: &str) -> String {
7275        let prefix: String = code.chars().take(2).collect();
7276        match prefix.as_str() {
7277            "10" => "Cash",
7278            "11" => "Receivables",
7279            "12" | "13" | "14" => "Inventory",
7280            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7281            "20" => "Payables",
7282            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7283            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7284            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7285            "40" | "41" | "42" | "43" | "44" => "Revenue",
7286            "50" | "51" | "52" => "CostOfSales",
7287            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7288                "OperatingExpenses"
7289            }
7290            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7291            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7292            _ => "OperatingExpenses",
7293        }
7294        .to_string()
7295    }
7296
7297    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7298    fn phase_hr_data(
7299        &mut self,
7300        stats: &mut EnhancedGenerationStatistics,
7301    ) -> SynthResult<HrSnapshot> {
7302        if !self.phase_config.generate_hr {
7303            debug!("Phase 16: Skipped (HR generation disabled)");
7304            return Ok(HrSnapshot::default());
7305        }
7306
7307        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7308
7309        let seed = self.seed;
7310        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7311            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7312        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7313        let company_code = self
7314            .config
7315            .companies
7316            .first()
7317            .map(|c| c.code.as_str())
7318            .unwrap_or("1000");
7319        let currency = self
7320            .config
7321            .companies
7322            .first()
7323            .map(|c| c.currency.as_str())
7324            .unwrap_or("USD");
7325
7326        let employee_ids: Vec<String> = self
7327            .master_data
7328            .employees
7329            .iter()
7330            .map(|e| e.employee_id.clone())
7331            .collect();
7332
7333        if employee_ids.is_empty() {
7334            debug!("Phase 16: Skipped (no employees available)");
7335            return Ok(HrSnapshot::default());
7336        }
7337
7338        // Extract cost-center pool from master data employees for cross-reference
7339        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7340        let cost_center_ids: Vec<String> = self
7341            .master_data
7342            .employees
7343            .iter()
7344            .filter_map(|e| e.cost_center.clone())
7345            .collect::<std::collections::HashSet<_>>()
7346            .into_iter()
7347            .collect();
7348
7349        let mut snapshot = HrSnapshot::default();
7350
7351        // Generate payroll runs (one per month)
7352        if self.config.hr.payroll.enabled {
7353            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7354                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7355
7356            // Look up country pack for payroll deductions and labels
7357            let payroll_pack = self.primary_pack();
7358
7359            // Store the pack on the generator so generate() resolves
7360            // localized deduction rates and labels from it.
7361            payroll_gen.set_country_pack(payroll_pack.clone());
7362
7363            let employees_with_salary: Vec<(
7364                String,
7365                rust_decimal::Decimal,
7366                Option<String>,
7367                Option<String>,
7368            )> = self
7369                .master_data
7370                .employees
7371                .iter()
7372                .map(|e| {
7373                    // Use the employee's actual annual base salary.
7374                    // Fall back to $60,000 / yr if somehow zero.
7375                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7376                        e.base_salary
7377                    } else {
7378                        rust_decimal::Decimal::from(60_000)
7379                    };
7380                    (
7381                        e.employee_id.clone(),
7382                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7383                        e.cost_center.clone(),
7384                        e.department_id.clone(),
7385                    )
7386                })
7387                .collect();
7388
7389            // Use generate_with_changes when employee change history is available
7390            // so that salary adjustments, transfers, etc. are reflected in payroll.
7391            let change_history = &self.master_data.employee_change_history;
7392            let has_changes = !change_history.is_empty();
7393            if has_changes {
7394                debug!(
7395                    "Payroll will incorporate {} employee change events",
7396                    change_history.len()
7397                );
7398            }
7399
7400            for month in 0..self.config.global.period_months {
7401                let period_start = start_date + chrono::Months::new(month);
7402                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7403                let (run, items) = if has_changes {
7404                    payroll_gen.generate_with_changes(
7405                        company_code,
7406                        &employees_with_salary,
7407                        period_start,
7408                        period_end,
7409                        currency,
7410                        change_history,
7411                    )
7412                } else {
7413                    payroll_gen.generate(
7414                        company_code,
7415                        &employees_with_salary,
7416                        period_start,
7417                        period_end,
7418                        currency,
7419                    )
7420                };
7421                snapshot.payroll_runs.push(run);
7422                snapshot.payroll_run_count += 1;
7423                snapshot.payroll_line_item_count += items.len();
7424                snapshot.payroll_line_items.extend(items);
7425            }
7426        }
7427
7428        // Generate time entries
7429        if self.config.hr.time_attendance.enabled {
7430            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7431                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7432            // v3.4.2: when a temporal context is configured, time entries
7433            // respect holidays (not just weekends) and submitted_at lag
7434            // snaps to business days.
7435            if let Some(ctx) = &self.temporal_context {
7436                time_gen.set_temporal_context(Arc::clone(ctx));
7437            }
7438            let entries = time_gen.generate(
7439                &employee_ids,
7440                start_date,
7441                end_date,
7442                &self.config.hr.time_attendance,
7443            );
7444            snapshot.time_entry_count = entries.len();
7445            snapshot.time_entries = entries;
7446        }
7447
7448        // Generate expense reports
7449        if self.config.hr.expenses.enabled {
7450            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7451                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7452            expense_gen.set_country_pack(self.primary_pack().clone());
7453            // v3.4.2: snap submission / approval / paid / line-item dates
7454            // to business days when temporal_context is present.
7455            if let Some(ctx) = &self.temporal_context {
7456                expense_gen.set_temporal_context(Arc::clone(ctx));
7457            }
7458            let company_currency = self
7459                .config
7460                .companies
7461                .first()
7462                .map(|c| c.currency.as_str())
7463                .unwrap_or("USD");
7464            let reports = expense_gen.generate_with_currency(
7465                &employee_ids,
7466                start_date,
7467                end_date,
7468                &self.config.hr.expenses,
7469                company_currency,
7470            );
7471            snapshot.expense_report_count = reports.len();
7472            snapshot.expense_reports = reports;
7473        }
7474
7475        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7476        if self.config.hr.payroll.enabled {
7477            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7478            let employee_pairs: Vec<(String, String)> = self
7479                .master_data
7480                .employees
7481                .iter()
7482                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7483                .collect();
7484            let enrollments =
7485                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7486            snapshot.benefit_enrollment_count = enrollments.len();
7487            snapshot.benefit_enrollments = enrollments;
7488        }
7489
7490        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7491        if self.phase_config.generate_hr {
7492            let entity_name = self
7493                .config
7494                .companies
7495                .first()
7496                .map(|c| c.name.as_str())
7497                .unwrap_or("Entity");
7498            let period_months = self.config.global.period_months;
7499            let period_label = {
7500                let y = start_date.year();
7501                let m = start_date.month();
7502                if period_months >= 12 {
7503                    format!("FY{y}")
7504                } else {
7505                    format!("{y}-{m:02}")
7506                }
7507            };
7508            let reporting_date =
7509                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7510
7511            // Compute average annual salary from actual payroll data when available.
7512            // PayrollRun.total_gross covers all employees for one pay period; we sum
7513            // across all runs and divide by employee_count to get per-employee total,
7514            // then annualise for sub-annual periods.
7515            let avg_salary: Option<rust_decimal::Decimal> = {
7516                let employee_count = employee_ids.len();
7517                if self.config.hr.payroll.enabled
7518                    && employee_count > 0
7519                    && !snapshot.payroll_runs.is_empty()
7520                {
7521                    // Sum total gross pay across all payroll runs for this company
7522                    let total_gross: rust_decimal::Decimal = snapshot
7523                        .payroll_runs
7524                        .iter()
7525                        .filter(|r| r.company_code == company_code)
7526                        .map(|r| r.total_gross)
7527                        .sum();
7528                    if total_gross > rust_decimal::Decimal::ZERO {
7529                        // Annualise: total_gross covers `period_months` months of pay
7530                        let annual_total = if period_months > 0 && period_months < 12 {
7531                            total_gross * rust_decimal::Decimal::from(12u32)
7532                                / rust_decimal::Decimal::from(period_months)
7533                        } else {
7534                            total_gross
7535                        };
7536                        Some(
7537                            (annual_total / rust_decimal::Decimal::from(employee_count))
7538                                .round_dp(2),
7539                        )
7540                    } else {
7541                        None
7542                    }
7543                } else {
7544                    None
7545                }
7546            };
7547
7548            let mut pension_gen =
7549                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7550            let pension_snap = pension_gen.generate(
7551                company_code,
7552                entity_name,
7553                &period_label,
7554                reporting_date,
7555                employee_ids.len(),
7556                currency,
7557                avg_salary,
7558                period_months,
7559            );
7560            snapshot.pension_plan_count = pension_snap.plans.len();
7561            snapshot.pension_plans = pension_snap.plans;
7562            snapshot.pension_obligations = pension_snap.obligations;
7563            snapshot.pension_plan_assets = pension_snap.plan_assets;
7564            snapshot.pension_disclosures = pension_snap.disclosures;
7565            // Pension JEs are returned here so they can be added to entries
7566            // in the caller (stored temporarily on snapshot for transfer).
7567            // We embed them in the hr snapshot for simplicity; the orchestrator
7568            // will extract and extend `entries`.
7569            snapshot.pension_journal_entries = pension_snap.journal_entries;
7570        }
7571
7572        // Generate stock-based compensation (ASC 718 / IFRS 2)
7573        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7574            let period_months = self.config.global.period_months;
7575            let period_label = {
7576                let y = start_date.year();
7577                let m = start_date.month();
7578                if period_months >= 12 {
7579                    format!("FY{y}")
7580                } else {
7581                    format!("{y}-{m:02}")
7582                }
7583            };
7584            let reporting_date =
7585                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7586
7587            let mut stock_comp_gen =
7588                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7589            let stock_snap = stock_comp_gen.generate(
7590                company_code,
7591                &employee_ids,
7592                start_date,
7593                &period_label,
7594                reporting_date,
7595                currency,
7596            );
7597            snapshot.stock_grant_count = stock_snap.grants.len();
7598            snapshot.stock_grants = stock_snap.grants;
7599            snapshot.stock_comp_expenses = stock_snap.expenses;
7600            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7601        }
7602
7603        stats.payroll_run_count = snapshot.payroll_run_count;
7604        stats.time_entry_count = snapshot.time_entry_count;
7605        stats.expense_report_count = snapshot.expense_report_count;
7606        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7607        stats.pension_plan_count = snapshot.pension_plan_count;
7608        stats.stock_grant_count = snapshot.stock_grant_count;
7609
7610        info!(
7611            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7612            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7613            snapshot.time_entry_count, snapshot.expense_report_count,
7614            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7615            snapshot.stock_grant_count
7616        );
7617        self.check_resources_with_log("post-hr")?;
7618
7619        Ok(snapshot)
7620    }
7621
7622    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7623    fn phase_accounting_standards(
7624        &mut self,
7625        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7626        journal_entries: &[JournalEntry],
7627        stats: &mut EnhancedGenerationStatistics,
7628    ) -> SynthResult<AccountingStandardsSnapshot> {
7629        if !self.phase_config.generate_accounting_standards {
7630            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7631            return Ok(AccountingStandardsSnapshot::default());
7632        }
7633        info!("Phase 17: Generating Accounting Standards Data");
7634
7635        let seed = self.seed;
7636        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7637            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7638        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7639        let company_code = self
7640            .config
7641            .companies
7642            .first()
7643            .map(|c| c.code.as_str())
7644            .unwrap_or("1000");
7645        let currency = self
7646            .config
7647            .companies
7648            .first()
7649            .map(|c| c.currency.as_str())
7650            .unwrap_or("USD");
7651
7652        // Convert config framework to standards framework.
7653        // If the user explicitly set a framework in the YAML config, use that.
7654        // Otherwise, fall back to the country pack's accounting.framework field,
7655        // and if that is also absent or unrecognised, default to US GAAP.
7656        let framework = match self.config.accounting_standards.framework {
7657            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
7658                datasynth_standards::framework::AccountingFramework::UsGaap
7659            }
7660            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
7661                datasynth_standards::framework::AccountingFramework::Ifrs
7662            }
7663            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
7664                datasynth_standards::framework::AccountingFramework::DualReporting
7665            }
7666            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
7667                datasynth_standards::framework::AccountingFramework::FrenchGaap
7668            }
7669            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
7670                datasynth_standards::framework::AccountingFramework::GermanGaap
7671            }
7672            None => {
7673                // Derive framework from the primary company's country pack
7674                let pack = self.primary_pack();
7675                let pack_fw = pack.accounting.framework.as_str();
7676                match pack_fw {
7677                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
7678                    "dual_reporting" => {
7679                        datasynth_standards::framework::AccountingFramework::DualReporting
7680                    }
7681                    "french_gaap" => {
7682                        datasynth_standards::framework::AccountingFramework::FrenchGaap
7683                    }
7684                    "german_gaap" | "hgb" => {
7685                        datasynth_standards::framework::AccountingFramework::GermanGaap
7686                    }
7687                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
7688                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
7689                }
7690            }
7691        };
7692
7693        let mut snapshot = AccountingStandardsSnapshot::default();
7694
7695        // Revenue recognition
7696        if self.config.accounting_standards.revenue_recognition.enabled {
7697            let customer_ids: Vec<String> = self
7698                .master_data
7699                .customers
7700                .iter()
7701                .map(|c| c.customer_id.clone())
7702                .collect();
7703
7704            if !customer_ids.is_empty() {
7705                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
7706                let contracts = rev_gen.generate(
7707                    company_code,
7708                    &customer_ids,
7709                    start_date,
7710                    end_date,
7711                    currency,
7712                    &self.config.accounting_standards.revenue_recognition,
7713                    framework,
7714                );
7715                snapshot.revenue_contract_count = contracts.len();
7716                snapshot.contracts = contracts;
7717            }
7718        }
7719
7720        // Impairment testing
7721        if self.config.accounting_standards.impairment.enabled {
7722            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
7723                .master_data
7724                .assets
7725                .iter()
7726                .map(|a| {
7727                    (
7728                        a.asset_id.clone(),
7729                        a.description.clone(),
7730                        a.acquisition_cost,
7731                    )
7732                })
7733                .collect();
7734
7735            if !asset_data.is_empty() {
7736                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
7737                let tests = imp_gen.generate(
7738                    company_code,
7739                    &asset_data,
7740                    end_date,
7741                    &self.config.accounting_standards.impairment,
7742                    framework,
7743                );
7744                snapshot.impairment_test_count = tests.len();
7745                snapshot.impairment_tests = tests;
7746            }
7747        }
7748
7749        // Business combinations (IFRS 3 / ASC 805)
7750        if self
7751            .config
7752            .accounting_standards
7753            .business_combinations
7754            .enabled
7755        {
7756            let bc_config = &self.config.accounting_standards.business_combinations;
7757            let framework_str = match framework {
7758                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7759                _ => "US_GAAP",
7760            };
7761            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
7762            let bc_snap = bc_gen.generate(
7763                company_code,
7764                currency,
7765                start_date,
7766                end_date,
7767                bc_config.acquisition_count,
7768                framework_str,
7769            );
7770            snapshot.business_combination_count = bc_snap.combinations.len();
7771            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
7772            snapshot.business_combinations = bc_snap.combinations;
7773        }
7774
7775        // Expected Credit Loss (IFRS 9 / ASC 326)
7776        if self
7777            .config
7778            .accounting_standards
7779            .expected_credit_loss
7780            .enabled
7781        {
7782            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
7783            let framework_str = match framework {
7784                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
7785                _ => "ASC_326",
7786            };
7787
7788            // Use AR aging data from the subledger snapshot if available;
7789            // otherwise generate synthetic bucket exposures.
7790            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7791
7792            let mut ecl_gen = EclGenerator::new(seed + 43);
7793
7794            // Collect combined bucket totals across all company AR aging reports.
7795            let bucket_exposures: Vec<(
7796                datasynth_core::models::subledger::ar::AgingBucket,
7797                rust_decimal::Decimal,
7798            )> = if ar_aging_reports.is_empty() {
7799                // No AR aging data — synthesise plausible bucket exposures.
7800                use datasynth_core::models::subledger::ar::AgingBucket;
7801                vec![
7802                    (
7803                        AgingBucket::Current,
7804                        rust_decimal::Decimal::from(500_000_u32),
7805                    ),
7806                    (
7807                        AgingBucket::Days1To30,
7808                        rust_decimal::Decimal::from(120_000_u32),
7809                    ),
7810                    (
7811                        AgingBucket::Days31To60,
7812                        rust_decimal::Decimal::from(45_000_u32),
7813                    ),
7814                    (
7815                        AgingBucket::Days61To90,
7816                        rust_decimal::Decimal::from(15_000_u32),
7817                    ),
7818                    (
7819                        AgingBucket::Over90Days,
7820                        rust_decimal::Decimal::from(8_000_u32),
7821                    ),
7822                ]
7823            } else {
7824                use datasynth_core::models::subledger::ar::AgingBucket;
7825                // Sum bucket totals from all reports.
7826                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
7827                    std::collections::HashMap::new();
7828                for report in ar_aging_reports {
7829                    for (bucket, amount) in &report.bucket_totals {
7830                        *totals.entry(*bucket).or_default() += amount;
7831                    }
7832                }
7833                AgingBucket::all()
7834                    .into_iter()
7835                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
7836                    .collect()
7837            };
7838
7839            let ecl_snap = ecl_gen.generate(
7840                company_code,
7841                end_date,
7842                &bucket_exposures,
7843                ecl_config,
7844                &period_label,
7845                framework_str,
7846            );
7847
7848            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
7849            snapshot.ecl_models = ecl_snap.ecl_models;
7850            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
7851            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
7852        }
7853
7854        // Provisions and contingencies (IAS 37 / ASC 450)
7855        {
7856            let framework_str = match framework {
7857                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
7858                _ => "US_GAAP",
7859            };
7860
7861            // Compute actual revenue from the journal entries generated so far.
7862            // The `journal_entries` slice passed to this phase contains all GL entries
7863            // up to and including Period Close. Fall back to a minimum of 100_000 to
7864            // avoid degenerate zero-based provision amounts on first-period datasets.
7865            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
7866                .max(rust_decimal::Decimal::from(100_000_u32));
7867
7868            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7869
7870            let mut prov_gen = ProvisionGenerator::new(seed + 44);
7871            let prov_snap = prov_gen.generate(
7872                company_code,
7873                currency,
7874                revenue_proxy,
7875                end_date,
7876                &period_label,
7877                framework_str,
7878                None, // prior_opening: no carry-forward data in single-period runs
7879            );
7880
7881            snapshot.provision_count = prov_snap.provisions.len();
7882            snapshot.provisions = prov_snap.provisions;
7883            snapshot.provision_movements = prov_snap.movements;
7884            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
7885            snapshot.provision_journal_entries = prov_snap.journal_entries;
7886        }
7887
7888        // IAS 21 Functional Currency Translation
7889        // For each company whose functional currency differs from the presentation
7890        // currency, generate a CurrencyTranslationResult with CTA (OCI).
7891        {
7892            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
7893
7894            let presentation_currency = self
7895                .config
7896                .global
7897                .presentation_currency
7898                .clone()
7899                .unwrap_or_else(|| self.config.global.group_currency.clone());
7900
7901            // Build a minimal rate table populated with approximate rates from
7902            // the FX model base rates (USD-based) so we can do the translation.
7903            let mut rate_table = FxRateTable::new(&presentation_currency);
7904
7905            // Populate with base rates against USD; if presentation_currency is
7906            // not USD we do a best-effort two-step conversion using the table's
7907            // triangulation support.
7908            let base_rates = base_rates_usd();
7909            for (ccy, rate) in &base_rates {
7910                rate_table.add_rate(FxRate::new(
7911                    ccy,
7912                    "USD",
7913                    RateType::Closing,
7914                    end_date,
7915                    *rate,
7916                    "SYNTHETIC",
7917                ));
7918                // Average rate = 98% of closing (approximation).
7919                // 0.98 = 98/100 = Decimal::new(98, 2)
7920                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
7921                rate_table.add_rate(FxRate::new(
7922                    ccy,
7923                    "USD",
7924                    RateType::Average,
7925                    end_date,
7926                    avg,
7927                    "SYNTHETIC",
7928                ));
7929            }
7930
7931            let mut translation_results = Vec::new();
7932            for company in &self.config.companies {
7933                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
7934                // to ensure the translation produces non-trivial CTA amounts.
7935                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
7936                    .max(rust_decimal::Decimal::from(100_000_u32));
7937
7938                let func_ccy = company
7939                    .functional_currency
7940                    .clone()
7941                    .unwrap_or_else(|| company.currency.clone());
7942
7943                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
7944                    &company.code,
7945                    &func_ccy,
7946                    &presentation_currency,
7947                    &ias21_period_label,
7948                    end_date,
7949                    company_revenue,
7950                    &rate_table,
7951                );
7952                translation_results.push(result);
7953            }
7954
7955            snapshot.currency_translation_count = translation_results.len();
7956            snapshot.currency_translation_results = translation_results;
7957        }
7958
7959        stats.revenue_contract_count = snapshot.revenue_contract_count;
7960        stats.impairment_test_count = snapshot.impairment_test_count;
7961        stats.business_combination_count = snapshot.business_combination_count;
7962        stats.ecl_model_count = snapshot.ecl_model_count;
7963        stats.provision_count = snapshot.provision_count;
7964
7965        // ------------------------------------------------------------
7966        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
7967        // ------------------------------------------------------------
7968        if self.config.accounting_standards.leases.enabled {
7969            use datasynth_generators::standards::LeaseGenerator;
7970            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7971                .unwrap_or_else(|_| {
7972                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
7973                });
7974            let framework =
7975                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
7976            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
7977            for company in &self.config.companies {
7978                let leases = lease_gen.generate(
7979                    &company.code,
7980                    start_date,
7981                    &self.config.accounting_standards.leases,
7982                    framework,
7983                );
7984                snapshot.lease_count += leases.len();
7985                snapshot.leases.extend(leases);
7986            }
7987            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
7988        }
7989
7990        // ------------------------------------------------------------
7991        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
7992        // ------------------------------------------------------------
7993        if self.config.accounting_standards.fair_value.enabled {
7994            use datasynth_generators::standards::FairValueGenerator;
7995            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7996                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
7997                + chrono::Months::new(self.config.global.period_months);
7998            let framework =
7999                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8000            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8001            for company in &self.config.companies {
8002                let measurements = fv_gen.generate(
8003                    &company.code,
8004                    end_date,
8005                    &company.currency,
8006                    &self.config.accounting_standards.fair_value,
8007                    framework,
8008                );
8009                snapshot.fair_value_measurement_count += measurements.len();
8010                snapshot.fair_value_measurements.extend(measurements);
8011            }
8012            info!(
8013                "v3.3.1 fair value measurements: {}",
8014                snapshot.fair_value_measurement_count
8015            );
8016        }
8017
8018        // ------------------------------------------------------------
8019        // v3.3.1: Framework reconciliation (dual reporting only)
8020        // ------------------------------------------------------------
8021        if self.config.accounting_standards.generate_differences
8022            && matches!(
8023                self.config.accounting_standards.framework,
8024                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8025            )
8026        {
8027            use datasynth_generators::standards::FrameworkReconciliationGenerator;
8028            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8029                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8030                + chrono::Months::new(self.config.global.period_months);
8031            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8032            for company in &self.config.companies {
8033                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8034                snapshot.framework_difference_count += records.len();
8035                snapshot.framework_differences.extend(records);
8036                snapshot.framework_reconciliations.push(reconciliation);
8037            }
8038            info!(
8039                "v3.3.1 framework reconciliation: {} differences across {} entities",
8040                snapshot.framework_difference_count,
8041                snapshot.framework_reconciliations.len()
8042            );
8043        }
8044
8045        info!(
8046            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8047            snapshot.revenue_contract_count,
8048            snapshot.impairment_test_count,
8049            snapshot.business_combination_count,
8050            snapshot.ecl_model_count,
8051            snapshot.provision_count,
8052            snapshot.currency_translation_count,
8053            snapshot.lease_count,
8054            snapshot.fair_value_measurement_count,
8055            snapshot.framework_difference_count,
8056        );
8057        self.check_resources_with_log("post-accounting-standards")?;
8058
8059        Ok(snapshot)
8060    }
8061
8062    /// v3.3.1: helper to resolve the accounting-standards framework enum
8063    /// from config into the `datasynth_standards::framework::AccountingFramework`
8064    /// type expected by standards generators. Falls back to US GAAP.
8065    fn resolve_accounting_framework(
8066        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8067    ) -> datasynth_standards::framework::AccountingFramework {
8068        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8069        use datasynth_standards::framework::AccountingFramework as Fw;
8070        match cfg {
8071            Some(Cfg::Ifrs) => Fw::Ifrs,
8072            Some(Cfg::DualReporting) => Fw::DualReporting,
8073            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8074            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8075            _ => Fw::UsGaap,
8076        }
8077    }
8078
8079    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8080    fn phase_manufacturing(
8081        &mut self,
8082        stats: &mut EnhancedGenerationStatistics,
8083    ) -> SynthResult<ManufacturingSnapshot> {
8084        if !self.phase_config.generate_manufacturing {
8085            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8086            return Ok(ManufacturingSnapshot::default());
8087        }
8088        info!("Phase 18: Generating Manufacturing Data");
8089
8090        let seed = self.seed;
8091        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8092            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8093        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8094        let company_code = self
8095            .config
8096            .companies
8097            .first()
8098            .map(|c| c.code.as_str())
8099            .unwrap_or("1000");
8100
8101        let material_data: Vec<(String, String)> = self
8102            .master_data
8103            .materials
8104            .iter()
8105            .map(|m| (m.material_id.clone(), m.description.clone()))
8106            .collect();
8107
8108        if material_data.is_empty() {
8109            debug!("Phase 18: Skipped (no materials available)");
8110            return Ok(ManufacturingSnapshot::default());
8111        }
8112
8113        let mut snapshot = ManufacturingSnapshot::default();
8114
8115        // Generate production orders
8116        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8117        // v3.4.3: snap planned / actual / operation dates to business days.
8118        if let Some(ctx) = &self.temporal_context {
8119            prod_gen.set_temporal_context(Arc::clone(ctx));
8120        }
8121        let production_orders = prod_gen.generate(
8122            company_code,
8123            &material_data,
8124            start_date,
8125            end_date,
8126            &self.config.manufacturing.production_orders,
8127            &self.config.manufacturing.costing,
8128            &self.config.manufacturing.routing,
8129        );
8130        snapshot.production_order_count = production_orders.len();
8131
8132        // Generate quality inspections from production orders
8133        let inspection_data: Vec<(String, String, String)> = production_orders
8134            .iter()
8135            .map(|po| {
8136                (
8137                    po.order_id.clone(),
8138                    po.material_id.clone(),
8139                    po.material_description.clone(),
8140                )
8141            })
8142            .collect();
8143
8144        snapshot.production_orders = production_orders;
8145
8146        if !inspection_data.is_empty() {
8147            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8148            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8149            snapshot.quality_inspection_count = inspections.len();
8150            snapshot.quality_inspections = inspections;
8151        }
8152
8153        // Generate cycle counts (one per month)
8154        let storage_locations: Vec<(String, String)> = material_data
8155            .iter()
8156            .enumerate()
8157            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8158            .collect();
8159
8160        let employee_ids: Vec<String> = self
8161            .master_data
8162            .employees
8163            .iter()
8164            .map(|e| e.employee_id.clone())
8165            .collect();
8166        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8167            .with_employee_pool(employee_ids);
8168        let mut cycle_count_total = 0usize;
8169        for month in 0..self.config.global.period_months {
8170            let count_date = start_date + chrono::Months::new(month);
8171            let items_per_count = storage_locations.len().clamp(10, 50);
8172            let cc = cc_gen.generate(
8173                company_code,
8174                &storage_locations,
8175                count_date,
8176                items_per_count,
8177            );
8178            snapshot.cycle_counts.push(cc);
8179            cycle_count_total += 1;
8180        }
8181        snapshot.cycle_count_count = cycle_count_total;
8182
8183        // Generate BOM components
8184        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8185        let bom_components = bom_gen.generate(company_code, &material_data);
8186        snapshot.bom_component_count = bom_components.len();
8187        snapshot.bom_components = bom_components;
8188
8189        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8190        let currency = self
8191            .config
8192            .companies
8193            .first()
8194            .map(|c| c.currency.as_str())
8195            .unwrap_or("USD");
8196        let production_order_ids: Vec<String> = snapshot
8197            .production_orders
8198            .iter()
8199            .map(|po| po.order_id.clone())
8200            .collect();
8201        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8202        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8203            company_code,
8204            &material_data,
8205            start_date,
8206            end_date,
8207            2,
8208            currency,
8209            &production_order_ids,
8210        );
8211        snapshot.inventory_movement_count = inventory_movements.len();
8212        snapshot.inventory_movements = inventory_movements;
8213
8214        stats.production_order_count = snapshot.production_order_count;
8215        stats.quality_inspection_count = snapshot.quality_inspection_count;
8216        stats.cycle_count_count = snapshot.cycle_count_count;
8217        stats.bom_component_count = snapshot.bom_component_count;
8218        stats.inventory_movement_count = snapshot.inventory_movement_count;
8219
8220        info!(
8221            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8222            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8223            snapshot.bom_component_count, snapshot.inventory_movement_count
8224        );
8225        self.check_resources_with_log("post-manufacturing")?;
8226
8227        Ok(snapshot)
8228    }
8229
8230    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8231    fn phase_sales_kpi_budgets(
8232        &mut self,
8233        coa: &Arc<ChartOfAccounts>,
8234        financial_reporting: &FinancialReportingSnapshot,
8235        stats: &mut EnhancedGenerationStatistics,
8236    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8237        if !self.phase_config.generate_sales_kpi_budgets {
8238            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8239            return Ok(SalesKpiBudgetsSnapshot::default());
8240        }
8241        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8242
8243        let seed = self.seed;
8244        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8245            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8246        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8247        let company_code = self
8248            .config
8249            .companies
8250            .first()
8251            .map(|c| c.code.as_str())
8252            .unwrap_or("1000");
8253
8254        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8255
8256        // Sales Quotes
8257        if self.config.sales_quotes.enabled {
8258            let customer_data: Vec<(String, String)> = self
8259                .master_data
8260                .customers
8261                .iter()
8262                .map(|c| (c.customer_id.clone(), c.name.clone()))
8263                .collect();
8264            let material_data: Vec<(String, String)> = self
8265                .master_data
8266                .materials
8267                .iter()
8268                .map(|m| (m.material_id.clone(), m.description.clone()))
8269                .collect();
8270
8271            if !customer_data.is_empty() && !material_data.is_empty() {
8272                let employee_ids: Vec<String> = self
8273                    .master_data
8274                    .employees
8275                    .iter()
8276                    .map(|e| e.employee_id.clone())
8277                    .collect();
8278                let customer_ids: Vec<String> = self
8279                    .master_data
8280                    .customers
8281                    .iter()
8282                    .map(|c| c.customer_id.clone())
8283                    .collect();
8284                let company_currency = self
8285                    .config
8286                    .companies
8287                    .first()
8288                    .map(|c| c.currency.as_str())
8289                    .unwrap_or("USD");
8290
8291                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8292                    .with_pools(employee_ids, customer_ids);
8293                let quotes = quote_gen.generate_with_currency(
8294                    company_code,
8295                    &customer_data,
8296                    &material_data,
8297                    start_date,
8298                    end_date,
8299                    &self.config.sales_quotes,
8300                    company_currency,
8301                );
8302                snapshot.sales_quote_count = quotes.len();
8303                snapshot.sales_quotes = quotes;
8304            }
8305        }
8306
8307        // Management KPIs
8308        if self.config.financial_reporting.management_kpis.enabled {
8309            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8310            let mut kpis = kpi_gen.generate(
8311                company_code,
8312                start_date,
8313                end_date,
8314                &self.config.financial_reporting.management_kpis,
8315            );
8316
8317            // Override financial KPIs with actual data from financial statements
8318            {
8319                use rust_decimal::Decimal;
8320
8321                if let Some(income_stmt) =
8322                    financial_reporting.financial_statements.iter().find(|fs| {
8323                        fs.statement_type == StatementType::IncomeStatement
8324                            && fs.company_code == company_code
8325                    })
8326                {
8327                    // Extract revenue and COGS from income statement line items
8328                    let total_revenue: Decimal = income_stmt
8329                        .line_items
8330                        .iter()
8331                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8332                        .map(|li| li.amount)
8333                        .sum();
8334                    let total_cogs: Decimal = income_stmt
8335                        .line_items
8336                        .iter()
8337                        .filter(|li| {
8338                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8339                                && !li.is_total
8340                        })
8341                        .map(|li| li.amount.abs())
8342                        .sum();
8343                    let total_opex: Decimal = income_stmt
8344                        .line_items
8345                        .iter()
8346                        .filter(|li| {
8347                            li.section.contains("Expense")
8348                                && !li.is_total
8349                                && !li.section.contains("Cost")
8350                        })
8351                        .map(|li| li.amount.abs())
8352                        .sum();
8353
8354                    if total_revenue > Decimal::ZERO {
8355                        let hundred = Decimal::from(100);
8356                        let gross_margin_pct =
8357                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8358                        let operating_income = total_revenue - total_cogs - total_opex;
8359                        let op_margin_pct =
8360                            (operating_income * hundred / total_revenue).round_dp(2);
8361
8362                        // Override gross margin and operating margin KPIs
8363                        for kpi in &mut kpis {
8364                            if kpi.name == "Gross Margin" {
8365                                kpi.value = gross_margin_pct;
8366                            } else if kpi.name == "Operating Margin" {
8367                                kpi.value = op_margin_pct;
8368                            }
8369                        }
8370                    }
8371                }
8372
8373                // Override Current Ratio from balance sheet
8374                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8375                    fs.statement_type == StatementType::BalanceSheet
8376                        && fs.company_code == company_code
8377                }) {
8378                    let current_assets: Decimal = bs
8379                        .line_items
8380                        .iter()
8381                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8382                        .map(|li| li.amount)
8383                        .sum();
8384                    let current_liabilities: Decimal = bs
8385                        .line_items
8386                        .iter()
8387                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8388                        .map(|li| li.amount.abs())
8389                        .sum();
8390
8391                    if current_liabilities > Decimal::ZERO {
8392                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8393                        for kpi in &mut kpis {
8394                            if kpi.name == "Current Ratio" {
8395                                kpi.value = current_ratio;
8396                            }
8397                        }
8398                    }
8399                }
8400            }
8401
8402            snapshot.kpi_count = kpis.len();
8403            snapshot.kpis = kpis;
8404        }
8405
8406        // Budgets
8407        if self.config.financial_reporting.budgets.enabled {
8408            let account_data: Vec<(String, String)> = coa
8409                .accounts
8410                .iter()
8411                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8412                .collect();
8413
8414            if !account_data.is_empty() {
8415                let fiscal_year = start_date.year() as u32;
8416                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8417                let budget = budget_gen.generate(
8418                    company_code,
8419                    fiscal_year,
8420                    &account_data,
8421                    &self.config.financial_reporting.budgets,
8422                );
8423                snapshot.budget_line_count = budget.line_items.len();
8424                snapshot.budgets.push(budget);
8425            }
8426        }
8427
8428        stats.sales_quote_count = snapshot.sales_quote_count;
8429        stats.kpi_count = snapshot.kpi_count;
8430        stats.budget_line_count = snapshot.budget_line_count;
8431
8432        info!(
8433            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8434            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8435        );
8436        self.check_resources_with_log("post-sales-kpi-budgets")?;
8437
8438        Ok(snapshot)
8439    }
8440
8441    /// Compute pre-tax income for a single company from actual journal entries.
8442    ///
8443    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8444    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8445    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8446    /// and the period-close engine so that all three use a consistent definition.
8447    fn compute_pre_tax_income(
8448        company_code: &str,
8449        journal_entries: &[JournalEntry],
8450    ) -> rust_decimal::Decimal {
8451        use datasynth_core::accounts::AccountCategory;
8452        use rust_decimal::Decimal;
8453
8454        let mut total_revenue = Decimal::ZERO;
8455        let mut total_expenses = Decimal::ZERO;
8456
8457        for je in journal_entries {
8458            if je.header.company_code != company_code {
8459                continue;
8460            }
8461            for line in &je.lines {
8462                let cat = AccountCategory::from_account(&line.gl_account);
8463                match cat {
8464                    AccountCategory::Revenue => {
8465                        total_revenue += line.credit_amount - line.debit_amount;
8466                    }
8467                    AccountCategory::Cogs
8468                    | AccountCategory::OperatingExpense
8469                    | AccountCategory::OtherIncomeExpense => {
8470                        total_expenses += line.debit_amount - line.credit_amount;
8471                    }
8472                    _ => {}
8473                }
8474            }
8475        }
8476
8477        let pti = (total_revenue - total_expenses).round_dp(2);
8478        if pti == rust_decimal::Decimal::ZERO {
8479            // No income statement activity yet — fall back to a synthetic value so the
8480            // tax provision generator can still produce meaningful output.
8481            rust_decimal::Decimal::from(1_000_000u32)
8482        } else {
8483            pti
8484        }
8485    }
8486
8487    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8488    fn phase_tax_generation(
8489        &mut self,
8490        document_flows: &DocumentFlowSnapshot,
8491        journal_entries: &[JournalEntry],
8492        stats: &mut EnhancedGenerationStatistics,
8493    ) -> SynthResult<TaxSnapshot> {
8494        if !self.phase_config.generate_tax {
8495            debug!("Phase 20: Skipped (tax generation disabled)");
8496            return Ok(TaxSnapshot::default());
8497        }
8498        info!("Phase 20: Generating Tax Data");
8499
8500        let seed = self.seed;
8501        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8502            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8503        let fiscal_year = start_date.year();
8504        let company_code = self
8505            .config
8506            .companies
8507            .first()
8508            .map(|c| c.code.as_str())
8509            .unwrap_or("1000");
8510
8511        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8512            seed + 370,
8513            self.config.tax.clone(),
8514        );
8515
8516        let pack = self.primary_pack().clone();
8517        let (jurisdictions, codes) =
8518            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8519
8520        // Generate tax provisions for each company
8521        let mut provisions = Vec::new();
8522        if self.config.tax.provisions.enabled {
8523            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
8524            for company in &self.config.companies {
8525                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
8526                let statutory_rate = rust_decimal::Decimal::new(
8527                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
8528                    2,
8529                );
8530                let provision = provision_gen.generate(
8531                    &company.code,
8532                    start_date,
8533                    pre_tax_income,
8534                    statutory_rate,
8535                );
8536                provisions.push(provision);
8537            }
8538        }
8539
8540        // Generate tax lines from document invoices
8541        let mut tax_lines = Vec::new();
8542        if !codes.is_empty() {
8543            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
8544                datasynth_generators::TaxLineGeneratorConfig::default(),
8545                codes.clone(),
8546                seed + 372,
8547            );
8548
8549            // Tax lines from vendor invoices (input tax)
8550            // Use the first company's country as buyer country
8551            let buyer_country = self
8552                .config
8553                .companies
8554                .first()
8555                .map(|c| c.country.as_str())
8556                .unwrap_or("US");
8557            for vi in &document_flows.vendor_invoices {
8558                let lines = tax_line_gen.generate_for_document(
8559                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
8560                    &vi.header.document_id,
8561                    buyer_country, // seller approx same country
8562                    buyer_country,
8563                    vi.payable_amount,
8564                    vi.header.document_date,
8565                    None,
8566                );
8567                tax_lines.extend(lines);
8568            }
8569
8570            // Tax lines from customer invoices (output tax)
8571            for ci in &document_flows.customer_invoices {
8572                let lines = tax_line_gen.generate_for_document(
8573                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
8574                    &ci.header.document_id,
8575                    buyer_country, // seller is the company
8576                    buyer_country,
8577                    ci.total_gross_amount,
8578                    ci.header.document_date,
8579                    None,
8580                );
8581                tax_lines.extend(lines);
8582            }
8583        }
8584
8585        // Generate deferred tax data (IAS 12 / ASC 740) for each company
8586        let deferred_tax = {
8587            let companies: Vec<(&str, &str)> = self
8588                .config
8589                .companies
8590                .iter()
8591                .map(|c| (c.code.as_str(), c.country.as_str()))
8592                .collect();
8593            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
8594            deferred_gen.generate(&companies, start_date, journal_entries)
8595        };
8596
8597        // Build a document_id → posting_date map so each tax JE uses its
8598        // source document's date rather than a blanket period-end date.
8599        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
8600            std::collections::HashMap::new();
8601        for vi in &document_flows.vendor_invoices {
8602            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
8603        }
8604        for ci in &document_flows.customer_invoices {
8605            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
8606        }
8607
8608        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
8609        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610        let tax_posting_journal_entries = if !tax_lines.is_empty() {
8611            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
8612                &tax_lines,
8613                company_code,
8614                &doc_dates,
8615                end_date,
8616            );
8617            debug!("Generated {} tax posting JEs", jes.len());
8618            jes
8619        } else {
8620            Vec::new()
8621        };
8622
8623        let snapshot = TaxSnapshot {
8624            jurisdiction_count: jurisdictions.len(),
8625            code_count: codes.len(),
8626            jurisdictions,
8627            codes,
8628            tax_provisions: provisions,
8629            tax_lines,
8630            tax_returns: Vec::new(),
8631            withholding_records: Vec::new(),
8632            tax_anomaly_labels: Vec::new(),
8633            deferred_tax,
8634            tax_posting_journal_entries,
8635        };
8636
8637        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
8638        stats.tax_code_count = snapshot.code_count;
8639        stats.tax_provision_count = snapshot.tax_provisions.len();
8640        stats.tax_line_count = snapshot.tax_lines.len();
8641
8642        info!(
8643            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
8644            snapshot.jurisdiction_count,
8645            snapshot.code_count,
8646            snapshot.tax_provisions.len(),
8647            snapshot.deferred_tax.temporary_differences.len(),
8648            snapshot.deferred_tax.journal_entries.len(),
8649            snapshot.tax_posting_journal_entries.len(),
8650        );
8651        self.check_resources_with_log("post-tax")?;
8652
8653        Ok(snapshot)
8654    }
8655
8656    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
8657    fn phase_esg_generation(
8658        &mut self,
8659        document_flows: &DocumentFlowSnapshot,
8660        manufacturing: &ManufacturingSnapshot,
8661        stats: &mut EnhancedGenerationStatistics,
8662    ) -> SynthResult<EsgSnapshot> {
8663        if !self.phase_config.generate_esg {
8664            debug!("Phase 21: Skipped (ESG generation disabled)");
8665            return Ok(EsgSnapshot::default());
8666        }
8667        let degradation = self.check_resources()?;
8668        if degradation >= DegradationLevel::Reduced {
8669            debug!(
8670                "Phase skipped due to resource pressure (degradation: {:?})",
8671                degradation
8672            );
8673            return Ok(EsgSnapshot::default());
8674        }
8675        info!("Phase 21: Generating ESG Data");
8676
8677        let seed = self.seed;
8678        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8679            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8680        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8681        let entity_id = self
8682            .config
8683            .companies
8684            .first()
8685            .map(|c| c.code.as_str())
8686            .unwrap_or("1000");
8687
8688        let esg_cfg = &self.config.esg;
8689        let mut snapshot = EsgSnapshot::default();
8690
8691        // Energy consumption (feeds into scope 1 & 2 emissions)
8692        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
8693            esg_cfg.environmental.energy.clone(),
8694            seed + 80,
8695        );
8696        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
8697
8698        // Water usage
8699        let facility_count = esg_cfg.environmental.energy.facility_count;
8700        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
8701        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
8702
8703        // Waste
8704        let mut waste_gen = datasynth_generators::WasteGenerator::new(
8705            seed + 82,
8706            esg_cfg.environmental.waste.diversion_target,
8707            facility_count,
8708        );
8709        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
8710
8711        // Emissions (scope 1, 2, 3)
8712        let mut emission_gen =
8713            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
8714
8715        // Build EnergyInput from energy_records
8716        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
8717            .iter()
8718            .map(|e| datasynth_generators::EnergyInput {
8719                facility_id: e.facility_id.clone(),
8720                energy_type: match e.energy_source {
8721                    EnergySourceType::NaturalGas => {
8722                        datasynth_generators::EnergyInputType::NaturalGas
8723                    }
8724                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
8725                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
8726                    _ => datasynth_generators::EnergyInputType::Electricity,
8727                },
8728                consumption_kwh: e.consumption_kwh,
8729                period: e.period,
8730            })
8731            .collect();
8732
8733        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
8734        if !manufacturing.production_orders.is_empty() {
8735            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
8736                &manufacturing.production_orders,
8737                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
8738                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
8739            );
8740            if !mfg_energy.is_empty() {
8741                info!(
8742                    "ESG: {} energy inputs derived from {} production orders",
8743                    mfg_energy.len(),
8744                    manufacturing.production_orders.len(),
8745                );
8746                energy_inputs.extend(mfg_energy);
8747            }
8748        }
8749
8750        let mut emissions = Vec::new();
8751        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
8752        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
8753
8754        // Scope 3: use vendor spend data from actual payments
8755        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
8756            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
8757            for payment in &document_flows.payments {
8758                if payment.is_vendor {
8759                    *totals
8760                        .entry(payment.business_partner_id.clone())
8761                        .or_default() += payment.amount;
8762                }
8763            }
8764            totals
8765        };
8766        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
8767            .master_data
8768            .vendors
8769            .iter()
8770            .map(|v| {
8771                let spend = vendor_payment_totals
8772                    .get(&v.vendor_id)
8773                    .copied()
8774                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
8775                datasynth_generators::VendorSpendInput {
8776                    vendor_id: v.vendor_id.clone(),
8777                    category: format!("{:?}", v.vendor_type).to_lowercase(),
8778                    spend,
8779                    country: v.country.clone(),
8780                }
8781            })
8782            .collect();
8783        if !vendor_spend.is_empty() {
8784            emissions.extend(emission_gen.generate_scope3_purchased_goods(
8785                entity_id,
8786                &vendor_spend,
8787                start_date,
8788                end_date,
8789            ));
8790        }
8791
8792        // Business travel & commuting (scope 3)
8793        let headcount = self.master_data.employees.len() as u32;
8794        if headcount > 0 {
8795            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
8796            emissions.extend(emission_gen.generate_scope3_business_travel(
8797                entity_id,
8798                travel_spend,
8799                start_date,
8800            ));
8801            emissions
8802                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
8803        }
8804
8805        snapshot.emission_count = emissions.len();
8806        snapshot.emissions = emissions;
8807        snapshot.energy = energy_records;
8808
8809        // Social: Workforce diversity, pay equity, safety
8810        let mut workforce_gen =
8811            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
8812        let total_headcount = headcount.max(100);
8813        snapshot.diversity =
8814            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
8815        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
8816
8817        // v2.4: Derive additional workforce diversity metrics from actual employee data
8818        if !self.master_data.employees.is_empty() {
8819            let hr_diversity = workforce_gen.generate_diversity_from_employees(
8820                entity_id,
8821                &self.master_data.employees,
8822                end_date,
8823            );
8824            if !hr_diversity.is_empty() {
8825                info!(
8826                    "ESG: {} diversity metrics derived from {} actual employees",
8827                    hr_diversity.len(),
8828                    self.master_data.employees.len(),
8829                );
8830                snapshot.diversity.extend(hr_diversity);
8831            }
8832        }
8833
8834        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
8835            entity_id,
8836            facility_count,
8837            start_date,
8838            end_date,
8839        );
8840
8841        // Compute safety metrics
8842        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
8843        let safety_metric = workforce_gen.compute_safety_metrics(
8844            entity_id,
8845            &snapshot.safety_incidents,
8846            total_hours,
8847            start_date,
8848        );
8849        snapshot.safety_metrics = vec![safety_metric];
8850
8851        // Governance
8852        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
8853            seed + 85,
8854            esg_cfg.governance.board_size,
8855            esg_cfg.governance.independence_target,
8856        );
8857        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
8858
8859        // Supplier ESG assessments
8860        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
8861            esg_cfg.supply_chain_esg.clone(),
8862            seed + 86,
8863        );
8864        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
8865            .master_data
8866            .vendors
8867            .iter()
8868            .map(|v| datasynth_generators::VendorInput {
8869                vendor_id: v.vendor_id.clone(),
8870                country: v.country.clone(),
8871                industry: format!("{:?}", v.vendor_type).to_lowercase(),
8872                quality_score: None,
8873            })
8874            .collect();
8875        snapshot.supplier_assessments =
8876            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
8877
8878        // Disclosures
8879        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
8880            seed + 87,
8881            esg_cfg.reporting.clone(),
8882            esg_cfg.climate_scenarios.clone(),
8883        );
8884        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
8885        snapshot.disclosures = disclosure_gen.generate_disclosures(
8886            entity_id,
8887            &snapshot.materiality,
8888            start_date,
8889            end_date,
8890        );
8891        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
8892        snapshot.disclosure_count = snapshot.disclosures.len();
8893
8894        // Anomaly injection
8895        if esg_cfg.anomaly_rate > 0.0 {
8896            let mut anomaly_injector =
8897                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
8898            let mut labels = Vec::new();
8899            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
8900            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
8901            labels.extend(
8902                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
8903            );
8904            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
8905            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
8906            snapshot.anomaly_labels = labels;
8907        }
8908
8909        stats.esg_emission_count = snapshot.emission_count;
8910        stats.esg_disclosure_count = snapshot.disclosure_count;
8911
8912        info!(
8913            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
8914            snapshot.emission_count,
8915            snapshot.disclosure_count,
8916            snapshot.supplier_assessments.len()
8917        );
8918        self.check_resources_with_log("post-esg")?;
8919
8920        Ok(snapshot)
8921    }
8922
8923    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
8924    fn phase_treasury_data(
8925        &mut self,
8926        document_flows: &DocumentFlowSnapshot,
8927        subledger: &SubledgerSnapshot,
8928        intercompany: &IntercompanySnapshot,
8929        stats: &mut EnhancedGenerationStatistics,
8930    ) -> SynthResult<TreasurySnapshot> {
8931        if !self.phase_config.generate_treasury {
8932            debug!("Phase 22: Skipped (treasury generation disabled)");
8933            return Ok(TreasurySnapshot::default());
8934        }
8935        let degradation = self.check_resources()?;
8936        if degradation >= DegradationLevel::Reduced {
8937            debug!(
8938                "Phase skipped due to resource pressure (degradation: {:?})",
8939                degradation
8940            );
8941            return Ok(TreasurySnapshot::default());
8942        }
8943        info!("Phase 22: Generating Treasury Data");
8944
8945        let seed = self.seed;
8946        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8947            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8948        let currency = self
8949            .config
8950            .companies
8951            .first()
8952            .map(|c| c.currency.as_str())
8953            .unwrap_or("USD");
8954        let entity_id = self
8955            .config
8956            .companies
8957            .first()
8958            .map(|c| c.code.as_str())
8959            .unwrap_or("1000");
8960
8961        let mut snapshot = TreasurySnapshot::default();
8962
8963        // Generate debt instruments
8964        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
8965            self.config.treasury.debt.clone(),
8966            seed + 90,
8967        );
8968        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
8969
8970        // Generate hedging instruments (IR swaps for floating-rate debt)
8971        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
8972            self.config.treasury.hedging.clone(),
8973            seed + 91,
8974        );
8975        for debt in &snapshot.debt_instruments {
8976            if debt.rate_type == InterestRateType::Variable {
8977                let swap = hedge_gen.generate_ir_swap(
8978                    currency,
8979                    debt.principal,
8980                    debt.origination_date,
8981                    debt.maturity_date,
8982                );
8983                snapshot.hedging_instruments.push(swap);
8984            }
8985        }
8986
8987        // Build FX exposures from foreign-currency payments and generate
8988        // FX forwards + hedge relationship designations via generate() API.
8989        {
8990            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
8991            for payment in &document_flows.payments {
8992                if payment.currency != currency {
8993                    let entry = fx_map
8994                        .entry(payment.currency.clone())
8995                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
8996                    entry.0 += payment.amount;
8997                    // Use the latest settlement date among grouped payments
8998                    if payment.header.document_date > entry.1 {
8999                        entry.1 = payment.header.document_date;
9000                    }
9001                }
9002            }
9003            if !fx_map.is_empty() {
9004                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9005                    .into_iter()
9006                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
9007                        datasynth_generators::treasury::FxExposure {
9008                            currency_pair: format!("{foreign_ccy}/{currency}"),
9009                            foreign_currency: foreign_ccy,
9010                            net_amount,
9011                            settlement_date,
9012                            description: "AP payment FX exposure".to_string(),
9013                        }
9014                    })
9015                    .collect();
9016                let (fx_instruments, fx_relationships) =
9017                    hedge_gen.generate(start_date, &fx_exposures);
9018                snapshot.hedging_instruments.extend(fx_instruments);
9019                snapshot.hedge_relationships.extend(fx_relationships);
9020            }
9021        }
9022
9023        // Inject anomalies if configured
9024        if self.config.treasury.anomaly_rate > 0.0 {
9025            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9026                seed + 92,
9027                self.config.treasury.anomaly_rate,
9028            );
9029            let mut labels = Vec::new();
9030            labels.extend(
9031                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9032            );
9033            snapshot.treasury_anomaly_labels = labels;
9034        }
9035
9036        // Generate cash positions from payment flows
9037        if self.config.treasury.cash_positioning.enabled {
9038            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9039
9040            // AP payments as outflows
9041            for payment in &document_flows.payments {
9042                cash_flows.push(datasynth_generators::treasury::CashFlow {
9043                    date: payment.header.document_date,
9044                    account_id: format!("{entity_id}-MAIN"),
9045                    amount: payment.amount,
9046                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9047                });
9048            }
9049
9050            // Customer receipts (from O2C chains) as inflows
9051            for chain in &document_flows.o2c_chains {
9052                if let Some(ref receipt) = chain.customer_receipt {
9053                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9054                        date: receipt.header.document_date,
9055                        account_id: format!("{entity_id}-MAIN"),
9056                        amount: receipt.amount,
9057                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9058                    });
9059                }
9060                // Remainder receipts (follow-up to partial payments)
9061                for receipt in &chain.remainder_receipts {
9062                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9063                        date: receipt.header.document_date,
9064                        account_id: format!("{entity_id}-MAIN"),
9065                        amount: receipt.amount,
9066                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9067                    });
9068                }
9069            }
9070
9071            if !cash_flows.is_empty() {
9072                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9073                    self.config.treasury.cash_positioning.clone(),
9074                    seed + 93,
9075                );
9076                let account_id = format!("{entity_id}-MAIN");
9077                snapshot.cash_positions = cash_gen.generate(
9078                    entity_id,
9079                    &account_id,
9080                    currency,
9081                    &cash_flows,
9082                    start_date,
9083                    start_date + chrono::Months::new(self.config.global.period_months),
9084                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9085                );
9086            }
9087        }
9088
9089        // Generate cash forecasts from AR/AP aging
9090        if self.config.treasury.cash_forecasting.enabled {
9091            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9092
9093            // Build AR aging items from subledger AR invoices
9094            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9095                .ar_invoices
9096                .iter()
9097                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9098                .map(|inv| {
9099                    let days_past_due = if inv.due_date < end_date {
9100                        (end_date - inv.due_date).num_days().max(0) as u32
9101                    } else {
9102                        0
9103                    };
9104                    datasynth_generators::treasury::ArAgingItem {
9105                        expected_date: inv.due_date,
9106                        amount: inv.amount_remaining,
9107                        days_past_due,
9108                        document_id: inv.invoice_number.clone(),
9109                    }
9110                })
9111                .collect();
9112
9113            // Build AP aging items from subledger AP invoices
9114            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9115                .ap_invoices
9116                .iter()
9117                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9118                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9119                    payment_date: inv.due_date,
9120                    amount: inv.amount_remaining,
9121                    document_id: inv.invoice_number.clone(),
9122                })
9123                .collect();
9124
9125            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9126                self.config.treasury.cash_forecasting.clone(),
9127                seed + 94,
9128            );
9129            let forecast = forecast_gen.generate(
9130                entity_id,
9131                currency,
9132                end_date,
9133                &ar_items,
9134                &ap_items,
9135                &[], // scheduled disbursements - empty for now
9136            );
9137            snapshot.cash_forecasts.push(forecast);
9138        }
9139
9140        // Generate cash pools and sweeps
9141        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9142            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9143            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9144                self.config.treasury.cash_pooling.clone(),
9145                seed + 95,
9146            );
9147
9148            // Create a pool from available accounts
9149            let account_ids: Vec<String> = snapshot
9150                .cash_positions
9151                .iter()
9152                .map(|cp| cp.bank_account_id.clone())
9153                .collect::<std::collections::HashSet<_>>()
9154                .into_iter()
9155                .collect();
9156
9157            if let Some(pool) =
9158                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9159            {
9160                // Generate sweeps - build participant balances from last cash position per account
9161                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9162                for cp in &snapshot.cash_positions {
9163                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9164                }
9165
9166                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9167                    latest_balances
9168                        .into_iter()
9169                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9170                        .map(
9171                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9172                                account_id: id,
9173                                balance,
9174                            },
9175                        )
9176                        .collect();
9177
9178                let sweeps =
9179                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9180                snapshot.cash_pool_sweeps = sweeps;
9181                snapshot.cash_pools.push(pool);
9182            }
9183        }
9184
9185        // Generate bank guarantees
9186        if self.config.treasury.bank_guarantees.enabled {
9187            let vendor_names: Vec<String> = self
9188                .master_data
9189                .vendors
9190                .iter()
9191                .map(|v| v.name.clone())
9192                .collect();
9193            if !vendor_names.is_empty() {
9194                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9195                    self.config.treasury.bank_guarantees.clone(),
9196                    seed + 96,
9197                );
9198                snapshot.bank_guarantees =
9199                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9200            }
9201        }
9202
9203        // Generate netting runs from intercompany matched pairs
9204        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9205            let entity_ids: Vec<String> = self
9206                .config
9207                .companies
9208                .iter()
9209                .map(|c| c.code.clone())
9210                .collect();
9211            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9212                .matched_pairs
9213                .iter()
9214                .map(|mp| {
9215                    (
9216                        mp.seller_company.clone(),
9217                        mp.buyer_company.clone(),
9218                        mp.amount,
9219                    )
9220                })
9221                .collect();
9222            if entity_ids.len() >= 2 {
9223                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9224                    self.config.treasury.netting.clone(),
9225                    seed + 97,
9226                );
9227                snapshot.netting_runs = netting_gen.generate(
9228                    &entity_ids,
9229                    currency,
9230                    start_date,
9231                    self.config.global.period_months,
9232                    &ic_amounts,
9233                );
9234            }
9235        }
9236
9237        // Generate treasury journal entries from the instruments we just created.
9238        {
9239            use datasynth_generators::treasury::TreasuryAccounting;
9240
9241            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9242            let mut treasury_jes = Vec::new();
9243
9244            // Debt interest accrual JEs
9245            if !snapshot.debt_instruments.is_empty() {
9246                let debt_jes =
9247                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9248                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9249                treasury_jes.extend(debt_jes);
9250            }
9251
9252            // Hedge mark-to-market JEs
9253            if !snapshot.hedging_instruments.is_empty() {
9254                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9255                    &snapshot.hedging_instruments,
9256                    &snapshot.hedge_relationships,
9257                    end_date,
9258                    entity_id,
9259                );
9260                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9261                treasury_jes.extend(hedge_jes);
9262            }
9263
9264            // Cash pool sweep JEs
9265            if !snapshot.cash_pool_sweeps.is_empty() {
9266                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9267                    &snapshot.cash_pool_sweeps,
9268                    entity_id,
9269                );
9270                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9271                treasury_jes.extend(sweep_jes);
9272            }
9273
9274            if !treasury_jes.is_empty() {
9275                debug!("Total treasury journal entries: {}", treasury_jes.len());
9276            }
9277            snapshot.journal_entries = treasury_jes;
9278        }
9279
9280        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9281        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9282        stats.cash_position_count = snapshot.cash_positions.len();
9283        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9284        stats.cash_pool_count = snapshot.cash_pools.len();
9285
9286        info!(
9287            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9288            snapshot.debt_instruments.len(),
9289            snapshot.hedging_instruments.len(),
9290            snapshot.cash_positions.len(),
9291            snapshot.cash_forecasts.len(),
9292            snapshot.cash_pools.len(),
9293            snapshot.bank_guarantees.len(),
9294            snapshot.netting_runs.len(),
9295            snapshot.journal_entries.len(),
9296        );
9297        self.check_resources_with_log("post-treasury")?;
9298
9299        Ok(snapshot)
9300    }
9301
9302    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9303    fn phase_project_accounting(
9304        &mut self,
9305        document_flows: &DocumentFlowSnapshot,
9306        hr: &HrSnapshot,
9307        stats: &mut EnhancedGenerationStatistics,
9308    ) -> SynthResult<ProjectAccountingSnapshot> {
9309        if !self.phase_config.generate_project_accounting {
9310            debug!("Phase 23: Skipped (project accounting disabled)");
9311            return Ok(ProjectAccountingSnapshot::default());
9312        }
9313        let degradation = self.check_resources()?;
9314        if degradation >= DegradationLevel::Reduced {
9315            debug!(
9316                "Phase skipped due to resource pressure (degradation: {:?})",
9317                degradation
9318            );
9319            return Ok(ProjectAccountingSnapshot::default());
9320        }
9321        info!("Phase 23: Generating Project Accounting Data");
9322
9323        let seed = self.seed;
9324        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9325            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9326        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9327        let company_code = self
9328            .config
9329            .companies
9330            .first()
9331            .map(|c| c.code.as_str())
9332            .unwrap_or("1000");
9333
9334        let mut snapshot = ProjectAccountingSnapshot::default();
9335
9336        // Generate projects with WBS hierarchies
9337        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9338            self.config.project_accounting.clone(),
9339            seed + 95,
9340        );
9341        let pool = project_gen.generate(company_code, start_date, end_date);
9342        snapshot.projects = pool.projects.clone();
9343
9344        // Link source documents to projects for cost allocation
9345        {
9346            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9347                Vec::new();
9348
9349            // Time entries
9350            for te in &hr.time_entries {
9351                let total_hours = te.hours_regular + te.hours_overtime;
9352                if total_hours > 0.0 {
9353                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9354                        id: te.entry_id.clone(),
9355                        entity_id: company_code.to_string(),
9356                        date: te.date,
9357                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9358                            .unwrap_or(rust_decimal::Decimal::ZERO),
9359                        source_type: CostSourceType::TimeEntry,
9360                        hours: Some(
9361                            rust_decimal::Decimal::from_f64_retain(total_hours)
9362                                .unwrap_or(rust_decimal::Decimal::ZERO),
9363                        ),
9364                    });
9365                }
9366            }
9367
9368            // Expense reports
9369            for er in &hr.expense_reports {
9370                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9371                    id: er.report_id.clone(),
9372                    entity_id: company_code.to_string(),
9373                    date: er.submission_date,
9374                    amount: er.total_amount,
9375                    source_type: CostSourceType::ExpenseReport,
9376                    hours: None,
9377                });
9378            }
9379
9380            // Purchase orders
9381            for po in &document_flows.purchase_orders {
9382                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9383                    id: po.header.document_id.clone(),
9384                    entity_id: company_code.to_string(),
9385                    date: po.header.document_date,
9386                    amount: po.total_net_amount,
9387                    source_type: CostSourceType::PurchaseOrder,
9388                    hours: None,
9389                });
9390            }
9391
9392            // Vendor invoices
9393            for vi in &document_flows.vendor_invoices {
9394                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9395                    id: vi.header.document_id.clone(),
9396                    entity_id: company_code.to_string(),
9397                    date: vi.header.document_date,
9398                    amount: vi.payable_amount,
9399                    source_type: CostSourceType::VendorInvoice,
9400                    hours: None,
9401                });
9402            }
9403
9404            if !source_docs.is_empty() && !pool.projects.is_empty() {
9405                let mut cost_gen =
9406                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9407                        self.config.project_accounting.cost_allocation.clone(),
9408                        seed + 99,
9409                    );
9410                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9411            }
9412        }
9413
9414        // Generate change orders
9415        if self.config.project_accounting.change_orders.enabled {
9416            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9417                self.config.project_accounting.change_orders.clone(),
9418                seed + 96,
9419            );
9420            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9421        }
9422
9423        // Generate milestones
9424        if self.config.project_accounting.milestones.enabled {
9425            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9426                self.config.project_accounting.milestones.clone(),
9427                seed + 97,
9428            );
9429            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9430        }
9431
9432        // Generate earned value metrics (needs cost lines, so only if we have projects)
9433        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9434            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9435                self.config.project_accounting.earned_value.clone(),
9436                seed + 98,
9437            );
9438            snapshot.earned_value_metrics =
9439                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9440        }
9441
9442        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9443        if self.config.project_accounting.revenue_recognition.enabled
9444            && !snapshot.projects.is_empty()
9445            && !snapshot.cost_lines.is_empty()
9446        {
9447            use datasynth_generators::project_accounting::RevenueGenerator;
9448            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9449            let avg_contract_value =
9450                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9451                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9452
9453            // Build contract value tuples: only customer-type projects get revenue recognition.
9454            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9455            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9456                snapshot
9457                    .projects
9458                    .iter()
9459                    .filter(|p| {
9460                        matches!(
9461                            p.project_type,
9462                            datasynth_core::models::ProjectType::Customer
9463                        )
9464                    })
9465                    .map(|p| {
9466                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9467                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9468                        // budget × 1.25 → contract value
9469                        } else {
9470                            avg_contract_value
9471                        };
9472                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9473                        (p.project_id.clone(), cv, etc)
9474                    })
9475                    .collect();
9476
9477            if !contract_values.is_empty() {
9478                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9479                snapshot.revenue_records = rev_gen.generate(
9480                    &snapshot.projects,
9481                    &snapshot.cost_lines,
9482                    &contract_values,
9483                    start_date,
9484                    end_date,
9485                );
9486                debug!(
9487                    "Generated {} revenue recognition records for {} customer projects",
9488                    snapshot.revenue_records.len(),
9489                    contract_values.len()
9490                );
9491            }
9492        }
9493
9494        stats.project_count = snapshot.projects.len();
9495        stats.project_change_order_count = snapshot.change_orders.len();
9496        stats.project_cost_line_count = snapshot.cost_lines.len();
9497
9498        info!(
9499            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9500            snapshot.projects.len(),
9501            snapshot.change_orders.len(),
9502            snapshot.milestones.len(),
9503            snapshot.earned_value_metrics.len()
9504        );
9505        self.check_resources_with_log("post-project-accounting")?;
9506
9507        Ok(snapshot)
9508    }
9509
9510    /// Phase 24: Generate process evolution and organizational events.
9511    fn phase_evolution_events(
9512        &mut self,
9513        stats: &mut EnhancedGenerationStatistics,
9514    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9515        if !self.phase_config.generate_evolution_events {
9516            debug!("Phase 24: Skipped (evolution events disabled)");
9517            return Ok((Vec::new(), Vec::new()));
9518        }
9519        info!("Phase 24: Generating Process Evolution + Organizational Events");
9520
9521        let seed = self.seed;
9522        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9523            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9524        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9525
9526        // Process evolution events
9527        let mut proc_gen =
9528            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
9529                seed + 100,
9530            );
9531        let process_events = proc_gen.generate_events(start_date, end_date);
9532
9533        // Organizational events
9534        let company_codes: Vec<String> = self
9535            .config
9536            .companies
9537            .iter()
9538            .map(|c| c.code.clone())
9539            .collect();
9540        let mut org_gen =
9541            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
9542                seed + 101,
9543            );
9544        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
9545
9546        stats.process_evolution_event_count = process_events.len();
9547        stats.organizational_event_count = org_events.len();
9548
9549        info!(
9550            "Evolution events generated: {} process evolution, {} organizational",
9551            process_events.len(),
9552            org_events.len()
9553        );
9554        self.check_resources_with_log("post-evolution-events")?;
9555
9556        Ok((process_events, org_events))
9557    }
9558
9559    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
9560    /// data recovery, and regulatory changes).
9561    fn phase_disruption_events(
9562        &self,
9563        stats: &mut EnhancedGenerationStatistics,
9564    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
9565        if !self.config.organizational_events.enabled {
9566            debug!("Phase 24b: Skipped (organizational events disabled)");
9567            return Ok(Vec::new());
9568        }
9569        info!("Phase 24b: Generating Disruption Events");
9570
9571        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9572            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9573        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9574
9575        let company_codes: Vec<String> = self
9576            .config
9577            .companies
9578            .iter()
9579            .map(|c| c.code.clone())
9580            .collect();
9581
9582        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
9583        let events = gen.generate(start_date, end_date, &company_codes);
9584
9585        stats.disruption_event_count = events.len();
9586        info!("Disruption events generated: {} events", events.len());
9587        self.check_resources_with_log("post-disruption-events")?;
9588
9589        Ok(events)
9590    }
9591
9592    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
9593    ///
9594    /// Produces paired examples where each pair contains the original clean JE
9595    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
9596    /// split transaction). Useful for training anomaly detection models with
9597    /// known ground truth.
9598    fn phase_counterfactuals(
9599        &self,
9600        journal_entries: &[JournalEntry],
9601        stats: &mut EnhancedGenerationStatistics,
9602    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
9603        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
9604            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
9605            return Ok(Vec::new());
9606        }
9607        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
9608
9609        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
9610
9611        let mut gen = CounterfactualGenerator::new(self.seed + 110);
9612
9613        // Rotating set of specs to produce diverse mutation types
9614        let specs = [
9615            CounterfactualSpec::ScaleAmount { factor: 2.5 },
9616            CounterfactualSpec::ShiftDate { days: -14 },
9617            CounterfactualSpec::SelfApprove,
9618            CounterfactualSpec::SplitTransaction { split_count: 3 },
9619        ];
9620
9621        let pairs: Vec<_> = journal_entries
9622            .iter()
9623            .enumerate()
9624            .map(|(i, je)| {
9625                let spec = &specs[i % specs.len()];
9626                gen.generate(je, spec)
9627            })
9628            .collect();
9629
9630        stats.counterfactual_pair_count = pairs.len();
9631        info!(
9632            "Counterfactual pairs generated: {} pairs from {} journal entries",
9633            pairs.len(),
9634            journal_entries.len()
9635        );
9636        self.check_resources_with_log("post-counterfactuals")?;
9637
9638        Ok(pairs)
9639    }
9640
9641    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
9642    ///
9643    /// Uses the anomaly labels (from Phase 8) to determine which documents are
9644    /// fraudulent, then generates probabilistic red flags on all chain documents.
9645    /// Non-fraud documents also receive red flags at a lower rate (false positives)
9646    /// to produce realistic ML training data.
9647    fn phase_red_flags(
9648        &self,
9649        anomaly_labels: &AnomalyLabels,
9650        document_flows: &DocumentFlowSnapshot,
9651        stats: &mut EnhancedGenerationStatistics,
9652    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
9653        if !self.config.fraud.enabled {
9654            debug!("Phase 26: Skipped (fraud generation disabled)");
9655            return Ok(Vec::new());
9656        }
9657        info!("Phase 26: Generating Fraud Red-Flag Indicators");
9658
9659        use datasynth_generators::fraud::RedFlagGenerator;
9660
9661        let generator = RedFlagGenerator::new();
9662        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
9663
9664        // Build a set of document IDs that are known-fraudulent from anomaly labels.
9665        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
9666            .labels
9667            .iter()
9668            .filter(|label| label.anomaly_type.is_intentional())
9669            .map(|label| label.document_id.as_str())
9670            .collect();
9671
9672        let mut flags = Vec::new();
9673
9674        // Iterate P2P chains: use the purchase order document ID as the chain key.
9675        for chain in &document_flows.p2p_chains {
9676            let doc_id = &chain.purchase_order.header.document_id;
9677            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9678            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9679        }
9680
9681        // Iterate O2C chains: use the sales order document ID as the chain key.
9682        for chain in &document_flows.o2c_chains {
9683            let doc_id = &chain.sales_order.header.document_id;
9684            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
9685            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
9686        }
9687
9688        stats.red_flag_count = flags.len();
9689        info!(
9690            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
9691            flags.len(),
9692            document_flows.p2p_chains.len(),
9693            document_flows.o2c_chains.len(),
9694            fraud_doc_ids.len()
9695        );
9696        self.check_resources_with_log("post-red-flags")?;
9697
9698        Ok(flags)
9699    }
9700
9701    /// Phase 26b: Generate collusion rings from employee/vendor pools.
9702    ///
9703    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
9704    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
9705    /// advance them over the simulation period.
9706    fn phase_collusion_rings(
9707        &mut self,
9708        stats: &mut EnhancedGenerationStatistics,
9709    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
9710        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
9711            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
9712            return Ok(Vec::new());
9713        }
9714        info!("Phase 26b: Generating Collusion Rings");
9715
9716        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9717            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9718        let months = self.config.global.period_months;
9719
9720        let employee_ids: Vec<String> = self
9721            .master_data
9722            .employees
9723            .iter()
9724            .map(|e| e.employee_id.clone())
9725            .collect();
9726        let vendor_ids: Vec<String> = self
9727            .master_data
9728            .vendors
9729            .iter()
9730            .map(|v| v.vendor_id.clone())
9731            .collect();
9732
9733        let mut generator =
9734            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
9735        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
9736
9737        stats.collusion_ring_count = rings.len();
9738        info!(
9739            "Collusion rings generated: {} rings, total members: {}",
9740            rings.len(),
9741            rings
9742                .iter()
9743                .map(datasynth_generators::fraud::CollusionRing::size)
9744                .sum::<usize>()
9745        );
9746        self.check_resources_with_log("post-collusion-rings")?;
9747
9748        Ok(rings)
9749    }
9750
9751    /// Phase 27: Generate bi-temporal version chains for vendor entities.
9752    ///
9753    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
9754    /// master data changes over time, supporting bi-temporal audit queries.
9755    fn phase_temporal_attributes(
9756        &mut self,
9757        stats: &mut EnhancedGenerationStatistics,
9758    ) -> SynthResult<
9759        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
9760    > {
9761        if !self.config.temporal_attributes.enabled {
9762            debug!("Phase 27: Skipped (temporal attributes disabled)");
9763            return Ok(Vec::new());
9764        }
9765        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
9766
9767        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9768            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9769
9770        // Build a TemporalAttributeConfig from the user's config.
9771        // Since Phase 27 is already gated on temporal_attributes.enabled,
9772        // default to enabling version chains so users get actual mutations.
9773        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
9774            || self.config.temporal_attributes.enabled;
9775        let temporal_config = {
9776            let ta = &self.config.temporal_attributes;
9777            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
9778                .enabled(ta.enabled)
9779                .closed_probability(ta.valid_time.closed_probability)
9780                .avg_validity_days(ta.valid_time.avg_validity_days)
9781                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
9782                .with_version_chains(if generate_version_chains {
9783                    ta.avg_versions_per_entity
9784                } else {
9785                    1.0
9786                })
9787                .build()
9788        };
9789        // Apply backdating settings if configured
9790        let temporal_config = if self
9791            .config
9792            .temporal_attributes
9793            .transaction_time
9794            .allow_backdating
9795        {
9796            let mut c = temporal_config;
9797            c.transaction_time.allow_backdating = true;
9798            c.transaction_time.backdating_probability = self
9799                .config
9800                .temporal_attributes
9801                .transaction_time
9802                .backdating_probability;
9803            c.transaction_time.max_backdate_days = self
9804                .config
9805                .temporal_attributes
9806                .transaction_time
9807                .max_backdate_days;
9808            c
9809        } else {
9810            temporal_config
9811        };
9812        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
9813            temporal_config,
9814            self.seed + 130,
9815            start_date,
9816        );
9817
9818        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
9819            self.seed + 130,
9820            datasynth_core::GeneratorType::Vendor,
9821        );
9822
9823        let chains: Vec<_> = self
9824            .master_data
9825            .vendors
9826            .iter()
9827            .map(|vendor| {
9828                let id = uuid_factory.next();
9829                gen.generate_version_chain(vendor.clone(), id)
9830            })
9831            .collect();
9832
9833        stats.temporal_version_chain_count = chains.len();
9834        info!("Temporal version chains generated: {} chains", chains.len());
9835        self.check_resources_with_log("post-temporal-attributes")?;
9836
9837        Ok(chains)
9838    }
9839
9840    /// Phase 28: Build entity relationship graph and cross-process links.
9841    ///
9842    /// Part 1 (gated on `relationship_strength.enabled`): builds an
9843    /// `EntityGraph` from master-data vendor/customer entities and
9844    /// journal-entry-derived transaction summaries.
9845    ///
9846    /// Part 2 (gated on `cross_process_links.enabled`): extracts
9847    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
9848    /// generates inventory-movement cross-process links.
9849    fn phase_entity_relationships(
9850        &self,
9851        journal_entries: &[JournalEntry],
9852        document_flows: &DocumentFlowSnapshot,
9853        stats: &mut EnhancedGenerationStatistics,
9854    ) -> SynthResult<(
9855        Option<datasynth_core::models::EntityGraph>,
9856        Vec<datasynth_core::models::CrossProcessLink>,
9857    )> {
9858        use datasynth_generators::relationships::{
9859            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
9860            TransactionSummary,
9861        };
9862
9863        let rs_enabled = self.config.relationship_strength.enabled;
9864        let cpl_enabled = self.config.cross_process_links.enabled
9865            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
9866
9867        if !rs_enabled && !cpl_enabled {
9868            debug!(
9869                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
9870            );
9871            return Ok((None, Vec::new()));
9872        }
9873
9874        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
9875
9876        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9877            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9878
9879        let company_code = self
9880            .config
9881            .companies
9882            .first()
9883            .map(|c| c.code.as_str())
9884            .unwrap_or("1000");
9885
9886        // Build the generator with matching config flags
9887        let gen_config = EntityGraphConfig {
9888            enabled: rs_enabled,
9889            cross_process: datasynth_generators::relationships::CrossProcessConfig {
9890                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
9891                enable_return_flows: false,
9892                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
9893                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
9894                // Use higher link rate for small datasets to avoid probabilistic empty results
9895                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
9896                    1.0
9897                } else {
9898                    0.30
9899                },
9900                ..Default::default()
9901            },
9902            strength_config: datasynth_generators::relationships::StrengthConfig {
9903                transaction_volume_weight: self
9904                    .config
9905                    .relationship_strength
9906                    .calculation
9907                    .transaction_volume_weight,
9908                transaction_count_weight: self
9909                    .config
9910                    .relationship_strength
9911                    .calculation
9912                    .transaction_count_weight,
9913                duration_weight: self
9914                    .config
9915                    .relationship_strength
9916                    .calculation
9917                    .relationship_duration_weight,
9918                recency_weight: self.config.relationship_strength.calculation.recency_weight,
9919                mutual_connections_weight: self
9920                    .config
9921                    .relationship_strength
9922                    .calculation
9923                    .mutual_connections_weight,
9924                recency_half_life_days: self
9925                    .config
9926                    .relationship_strength
9927                    .calculation
9928                    .recency_half_life_days,
9929            },
9930            ..Default::default()
9931        };
9932
9933        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
9934
9935        // --- Part 1: Entity Relationship Graph ---
9936        let entity_graph = if rs_enabled {
9937            // Build EntitySummary lists from master data
9938            let vendor_summaries: Vec<EntitySummary> = self
9939                .master_data
9940                .vendors
9941                .iter()
9942                .map(|v| {
9943                    EntitySummary::new(
9944                        &v.vendor_id,
9945                        &v.name,
9946                        datasynth_core::models::GraphEntityType::Vendor,
9947                        start_date,
9948                    )
9949                })
9950                .collect();
9951
9952            let customer_summaries: Vec<EntitySummary> = self
9953                .master_data
9954                .customers
9955                .iter()
9956                .map(|c| {
9957                    EntitySummary::new(
9958                        &c.customer_id,
9959                        &c.name,
9960                        datasynth_core::models::GraphEntityType::Customer,
9961                        start_date,
9962                    )
9963                })
9964                .collect();
9965
9966            // Build transaction summaries from journal entries.
9967            // Key = (company_code, trading_partner) for entries that have a
9968            // trading partner.  This captures intercompany flows and any JE
9969            // whose line items carry a trading_partner reference.
9970            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
9971                std::collections::HashMap::new();
9972
9973            for je in journal_entries {
9974                let cc = je.header.company_code.clone();
9975                let posting_date = je.header.posting_date;
9976                for line in &je.lines {
9977                    if let Some(ref tp) = line.trading_partner {
9978                        let amount = if line.debit_amount > line.credit_amount {
9979                            line.debit_amount
9980                        } else {
9981                            line.credit_amount
9982                        };
9983                        let entry = txn_summaries
9984                            .entry((cc.clone(), tp.clone()))
9985                            .or_insert_with(|| TransactionSummary {
9986                                total_volume: rust_decimal::Decimal::ZERO,
9987                                transaction_count: 0,
9988                                first_transaction_date: posting_date,
9989                                last_transaction_date: posting_date,
9990                                related_entities: std::collections::HashSet::new(),
9991                            });
9992                        entry.total_volume += amount;
9993                        entry.transaction_count += 1;
9994                        if posting_date < entry.first_transaction_date {
9995                            entry.first_transaction_date = posting_date;
9996                        }
9997                        if posting_date > entry.last_transaction_date {
9998                            entry.last_transaction_date = posting_date;
9999                        }
10000                        entry.related_entities.insert(cc.clone());
10001                    }
10002                }
10003            }
10004
10005            // Also extract transaction relationships from document flow chains.
10006            // P2P chains: Company → Vendor relationships
10007            for chain in &document_flows.p2p_chains {
10008                let cc = chain.purchase_order.header.company_code.clone();
10009                let vendor_id = chain.purchase_order.vendor_id.clone();
10010                let po_date = chain.purchase_order.header.document_date;
10011                let amount = chain.purchase_order.total_net_amount;
10012
10013                let entry = txn_summaries
10014                    .entry((cc.clone(), vendor_id))
10015                    .or_insert_with(|| TransactionSummary {
10016                        total_volume: rust_decimal::Decimal::ZERO,
10017                        transaction_count: 0,
10018                        first_transaction_date: po_date,
10019                        last_transaction_date: po_date,
10020                        related_entities: std::collections::HashSet::new(),
10021                    });
10022                entry.total_volume += amount;
10023                entry.transaction_count += 1;
10024                if po_date < entry.first_transaction_date {
10025                    entry.first_transaction_date = po_date;
10026                }
10027                if po_date > entry.last_transaction_date {
10028                    entry.last_transaction_date = po_date;
10029                }
10030                entry.related_entities.insert(cc);
10031            }
10032
10033            // O2C chains: Company → Customer relationships
10034            for chain in &document_flows.o2c_chains {
10035                let cc = chain.sales_order.header.company_code.clone();
10036                let customer_id = chain.sales_order.customer_id.clone();
10037                let so_date = chain.sales_order.header.document_date;
10038                let amount = chain.sales_order.total_net_amount;
10039
10040                let entry = txn_summaries
10041                    .entry((cc.clone(), customer_id))
10042                    .or_insert_with(|| TransactionSummary {
10043                        total_volume: rust_decimal::Decimal::ZERO,
10044                        transaction_count: 0,
10045                        first_transaction_date: so_date,
10046                        last_transaction_date: so_date,
10047                        related_entities: std::collections::HashSet::new(),
10048                    });
10049                entry.total_volume += amount;
10050                entry.transaction_count += 1;
10051                if so_date < entry.first_transaction_date {
10052                    entry.first_transaction_date = so_date;
10053                }
10054                if so_date > entry.last_transaction_date {
10055                    entry.last_transaction_date = so_date;
10056                }
10057                entry.related_entities.insert(cc);
10058            }
10059
10060            let as_of_date = journal_entries
10061                .last()
10062                .map(|je| je.header.posting_date)
10063                .unwrap_or(start_date);
10064
10065            let graph = gen.generate_entity_graph(
10066                company_code,
10067                as_of_date,
10068                &vendor_summaries,
10069                &customer_summaries,
10070                &txn_summaries,
10071            );
10072
10073            info!(
10074                "Entity relationship graph: {} nodes, {} edges",
10075                graph.nodes.len(),
10076                graph.edges.len()
10077            );
10078            stats.entity_relationship_node_count = graph.nodes.len();
10079            stats.entity_relationship_edge_count = graph.edges.len();
10080            Some(graph)
10081        } else {
10082            None
10083        };
10084
10085        // --- Part 2: Cross-Process Links ---
10086        let cross_process_links = if cpl_enabled {
10087            // Build GoodsReceiptRef from P2P chains
10088            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10089                .p2p_chains
10090                .iter()
10091                .flat_map(|chain| {
10092                    let vendor_id = chain.purchase_order.vendor_id.clone();
10093                    let cc = chain.purchase_order.header.company_code.clone();
10094                    chain.goods_receipts.iter().flat_map(move |gr| {
10095                        gr.items.iter().filter_map({
10096                            let doc_id = gr.header.document_id.clone();
10097                            let v_id = vendor_id.clone();
10098                            let company = cc.clone();
10099                            let receipt_date = gr.header.document_date;
10100                            move |item| {
10101                                item.base
10102                                    .material_id
10103                                    .as_ref()
10104                                    .map(|mat_id| GoodsReceiptRef {
10105                                        document_id: doc_id.clone(),
10106                                        material_id: mat_id.clone(),
10107                                        quantity: item.base.quantity,
10108                                        receipt_date,
10109                                        vendor_id: v_id.clone(),
10110                                        company_code: company.clone(),
10111                                    })
10112                            }
10113                        })
10114                    })
10115                })
10116                .collect();
10117
10118            // Build DeliveryRef from O2C chains
10119            let del_refs: Vec<DeliveryRef> = document_flows
10120                .o2c_chains
10121                .iter()
10122                .flat_map(|chain| {
10123                    let customer_id = chain.sales_order.customer_id.clone();
10124                    let cc = chain.sales_order.header.company_code.clone();
10125                    chain.deliveries.iter().flat_map(move |del| {
10126                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10127                        del.items.iter().filter_map({
10128                            let doc_id = del.header.document_id.clone();
10129                            let c_id = customer_id.clone();
10130                            let company = cc.clone();
10131                            move |item| {
10132                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10133                                    document_id: doc_id.clone(),
10134                                    material_id: mat_id.clone(),
10135                                    quantity: item.base.quantity,
10136                                    delivery_date,
10137                                    customer_id: c_id.clone(),
10138                                    company_code: company.clone(),
10139                                })
10140                            }
10141                        })
10142                    })
10143                })
10144                .collect();
10145
10146            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10147            info!("Cross-process links generated: {} links", links.len());
10148            stats.cross_process_link_count = links.len();
10149            links
10150        } else {
10151            Vec::new()
10152        };
10153
10154        self.check_resources_with_log("post-entity-relationships")?;
10155        Ok((entity_graph, cross_process_links))
10156    }
10157
10158    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10159    fn phase_industry_data(
10160        &self,
10161        stats: &mut EnhancedGenerationStatistics,
10162    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10163        if !self.config.industry_specific.enabled {
10164            return None;
10165        }
10166        info!("Phase 29: Generating industry-specific data");
10167        let output = datasynth_generators::industry::factory::generate_industry_output(
10168            self.config.global.industry,
10169        );
10170        stats.industry_gl_account_count = output.gl_accounts.len();
10171        info!(
10172            "Industry data generated: {} GL accounts for {:?}",
10173            output.gl_accounts.len(),
10174            self.config.global.industry
10175        );
10176        Some(output)
10177    }
10178
10179    /// Phase 3b: Generate opening balances for each company.
10180    fn phase_opening_balances(
10181        &mut self,
10182        coa: &Arc<ChartOfAccounts>,
10183        stats: &mut EnhancedGenerationStatistics,
10184    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10185        if !self.config.balance.generate_opening_balances {
10186            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10187            return Ok(Vec::new());
10188        }
10189        info!("Phase 3b: Generating Opening Balances");
10190
10191        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10192            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10193        let fiscal_year = start_date.year();
10194
10195        // **v5.3** — When the shard context supplies prior-period
10196        // opening-balance carryovers, use them directly instead of
10197        // calling `OpeningBalanceGenerator`.  This implements multi-
10198        // period continuity: period N+1 opens with period N's closing
10199        // BS positions exactly, rather than re-rolling the industry-
10200        // mix generator and losing the audit trail.
10201        //
10202        // Empty `opening_balances` (the v5.0–v5.2 default) falls
10203        // through to the generator path — byte-identical behaviour
10204        // for single-period engagements.
10205        if let Some(ctx) = &self.shard_context {
10206            if !ctx.opening_balances.is_empty() {
10207                debug!(
10208                    "Phase 3b: using v5.3 opening-balance carryover ({} accounts)",
10209                    ctx.opening_balances.len()
10210                );
10211                let mut results = Vec::new();
10212                for company in &self.config.companies {
10213                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10214                        .opening_balances
10215                        .iter()
10216                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10217                        .collect();
10218                    let total_assets = ctx
10219                        .opening_balances
10220                        .iter()
10221                        .filter(|ob| {
10222                            matches!(
10223                                ob.account_type,
10224                                AccountType::Asset | AccountType::ContraAsset
10225                            )
10226                        })
10227                        .map(|ob| ob.net_balance())
10228                        .sum::<rust_decimal::Decimal>();
10229                    let total_liabilities = ctx
10230                        .opening_balances
10231                        .iter()
10232                        .filter(|ob| {
10233                            matches!(
10234                                ob.account_type,
10235                                AccountType::Liability | AccountType::ContraLiability
10236                            )
10237                        })
10238                        .map(|ob| ob.net_balance())
10239                        .sum::<rust_decimal::Decimal>();
10240                    let total_equity = ctx
10241                        .opening_balances
10242                        .iter()
10243                        .filter(|ob| {
10244                            matches!(
10245                                ob.account_type,
10246                                AccountType::Equity | AccountType::ContraEquity
10247                            )
10248                        })
10249                        .map(|ob| ob.net_balance())
10250                        .sum::<rust_decimal::Decimal>();
10251                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10252                        < rust_decimal::Decimal::ONE;
10253                    results.push(GeneratedOpeningBalance {
10254                        company_code: company.code.clone(),
10255                        as_of_date: start_date,
10256                        balances,
10257                        total_assets,
10258                        total_liabilities,
10259                        total_equity,
10260                        is_balanced,
10261                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10262                            current_ratio: None,
10263                            quick_ratio: None,
10264                            debt_to_equity: None,
10265                            working_capital: rust_decimal::Decimal::ZERO,
10266                        },
10267                    });
10268                }
10269                stats.opening_balance_count = results.len();
10270                info!(
10271                    "Phase 3b: opening-balance carryover applied ({} companies)",
10272                    results.len()
10273                );
10274                self.check_resources_with_log("post-opening-balances")?;
10275                return Ok(results);
10276            }
10277        }
10278
10279        let industry = match self.config.global.industry {
10280            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10281            IndustrySector::Retail => IndustryType::Retail,
10282            IndustrySector::FinancialServices => IndustryType::Financial,
10283            IndustrySector::Healthcare => IndustryType::Healthcare,
10284            IndustrySector::Technology => IndustryType::Technology,
10285            _ => IndustryType::Manufacturing,
10286        };
10287
10288        let config = datasynth_generators::OpeningBalanceConfig {
10289            industry,
10290            ..Default::default()
10291        };
10292        let mut gen =
10293            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10294
10295        let mut results = Vec::new();
10296        for company in &self.config.companies {
10297            let spec = OpeningBalanceSpec::new(
10298                company.code.clone(),
10299                start_date,
10300                fiscal_year,
10301                company.currency.clone(),
10302                rust_decimal::Decimal::new(10_000_000, 0),
10303                industry,
10304            );
10305            let ob = gen.generate(&spec, coa, start_date, &company.code);
10306            results.push(ob);
10307        }
10308
10309        stats.opening_balance_count = results.len();
10310        info!("Opening balances generated: {} companies", results.len());
10311        self.check_resources_with_log("post-opening-balances")?;
10312
10313        Ok(results)
10314    }
10315
10316    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10317    fn phase_subledger_reconciliation(
10318        &mut self,
10319        subledger: &SubledgerSnapshot,
10320        entries: &[JournalEntry],
10321        stats: &mut EnhancedGenerationStatistics,
10322    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10323        if !self.config.balance.reconcile_subledgers {
10324            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10325            return Ok(Vec::new());
10326        }
10327        info!("Phase 9b: Reconciling GL to subledger balances");
10328
10329        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10330            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10331            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10332
10333        // Build GL balance map from journal entries using a balance tracker
10334        let tracker_config = BalanceTrackerConfig {
10335            validate_on_each_entry: false,
10336            track_history: false,
10337            fail_on_validation_error: false,
10338            ..Default::default()
10339        };
10340        let recon_currency = self
10341            .config
10342            .companies
10343            .first()
10344            .map(|c| c.currency.clone())
10345            .unwrap_or_else(|| "USD".to_string());
10346        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10347        let validation_errors = tracker.apply_entries(entries);
10348        if !validation_errors.is_empty() {
10349            warn!(
10350                error_count = validation_errors.len(),
10351                "Balance tracker encountered validation errors during subledger reconciliation"
10352            );
10353            for err in &validation_errors {
10354                debug!("Balance validation error: {:?}", err);
10355            }
10356        }
10357
10358        let mut engine = datasynth_generators::ReconciliationEngine::new(
10359            datasynth_generators::ReconciliationConfig::default(),
10360        );
10361
10362        let mut results = Vec::new();
10363        let company_code = self
10364            .config
10365            .companies
10366            .first()
10367            .map(|c| c.code.as_str())
10368            .unwrap_or("1000");
10369
10370        // Reconcile AR
10371        if !subledger.ar_invoices.is_empty() {
10372            let gl_balance = tracker
10373                .get_account_balance(
10374                    company_code,
10375                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10376                )
10377                .map(|b| b.closing_balance)
10378                .unwrap_or_default();
10379            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10380            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10381        }
10382
10383        // Reconcile AP
10384        if !subledger.ap_invoices.is_empty() {
10385            let gl_balance = tracker
10386                .get_account_balance(
10387                    company_code,
10388                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10389                )
10390                .map(|b| b.closing_balance)
10391                .unwrap_or_default();
10392            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10393            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10394        }
10395
10396        // Reconcile FA
10397        if !subledger.fa_records.is_empty() {
10398            let gl_asset_balance = tracker
10399                .get_account_balance(
10400                    company_code,
10401                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10402                )
10403                .map(|b| b.closing_balance)
10404                .unwrap_or_default();
10405            let gl_accum_depr_balance = tracker
10406                .get_account_balance(
10407                    company_code,
10408                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10409                )
10410                .map(|b| b.closing_balance)
10411                .unwrap_or_default();
10412            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10413                subledger.fa_records.iter().collect();
10414            let (asset_recon, depr_recon) = engine.reconcile_fa(
10415                company_code,
10416                end_date,
10417                gl_asset_balance,
10418                gl_accum_depr_balance,
10419                &fa_refs,
10420            );
10421            results.push(asset_recon);
10422            results.push(depr_recon);
10423        }
10424
10425        // Reconcile Inventory
10426        if !subledger.inventory_positions.is_empty() {
10427            let gl_balance = tracker
10428                .get_account_balance(
10429                    company_code,
10430                    datasynth_core::accounts::control_accounts::INVENTORY,
10431                )
10432                .map(|b| b.closing_balance)
10433                .unwrap_or_default();
10434            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10435                subledger.inventory_positions.iter().collect();
10436            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10437        }
10438
10439        stats.subledger_reconciliation_count = results.len();
10440        let passed = results.iter().filter(|r| r.is_balanced()).count();
10441        let failed = results.len() - passed;
10442        info!(
10443            "Subledger reconciliation: {} checks, {} passed, {} failed",
10444            results.len(),
10445            passed,
10446            failed
10447        );
10448        self.check_resources_with_log("post-subledger-reconciliation")?;
10449
10450        Ok(results)
10451    }
10452
10453    /// Generate the chart of accounts.
10454    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10455        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10456
10457        let coa_framework = self.resolve_coa_framework();
10458
10459        let mut gen = ChartOfAccountsGenerator::new(
10460            self.config.chart_of_accounts.complexity,
10461            self.config.global.industry,
10462            self.seed,
10463        )
10464        .with_coa_framework(coa_framework);
10465
10466        let mut built = gen.generate();
10467        // v4.4.1: propagate the accounting framework label from config
10468        // onto the CoA struct so SDK consumers can read it without
10469        // cross-referencing the config (they previously saw null).
10470        if self.config.accounting_standards.enabled {
10471            use datasynth_config::schema::AccountingFrameworkConfig;
10472            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10473                match f {
10474                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10475                    AccountingFrameworkConfig::Ifrs => "ifrs",
10476                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10477                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10478                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10479                }
10480                .to_string()
10481            });
10482        }
10483        let coa = Arc::new(built);
10484        self.coa = Some(Arc::clone(&coa));
10485
10486        if let Some(pb) = pb {
10487            pb.finish_with_message("Chart of Accounts complete");
10488        }
10489
10490        Ok(coa)
10491    }
10492
10493    /// Generate master data entities.
10494    fn generate_master_data(&mut self) -> SynthResult<()> {
10495        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10496            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10497        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10498
10499        let total = self.config.companies.len() as u64 * 5; // 5 entity types
10500        let pb = self.create_progress_bar(total, "Generating Master Data");
10501
10502        // Resolve country pack once for all companies (uses primary company's country)
10503        let pack = self.primary_pack().clone();
10504
10505        // Capture config values needed inside the parallel closure
10506        let vendors_per_company = self.phase_config.vendors_per_company;
10507        let customers_per_company = self.phase_config.customers_per_company;
10508        let materials_per_company = self.phase_config.materials_per_company;
10509        let assets_per_company = self.phase_config.assets_per_company;
10510        let coa_framework = self.resolve_coa_framework();
10511
10512        // Generate all master data in parallel across companies.
10513        // Each company's data is independent, making this embarrassingly parallel.
10514        let per_company_results: Vec<_> = self
10515            .config
10516            .companies
10517            .par_iter()
10518            .enumerate()
10519            .map(|(i, company)| {
10520                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
10521                let pack = pack.clone();
10522
10523                // Generate vendors (offset counter so IDs are globally unique across companies)
10524                let mut vendor_gen = VendorGenerator::new(company_seed);
10525                vendor_gen.set_country_pack(pack.clone());
10526                vendor_gen.set_coa_framework(coa_framework);
10527                vendor_gen.set_counter_offset(i * vendors_per_company);
10528                // v3.2.0+: user-supplied bank names (and future template
10529                // strings) flow through the shared provider.
10530                vendor_gen.set_template_provider(self.template_provider.clone());
10531                // Wire vendor network config when enabled
10532                if self.config.vendor_network.enabled {
10533                    let vn = &self.config.vendor_network;
10534                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
10535                        enabled: true,
10536                        depth: vn.depth,
10537                        tier1_count: datasynth_generators::TierCountConfig::new(
10538                            vn.tier1.min,
10539                            vn.tier1.max,
10540                        ),
10541                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
10542                            vn.tier2_per_parent.min,
10543                            vn.tier2_per_parent.max,
10544                        ),
10545                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
10546                            vn.tier3_per_parent.min,
10547                            vn.tier3_per_parent.max,
10548                        ),
10549                        cluster_distribution: datasynth_generators::ClusterDistribution {
10550                            reliable_strategic: vn.clusters.reliable_strategic,
10551                            standard_operational: vn.clusters.standard_operational,
10552                            transactional: vn.clusters.transactional,
10553                            problematic: vn.clusters.problematic,
10554                        },
10555                        concentration_limits: datasynth_generators::ConcentrationLimits {
10556                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
10557                            max_top5: vn.dependencies.top_5_concentration,
10558                        },
10559                        ..datasynth_generators::VendorNetworkConfig::default()
10560                    });
10561                }
10562                let vendor_pool =
10563                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
10564
10565                // Generate customers (offset counter so IDs are globally unique across companies)
10566                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
10567                customer_gen.set_country_pack(pack.clone());
10568                customer_gen.set_coa_framework(coa_framework);
10569                customer_gen.set_counter_offset(i * customers_per_company);
10570                // v3.2.0+: user-supplied customer names flow through the shared provider.
10571                customer_gen.set_template_provider(self.template_provider.clone());
10572                // Wire customer segmentation config when enabled
10573                if self.config.customer_segmentation.enabled {
10574                    let cs = &self.config.customer_segmentation;
10575                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
10576                        enabled: true,
10577                        segment_distribution: datasynth_generators::SegmentDistribution {
10578                            enterprise: cs.value_segments.enterprise.customer_share,
10579                            mid_market: cs.value_segments.mid_market.customer_share,
10580                            smb: cs.value_segments.smb.customer_share,
10581                            consumer: cs.value_segments.consumer.customer_share,
10582                        },
10583                        referral_config: datasynth_generators::ReferralConfig {
10584                            enabled: cs.networks.referrals.enabled,
10585                            referral_rate: cs.networks.referrals.referral_rate,
10586                            ..Default::default()
10587                        },
10588                        hierarchy_config: datasynth_generators::HierarchyConfig {
10589                            enabled: cs.networks.corporate_hierarchies.enabled,
10590                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
10591                            ..Default::default()
10592                        },
10593                        ..Default::default()
10594                    };
10595                    customer_gen.set_segmentation_config(seg_cfg);
10596                }
10597                let customer_pool = customer_gen.generate_customer_pool(
10598                    customers_per_company,
10599                    &company.code,
10600                    start_date,
10601                );
10602
10603                // Generate materials (offset counter so IDs are globally unique across companies)
10604                let mut material_gen = MaterialGenerator::new(company_seed + 200);
10605                material_gen.set_country_pack(pack.clone());
10606                material_gen.set_counter_offset(i * materials_per_company);
10607                // v3.2.1+: user-supplied material descriptions flow through shared provider
10608                material_gen.set_template_provider(self.template_provider.clone());
10609                let material_pool = material_gen.generate_material_pool(
10610                    materials_per_company,
10611                    &company.code,
10612                    start_date,
10613                );
10614
10615                // Generate fixed assets
10616                let mut asset_gen = AssetGenerator::new(company_seed + 300);
10617                // v3.2.1+: user-supplied asset descriptions flow through shared provider
10618                asset_gen.set_template_provider(self.template_provider.clone());
10619                let asset_pool = asset_gen.generate_asset_pool(
10620                    assets_per_company,
10621                    &company.code,
10622                    (start_date, end_date),
10623                );
10624
10625                // Generate employees
10626                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
10627                employee_gen.set_country_pack(pack);
10628                // v3.2.1+: user-supplied department names flow through shared provider
10629                employee_gen.set_template_provider(self.template_provider.clone());
10630                let employee_pool =
10631                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
10632
10633                // Generate employee change history (2-5 events per employee)
10634                let employee_change_history =
10635                    employee_gen.generate_all_change_history(&employee_pool, end_date);
10636
10637                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
10638                let employee_ids: Vec<String> = employee_pool
10639                    .employees
10640                    .iter()
10641                    .map(|e| e.employee_id.clone())
10642                    .collect();
10643                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
10644                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
10645
10646                // v5.1: profit centre hierarchy (two-level: top-level
10647                // segment / region / product-group nodes + sub-units).
10648                let mut pc_gen =
10649                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
10650                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
10651
10652                (
10653                    vendor_pool.vendors,
10654                    customer_pool.customers,
10655                    material_pool.materials,
10656                    asset_pool.assets,
10657                    employee_pool.employees,
10658                    employee_change_history,
10659                    cost_centers,
10660                    profit_centers,
10661                )
10662            })
10663            .collect();
10664
10665        // Aggregate results from all companies
10666        for (
10667            vendors,
10668            customers,
10669            materials,
10670            assets,
10671            employees,
10672            change_history,
10673            cost_centers,
10674            profit_centers,
10675        ) in per_company_results
10676        {
10677            self.master_data.vendors.extend(vendors);
10678            self.master_data.customers.extend(customers);
10679            self.master_data.materials.extend(materials);
10680            self.master_data.assets.extend(assets);
10681            self.master_data.employees.extend(employees);
10682            self.master_data.cost_centers.extend(cost_centers);
10683            self.master_data.profit_centers.extend(profit_centers);
10684            self.master_data
10685                .employee_change_history
10686                .extend(change_history);
10687        }
10688
10689        // v3.3.0: one OrganizationalProfile per company. Cheap to
10690        // generate (derived from industry + company_code) so we
10691        // always emit when master data runs; no separate config flag.
10692        {
10693            use datasynth_core::models::IndustrySector;
10694            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
10695            let industry = match self.config.global.industry {
10696                IndustrySector::Manufacturing => "manufacturing",
10697                IndustrySector::Retail => "retail",
10698                IndustrySector::FinancialServices => "financial_services",
10699                IndustrySector::Technology => "technology",
10700                IndustrySector::Healthcare => "healthcare",
10701                _ => "other",
10702            };
10703            for (i, company) in self.config.companies.iter().enumerate() {
10704                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
10705                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
10706                let profile = profile_gen.generate(&company.code, industry);
10707                self.master_data.organizational_profiles.push(profile);
10708            }
10709        }
10710
10711        if let Some(pb) = &pb {
10712            pb.inc(total);
10713        }
10714        if let Some(pb) = pb {
10715            pb.finish_with_message("Master data generation complete");
10716        }
10717
10718        Ok(())
10719    }
10720
10721    /// Generate document flows (P2P and O2C).
10722    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
10723        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10724            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10725
10726        // Generate P2P chains
10727        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
10728        let months = (self.config.global.period_months as usize).max(1);
10729        let p2p_count = self
10730            .phase_config
10731            .p2p_chains
10732            .min(self.master_data.vendors.len() * 2 * months);
10733        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
10734
10735        // Convert P2P config from schema to generator config
10736        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
10737        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
10738        p2p_gen.set_country_pack(self.primary_pack().clone());
10739        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
10740        // to business days. No-op when `temporal_patterns.business_days.
10741        // enabled = false`.
10742        if let Some(ctx) = &self.temporal_context {
10743            p2p_gen.set_temporal_context(Arc::clone(ctx));
10744        }
10745
10746        for i in 0..p2p_count {
10747            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
10748            let materials: Vec<&Material> = self
10749                .master_data
10750                .materials
10751                .iter()
10752                .skip(i % self.master_data.materials.len().max(1))
10753                .take(2.min(self.master_data.materials.len()))
10754                .collect();
10755
10756            if materials.is_empty() {
10757                continue;
10758            }
10759
10760            let company = &self.config.companies[i % self.config.companies.len()];
10761            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
10762            let fiscal_period = po_date.month() as u8;
10763            let created_by = if self.master_data.employees.is_empty() {
10764                "SYSTEM"
10765            } else {
10766                self.master_data.employees[i % self.master_data.employees.len()]
10767                    .user_id
10768                    .as_str()
10769            };
10770
10771            let chain = p2p_gen.generate_chain(
10772                &company.code,
10773                vendor,
10774                &materials,
10775                po_date,
10776                start_date.year() as u16,
10777                fiscal_period,
10778                created_by,
10779            );
10780
10781            // Flatten documents
10782            flows.purchase_orders.push(chain.purchase_order.clone());
10783            flows.goods_receipts.extend(chain.goods_receipts.clone());
10784            if let Some(vi) = &chain.vendor_invoice {
10785                flows.vendor_invoices.push(vi.clone());
10786            }
10787            if let Some(payment) = &chain.payment {
10788                flows.payments.push(payment.clone());
10789            }
10790            for remainder in &chain.remainder_payments {
10791                flows.payments.push(remainder.clone());
10792            }
10793            flows.p2p_chains.push(chain);
10794
10795            if let Some(pb) = &pb {
10796                pb.inc(1);
10797            }
10798        }
10799
10800        if let Some(pb) = pb {
10801            pb.finish_with_message("P2P document flows complete");
10802        }
10803
10804        // Generate O2C chains
10805        // Cap at ~2 SOs per customer per month to keep order volume realistic
10806        let o2c_count = self
10807            .phase_config
10808            .o2c_chains
10809            .min(self.master_data.customers.len() * 2 * months);
10810        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
10811
10812        // Convert O2C config from schema to generator config
10813        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
10814        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
10815        o2c_gen.set_country_pack(self.primary_pack().clone());
10816        // v3.4.1: wire temporal context (no-op when business_days disabled).
10817        if let Some(ctx) = &self.temporal_context {
10818            o2c_gen.set_temporal_context(Arc::clone(ctx));
10819        }
10820
10821        for i in 0..o2c_count {
10822            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
10823            let materials: Vec<&Material> = self
10824                .master_data
10825                .materials
10826                .iter()
10827                .skip(i % self.master_data.materials.len().max(1))
10828                .take(2.min(self.master_data.materials.len()))
10829                .collect();
10830
10831            if materials.is_empty() {
10832                continue;
10833            }
10834
10835            let company = &self.config.companies[i % self.config.companies.len()];
10836            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
10837            let fiscal_period = so_date.month() as u8;
10838            let created_by = if self.master_data.employees.is_empty() {
10839                "SYSTEM"
10840            } else {
10841                self.master_data.employees[i % self.master_data.employees.len()]
10842                    .user_id
10843                    .as_str()
10844            };
10845
10846            let chain = o2c_gen.generate_chain(
10847                &company.code,
10848                customer,
10849                &materials,
10850                so_date,
10851                start_date.year() as u16,
10852                fiscal_period,
10853                created_by,
10854            );
10855
10856            // Flatten documents
10857            flows.sales_orders.push(chain.sales_order.clone());
10858            flows.deliveries.extend(chain.deliveries.clone());
10859            if let Some(ci) = &chain.customer_invoice {
10860                flows.customer_invoices.push(ci.clone());
10861            }
10862            if let Some(receipt) = &chain.customer_receipt {
10863                flows.payments.push(receipt.clone());
10864            }
10865            // Extract remainder receipts (follow-up to partial payments)
10866            for receipt in &chain.remainder_receipts {
10867                flows.payments.push(receipt.clone());
10868            }
10869            flows.o2c_chains.push(chain);
10870
10871            if let Some(pb) = &pb {
10872                pb.inc(1);
10873            }
10874        }
10875
10876        if let Some(pb) = pb {
10877            pb.finish_with_message("O2C document flows complete");
10878        }
10879
10880        // Collect all document cross-references from document headers.
10881        // Each document embeds references to its predecessor(s) via add_reference(); here we
10882        // denormalise them into a flat list for the document_references.json output file.
10883        {
10884            let mut refs = Vec::new();
10885            for doc in &flows.purchase_orders {
10886                refs.extend(doc.header.document_references.iter().cloned());
10887            }
10888            for doc in &flows.goods_receipts {
10889                refs.extend(doc.header.document_references.iter().cloned());
10890            }
10891            for doc in &flows.vendor_invoices {
10892                refs.extend(doc.header.document_references.iter().cloned());
10893            }
10894            for doc in &flows.sales_orders {
10895                refs.extend(doc.header.document_references.iter().cloned());
10896            }
10897            for doc in &flows.deliveries {
10898                refs.extend(doc.header.document_references.iter().cloned());
10899            }
10900            for doc in &flows.customer_invoices {
10901                refs.extend(doc.header.document_references.iter().cloned());
10902            }
10903            for doc in &flows.payments {
10904                refs.extend(doc.header.document_references.iter().cloned());
10905            }
10906            debug!(
10907                "Collected {} document cross-references from document headers",
10908                refs.len()
10909            );
10910            flows.document_references = refs;
10911        }
10912
10913        Ok(())
10914    }
10915
10916    /// Generate journal entries using parallel generation across multiple cores.
10917    fn generate_journal_entries(
10918        &mut self,
10919        coa: &Arc<ChartOfAccounts>,
10920    ) -> SynthResult<Vec<JournalEntry>> {
10921        use datasynth_core::traits::ParallelGenerator;
10922
10923        let total = self.calculate_total_transactions();
10924        let pb = self.create_progress_bar(total, "Generating Journal Entries");
10925
10926        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10927            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10928        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10929
10930        let company_codes: Vec<String> = self
10931            .config
10932            .companies
10933            .iter()
10934            .map(|c| c.code.clone())
10935            .collect();
10936
10937        let mut generator = JournalEntryGenerator::new_with_params(
10938            self.config.transactions.clone(),
10939            Arc::clone(coa),
10940            company_codes,
10941            start_date,
10942            end_date,
10943            self.seed,
10944        );
10945        // Wire the `business_processes.*_weight` config through (phantom knob
10946        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
10947        let bp = &self.config.business_processes;
10948        generator.set_business_process_weights(
10949            bp.o2c_weight,
10950            bp.p2p_weight,
10951            bp.r2r_weight,
10952            bp.h2r_weight,
10953            bp.a2r_weight,
10954        );
10955        // v3.4.0: wire advanced distributions (mixture models + industry
10956        // profiles). No-op when `distributions.enabled = false` or
10957        // `distributions.amounts.enabled = false`, preserving v3.3.2
10958        // byte-identical output on default configs.
10959        generator
10960            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
10961            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
10962        let generator = generator;
10963
10964        // Connect generated master data to ensure JEs reference real entities
10965        // Enable persona-based error injection for realistic human behavior
10966        // Pass fraud configuration for fraud injection
10967        let je_pack = self.primary_pack();
10968
10969        let mut generator = generator
10970            .with_master_data(
10971                &self.master_data.vendors,
10972                &self.master_data.customers,
10973                &self.master_data.materials,
10974            )
10975            .with_country_pack_names(je_pack)
10976            .with_country_pack_temporal(
10977                self.config.temporal_patterns.clone(),
10978                self.seed + 200,
10979                je_pack,
10980            )
10981            .with_persona_errors(true)
10982            .with_fraud_config(self.config.fraud.clone());
10983
10984        // Apply temporal drift if configured. v3.5.2+: also merge
10985        // `distributions.regime_changes` (regime events, economic
10986        // cycles, parameter drifts) into the same DriftConfig so both
10987        // knobs flow through the shared DriftController.
10988        let temporal_enabled = self.config.temporal.enabled;
10989        let regimes_enabled = self.config.distributions.regime_changes.enabled;
10990        if temporal_enabled || regimes_enabled {
10991            let mut drift_config = if temporal_enabled {
10992                self.config.temporal.to_core_config()
10993            } else {
10994                // regime-changes only: start from default (drift OFF),
10995                // apply_to flips `enabled = true`.
10996                datasynth_core::distributions::DriftConfig::default()
10997            };
10998            if regimes_enabled {
10999                self.config
11000                    .distributions
11001                    .regime_changes
11002                    .apply_to(&mut drift_config, start_date);
11003            }
11004            generator = generator.with_drift_config(drift_config, self.seed + 100);
11005        }
11006
11007        // Check memory limit at start
11008        self.check_memory_limit()?;
11009
11010        // Determine parallelism: use available cores, but cap at total entries
11011        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11012
11013        // Use parallel generation for datasets with 10K+ entries.
11014        // Below this threshold, the statistical properties of a single-seeded
11015        // generator (e.g. Benford compliance) are better preserved.
11016        let entries = if total >= 10_000 && num_threads > 1 {
11017            // Parallel path: split the generator across cores and generate in parallel.
11018            // Each sub-generator gets a unique seed for deterministic, independent generation.
11019            let sub_generators = generator.split(num_threads);
11020            let entries_per_thread = total as usize / num_threads;
11021            let remainder = total as usize % num_threads;
11022
11023            let batches: Vec<Vec<JournalEntry>> = sub_generators
11024                .into_par_iter()
11025                .enumerate()
11026                .map(|(i, mut gen)| {
11027                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11028                    gen.generate_batch(count)
11029                })
11030                .collect();
11031
11032            // Merge all batches into a single Vec
11033            let entries = JournalEntryGenerator::merge_results(batches);
11034
11035            if let Some(pb) = &pb {
11036                pb.inc(total);
11037            }
11038            entries
11039        } else {
11040            // Sequential path for small datasets (< 1000 entries)
11041            let mut entries = Vec::with_capacity(total as usize);
11042            for _ in 0..total {
11043                let entry = generator.generate();
11044                entries.push(entry);
11045                if let Some(pb) = &pb {
11046                    pb.inc(1);
11047                }
11048            }
11049            entries
11050        };
11051
11052        if let Some(pb) = pb {
11053            pb.finish_with_message("Journal entries complete");
11054        }
11055
11056        Ok(entries)
11057    }
11058
11059    /// Generate journal entries from document flows.
11060    ///
11061    /// This creates proper GL entries for each document in the P2P and O2C flows,
11062    /// ensuring that document activity is reflected in the general ledger.
11063    fn generate_jes_from_document_flows(
11064        &mut self,
11065        flows: &DocumentFlowSnapshot,
11066    ) -> SynthResult<Vec<JournalEntry>> {
11067        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11068        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11069
11070        let je_config = match self.resolve_coa_framework() {
11071            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11072            CoAFramework::GermanSkr04 => {
11073                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11074                DocumentFlowJeConfig::from(&fa)
11075            }
11076            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11077        };
11078
11079        let populate_fec = je_config.populate_fec_fields;
11080        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11081
11082        // Build auxiliary account lookup from vendor/customer master data so that
11083        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11084        // PCG "4010001") instead of raw partner IDs.
11085        if populate_fec {
11086            let mut aux_lookup = std::collections::HashMap::new();
11087            for vendor in &self.master_data.vendors {
11088                if let Some(ref aux) = vendor.auxiliary_gl_account {
11089                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11090                }
11091            }
11092            for customer in &self.master_data.customers {
11093                if let Some(ref aux) = customer.auxiliary_gl_account {
11094                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11095                }
11096            }
11097            if !aux_lookup.is_empty() {
11098                generator.set_auxiliary_account_lookup(aux_lookup);
11099            }
11100        }
11101
11102        let mut entries = Vec::new();
11103
11104        // Generate JEs from P2P chains
11105        for chain in &flows.p2p_chains {
11106            let chain_entries = generator.generate_from_p2p_chain(chain);
11107            entries.extend(chain_entries);
11108            if let Some(pb) = &pb {
11109                pb.inc(1);
11110            }
11111        }
11112
11113        // Generate JEs from O2C chains
11114        for chain in &flows.o2c_chains {
11115            let chain_entries = generator.generate_from_o2c_chain(chain);
11116            entries.extend(chain_entries);
11117            if let Some(pb) = &pb {
11118                pb.inc(1);
11119            }
11120        }
11121
11122        if let Some(pb) = pb {
11123            pb.finish_with_message(format!(
11124                "Generated {} JEs from document flows",
11125                entries.len()
11126            ));
11127        }
11128
11129        Ok(entries)
11130    }
11131
11132    /// Generate journal entries from payroll runs.
11133    ///
11134    /// Creates one JE per payroll run:
11135    /// - DR Salaries & Wages (6100) for gross pay
11136    /// - CR Payroll Clearing (9100) for gross pay
11137    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11138        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11139
11140        let mut jes = Vec::with_capacity(payroll_runs.len());
11141
11142        for run in payroll_runs {
11143            let mut je = JournalEntry::new_simple(
11144                format!("JE-PAYROLL-{}", run.payroll_id),
11145                run.company_code.clone(),
11146                run.run_date,
11147                format!("Payroll {}", run.payroll_id),
11148            );
11149
11150            // Debit Salaries & Wages for gross pay
11151            je.add_line(JournalEntryLine {
11152                line_number: 1,
11153                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11154                debit_amount: run.total_gross,
11155                reference: Some(run.payroll_id.clone()),
11156                text: Some(format!(
11157                    "Payroll {} ({} employees)",
11158                    run.payroll_id, run.employee_count
11159                )),
11160                ..Default::default()
11161            });
11162
11163            // Credit Payroll Clearing for gross pay
11164            je.add_line(JournalEntryLine {
11165                line_number: 2,
11166                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11167                credit_amount: run.total_gross,
11168                reference: Some(run.payroll_id.clone()),
11169                ..Default::default()
11170            });
11171
11172            jes.push(je);
11173        }
11174
11175        jes
11176    }
11177
11178    /// Link document flows to subledger records.
11179    ///
11180    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11181    /// ensuring subledger data is coherent with document flow data.
11182    fn link_document_flows_to_subledgers(
11183        &mut self,
11184        flows: &DocumentFlowSnapshot,
11185    ) -> SynthResult<SubledgerSnapshot> {
11186        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11187        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11188
11189        // Build vendor/customer name maps from master data for realistic subledger names
11190        let vendor_names: std::collections::HashMap<String, String> = self
11191            .master_data
11192            .vendors
11193            .iter()
11194            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11195            .collect();
11196        let customer_names: std::collections::HashMap<String, String> = self
11197            .master_data
11198            .customers
11199            .iter()
11200            .map(|c| (c.customer_id.clone(), c.name.clone()))
11201            .collect();
11202
11203        let mut linker = DocumentFlowLinker::new()
11204            .with_vendor_names(vendor_names)
11205            .with_customer_names(customer_names);
11206
11207        // Convert vendor invoices to AP invoices
11208        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11209        if let Some(pb) = &pb {
11210            pb.inc(flows.vendor_invoices.len() as u64);
11211        }
11212
11213        // Convert customer invoices to AR invoices
11214        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11215        if let Some(pb) = &pb {
11216            pb.inc(flows.customer_invoices.len() as u64);
11217        }
11218
11219        if let Some(pb) = pb {
11220            pb.finish_with_message(format!(
11221                "Linked {} AP and {} AR invoices",
11222                ap_invoices.len(),
11223                ar_invoices.len()
11224            ));
11225        }
11226
11227        Ok(SubledgerSnapshot {
11228            ap_invoices,
11229            ar_invoices,
11230            fa_records: Vec::new(),
11231            inventory_positions: Vec::new(),
11232            inventory_movements: Vec::new(),
11233            // Aging reports are computed after payment settlement in phase_document_flows.
11234            ar_aging_reports: Vec::new(),
11235            ap_aging_reports: Vec::new(),
11236            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11237            depreciation_runs: Vec::new(),
11238            inventory_valuations: Vec::new(),
11239            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11240            dunning_runs: Vec::new(),
11241            dunning_letters: Vec::new(),
11242        })
11243    }
11244
11245    /// Generate OCPM events from document flows.
11246    ///
11247    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11248    /// capturing the object-centric process perspective.
11249    #[allow(clippy::too_many_arguments)]
11250    fn generate_ocpm_events(
11251        &mut self,
11252        flows: &DocumentFlowSnapshot,
11253        sourcing: &SourcingSnapshot,
11254        hr: &HrSnapshot,
11255        manufacturing: &ManufacturingSnapshot,
11256        banking: &BankingSnapshot,
11257        audit: &AuditSnapshot,
11258        financial_reporting: &FinancialReportingSnapshot,
11259    ) -> SynthResult<OcpmSnapshot> {
11260        let total_chains = flows.p2p_chains.len()
11261            + flows.o2c_chains.len()
11262            + sourcing.sourcing_projects.len()
11263            + hr.payroll_runs.len()
11264            + manufacturing.production_orders.len()
11265            + banking.customers.len()
11266            + audit.engagements.len()
11267            + financial_reporting.bank_reconciliations.len();
11268        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11269
11270        // Create OCPM event log with standard types
11271        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11272        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11273
11274        // Configure the OCPM generator
11275        let ocpm_config = OcpmGeneratorConfig {
11276            generate_p2p: true,
11277            generate_o2c: true,
11278            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11279            generate_h2r: !hr.payroll_runs.is_empty(),
11280            generate_mfg: !manufacturing.production_orders.is_empty(),
11281            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11282            generate_bank: !banking.customers.is_empty(),
11283            generate_audit: !audit.engagements.is_empty(),
11284            happy_path_rate: 0.75,
11285            exception_path_rate: 0.20,
11286            error_path_rate: 0.05,
11287            add_duration_variability: true,
11288            duration_std_dev_factor: 0.3,
11289        };
11290        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11291        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11292
11293        // Get available users for resource assignment
11294        let available_users: Vec<String> = self
11295            .master_data
11296            .employees
11297            .iter()
11298            .take(20)
11299            .map(|e| e.user_id.clone())
11300            .collect();
11301
11302        // Deterministic base date from config (avoids Utc::now() non-determinism)
11303        let fallback_date =
11304            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11305        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11306            .unwrap_or(fallback_date);
11307        let base_midnight = base_date
11308            .and_hms_opt(0, 0, 0)
11309            .expect("midnight is always valid");
11310        let base_datetime =
11311            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11312
11313        // Helper closure to add case results to event log
11314        let add_result = |event_log: &mut OcpmEventLog,
11315                          result: datasynth_ocpm::CaseGenerationResult| {
11316            for event in result.events {
11317                event_log.add_event(event);
11318            }
11319            for object in result.objects {
11320                event_log.add_object(object);
11321            }
11322            for relationship in result.relationships {
11323                event_log.add_relationship(relationship);
11324            }
11325            for corr in result.correlation_events {
11326                event_log.add_correlation_event(corr);
11327            }
11328            event_log.add_case(result.case_trace);
11329        };
11330
11331        // Generate events from P2P chains
11332        for chain in &flows.p2p_chains {
11333            let po = &chain.purchase_order;
11334            let documents = P2pDocuments::new(
11335                &po.header.document_id,
11336                &po.vendor_id,
11337                &po.header.company_code,
11338                po.total_net_amount,
11339                &po.header.currency,
11340                &ocpm_uuid_factory,
11341            )
11342            .with_goods_receipt(
11343                chain
11344                    .goods_receipts
11345                    .first()
11346                    .map(|gr| gr.header.document_id.as_str())
11347                    .unwrap_or(""),
11348                &ocpm_uuid_factory,
11349            )
11350            .with_invoice(
11351                chain
11352                    .vendor_invoice
11353                    .as_ref()
11354                    .map(|vi| vi.header.document_id.as_str())
11355                    .unwrap_or(""),
11356                &ocpm_uuid_factory,
11357            )
11358            .with_payment(
11359                chain
11360                    .payment
11361                    .as_ref()
11362                    .map(|p| p.header.document_id.as_str())
11363                    .unwrap_or(""),
11364                &ocpm_uuid_factory,
11365            );
11366
11367            let start_time =
11368                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
11369            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
11370            add_result(&mut event_log, result);
11371
11372            if let Some(pb) = &pb {
11373                pb.inc(1);
11374            }
11375        }
11376
11377        // Generate events from O2C chains
11378        for chain in &flows.o2c_chains {
11379            let so = &chain.sales_order;
11380            let documents = O2cDocuments::new(
11381                &so.header.document_id,
11382                &so.customer_id,
11383                &so.header.company_code,
11384                so.total_net_amount,
11385                &so.header.currency,
11386                &ocpm_uuid_factory,
11387            )
11388            .with_delivery(
11389                chain
11390                    .deliveries
11391                    .first()
11392                    .map(|d| d.header.document_id.as_str())
11393                    .unwrap_or(""),
11394                &ocpm_uuid_factory,
11395            )
11396            .with_invoice(
11397                chain
11398                    .customer_invoice
11399                    .as_ref()
11400                    .map(|ci| ci.header.document_id.as_str())
11401                    .unwrap_or(""),
11402                &ocpm_uuid_factory,
11403            )
11404            .with_receipt(
11405                chain
11406                    .customer_receipt
11407                    .as_ref()
11408                    .map(|r| r.header.document_id.as_str())
11409                    .unwrap_or(""),
11410                &ocpm_uuid_factory,
11411            );
11412
11413            let start_time =
11414                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
11415            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
11416            add_result(&mut event_log, result);
11417
11418            if let Some(pb) = &pb {
11419                pb.inc(1);
11420            }
11421        }
11422
11423        // Generate events from S2C sourcing projects
11424        for project in &sourcing.sourcing_projects {
11425            // Find vendor from contracts or qualifications
11426            let vendor_id = sourcing
11427                .contracts
11428                .iter()
11429                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11430                .map(|c| c.vendor_id.clone())
11431                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
11432                .or_else(|| {
11433                    self.master_data
11434                        .vendors
11435                        .first()
11436                        .map(|v| v.vendor_id.clone())
11437                })
11438                .unwrap_or_else(|| "V000".to_string());
11439            let mut docs = S2cDocuments::new(
11440                &project.project_id,
11441                &vendor_id,
11442                &project.company_code,
11443                project.estimated_annual_spend,
11444                &ocpm_uuid_factory,
11445            );
11446            // Link RFx if available
11447            if let Some(rfx) = sourcing
11448                .rfx_events
11449                .iter()
11450                .find(|r| r.sourcing_project_id == project.project_id)
11451            {
11452                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
11453                // Link winning bid (status == Accepted)
11454                if let Some(bid) = sourcing.bids.iter().find(|b| {
11455                    b.rfx_id == rfx.rfx_id
11456                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
11457                }) {
11458                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
11459                }
11460            }
11461            // Link contract
11462            if let Some(contract) = sourcing
11463                .contracts
11464                .iter()
11465                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
11466            {
11467                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
11468            }
11469            let start_time = base_datetime - chrono::Duration::days(90);
11470            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
11471            add_result(&mut event_log, result);
11472
11473            if let Some(pb) = &pb {
11474                pb.inc(1);
11475            }
11476        }
11477
11478        // Generate events from H2R payroll runs
11479        for run in &hr.payroll_runs {
11480            // Use first matching payroll line item's employee, or fallback
11481            let employee_id = hr
11482                .payroll_line_items
11483                .iter()
11484                .find(|li| li.payroll_id == run.payroll_id)
11485                .map(|li| li.employee_id.as_str())
11486                .unwrap_or("EMP000");
11487            let docs = H2rDocuments::new(
11488                &run.payroll_id,
11489                employee_id,
11490                &run.company_code,
11491                run.total_gross,
11492                &ocpm_uuid_factory,
11493            )
11494            .with_time_entries(
11495                hr.time_entries
11496                    .iter()
11497                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
11498                    .take(5)
11499                    .map(|t| t.entry_id.as_str())
11500                    .collect(),
11501            );
11502            let start_time = base_datetime - chrono::Duration::days(30);
11503            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
11504            add_result(&mut event_log, result);
11505
11506            if let Some(pb) = &pb {
11507                pb.inc(1);
11508            }
11509        }
11510
11511        // Generate events from MFG production orders
11512        for order in &manufacturing.production_orders {
11513            let mut docs = MfgDocuments::new(
11514                &order.order_id,
11515                &order.material_id,
11516                &order.company_code,
11517                order.planned_quantity,
11518                &ocpm_uuid_factory,
11519            )
11520            .with_operations(
11521                order
11522                    .operations
11523                    .iter()
11524                    .map(|o| format!("OP-{:04}", o.operation_number))
11525                    .collect::<Vec<_>>()
11526                    .iter()
11527                    .map(std::string::String::as_str)
11528                    .collect(),
11529            );
11530            // Link quality inspection if available (via reference_id matching order_id)
11531            if let Some(insp) = manufacturing
11532                .quality_inspections
11533                .iter()
11534                .find(|i| i.reference_id == order.order_id)
11535            {
11536                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
11537            }
11538            // Link cycle count if available (match by material_id in items)
11539            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
11540                cc.items
11541                    .iter()
11542                    .any(|item| item.material_id == order.material_id)
11543            }) {
11544                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
11545            }
11546            let start_time = base_datetime - chrono::Duration::days(60);
11547            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
11548            add_result(&mut event_log, result);
11549
11550            if let Some(pb) = &pb {
11551                pb.inc(1);
11552            }
11553        }
11554
11555        // Generate events from Banking customers
11556        for customer in &banking.customers {
11557            let customer_id_str = customer.customer_id.to_string();
11558            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
11559            // Link accounts (primary_owner_id matches customer_id)
11560            if let Some(account) = banking
11561                .accounts
11562                .iter()
11563                .find(|a| a.primary_owner_id == customer.customer_id)
11564            {
11565                let account_id_str = account.account_id.to_string();
11566                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
11567                // Link transactions for this account
11568                let txn_strs: Vec<String> = banking
11569                    .transactions
11570                    .iter()
11571                    .filter(|t| t.account_id == account.account_id)
11572                    .take(10)
11573                    .map(|t| t.transaction_id.to_string())
11574                    .collect();
11575                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
11576                let txn_amounts: Vec<rust_decimal::Decimal> = banking
11577                    .transactions
11578                    .iter()
11579                    .filter(|t| t.account_id == account.account_id)
11580                    .take(10)
11581                    .map(|t| t.amount)
11582                    .collect();
11583                if !txn_ids.is_empty() {
11584                    docs = docs.with_transactions(txn_ids, txn_amounts);
11585                }
11586            }
11587            let start_time = base_datetime - chrono::Duration::days(180);
11588            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
11589            add_result(&mut event_log, result);
11590
11591            if let Some(pb) = &pb {
11592                pb.inc(1);
11593            }
11594        }
11595
11596        // Generate events from Audit engagements
11597        for engagement in &audit.engagements {
11598            let engagement_id_str = engagement.engagement_id.to_string();
11599            let docs = AuditDocuments::new(
11600                &engagement_id_str,
11601                &engagement.client_entity_id,
11602                &ocpm_uuid_factory,
11603            )
11604            .with_workpapers(
11605                audit
11606                    .workpapers
11607                    .iter()
11608                    .filter(|w| w.engagement_id == engagement.engagement_id)
11609                    .take(10)
11610                    .map(|w| w.workpaper_id.to_string())
11611                    .collect::<Vec<_>>()
11612                    .iter()
11613                    .map(std::string::String::as_str)
11614                    .collect(),
11615            )
11616            .with_evidence(
11617                audit
11618                    .evidence
11619                    .iter()
11620                    .filter(|e| e.engagement_id == engagement.engagement_id)
11621                    .take(10)
11622                    .map(|e| e.evidence_id.to_string())
11623                    .collect::<Vec<_>>()
11624                    .iter()
11625                    .map(std::string::String::as_str)
11626                    .collect(),
11627            )
11628            .with_risks(
11629                audit
11630                    .risk_assessments
11631                    .iter()
11632                    .filter(|r| r.engagement_id == engagement.engagement_id)
11633                    .take(5)
11634                    .map(|r| r.risk_id.to_string())
11635                    .collect::<Vec<_>>()
11636                    .iter()
11637                    .map(std::string::String::as_str)
11638                    .collect(),
11639            )
11640            .with_findings(
11641                audit
11642                    .findings
11643                    .iter()
11644                    .filter(|f| f.engagement_id == engagement.engagement_id)
11645                    .take(5)
11646                    .map(|f| f.finding_id.to_string())
11647                    .collect::<Vec<_>>()
11648                    .iter()
11649                    .map(std::string::String::as_str)
11650                    .collect(),
11651            )
11652            .with_judgments(
11653                audit
11654                    .judgments
11655                    .iter()
11656                    .filter(|j| j.engagement_id == engagement.engagement_id)
11657                    .take(5)
11658                    .map(|j| j.judgment_id.to_string())
11659                    .collect::<Vec<_>>()
11660                    .iter()
11661                    .map(std::string::String::as_str)
11662                    .collect(),
11663            );
11664            let start_time = base_datetime - chrono::Duration::days(120);
11665            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
11666            add_result(&mut event_log, result);
11667
11668            if let Some(pb) = &pb {
11669                pb.inc(1);
11670            }
11671        }
11672
11673        // Generate events from Bank Reconciliations
11674        for recon in &financial_reporting.bank_reconciliations {
11675            let docs = BankReconDocuments::new(
11676                &recon.reconciliation_id,
11677                &recon.bank_account_id,
11678                &recon.company_code,
11679                recon.bank_ending_balance,
11680                &ocpm_uuid_factory,
11681            )
11682            .with_statement_lines(
11683                recon
11684                    .statement_lines
11685                    .iter()
11686                    .take(20)
11687                    .map(|l| l.line_id.as_str())
11688                    .collect(),
11689            )
11690            .with_reconciling_items(
11691                recon
11692                    .reconciling_items
11693                    .iter()
11694                    .take(10)
11695                    .map(|i| i.item_id.as_str())
11696                    .collect(),
11697            );
11698            let start_time = base_datetime - chrono::Duration::days(30);
11699            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
11700            add_result(&mut event_log, result);
11701
11702            if let Some(pb) = &pb {
11703                pb.inc(1);
11704            }
11705        }
11706
11707        // Compute process variants
11708        event_log.compute_variants();
11709
11710        let summary = event_log.summary();
11711
11712        if let Some(pb) = pb {
11713            pb.finish_with_message(format!(
11714                "Generated {} OCPM events, {} objects",
11715                summary.event_count, summary.object_count
11716            ));
11717        }
11718
11719        Ok(OcpmSnapshot {
11720            event_count: summary.event_count,
11721            object_count: summary.object_count,
11722            case_count: summary.case_count,
11723            event_log: Some(event_log),
11724        })
11725    }
11726
11727    /// Inject anomalies into journal entries.
11728    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
11729        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
11730
11731        // Read anomaly rates from config instead of using hardcoded values.
11732        // Priority: anomaly_injection config > fraud config > default 0.02
11733        let total_rate = if self.config.anomaly_injection.enabled {
11734            self.config.anomaly_injection.rates.total_rate
11735        } else if self.config.fraud.enabled {
11736            self.config.fraud.fraud_rate
11737        } else {
11738            0.02
11739        };
11740
11741        let fraud_rate = if self.config.anomaly_injection.enabled {
11742            self.config.anomaly_injection.rates.fraud_rate
11743        } else {
11744            AnomalyRateConfig::default().fraud_rate
11745        };
11746
11747        let error_rate = if self.config.anomaly_injection.enabled {
11748            self.config.anomaly_injection.rates.error_rate
11749        } else {
11750            AnomalyRateConfig::default().error_rate
11751        };
11752
11753        let process_issue_rate = if self.config.anomaly_injection.enabled {
11754            self.config.anomaly_injection.rates.process_rate
11755        } else {
11756            AnomalyRateConfig::default().process_issue_rate
11757        };
11758
11759        let anomaly_config = AnomalyInjectorConfig {
11760            rates: AnomalyRateConfig {
11761                total_rate,
11762                fraud_rate,
11763                error_rate,
11764                process_issue_rate,
11765                ..Default::default()
11766            },
11767            seed: self.seed + 5000,
11768            ..Default::default()
11769        };
11770
11771        let mut injector = AnomalyInjector::new(anomaly_config);
11772        let result = injector.process_entries(entries);
11773
11774        if let Some(pb) = &pb {
11775            pb.inc(entries.len() as u64);
11776            pb.finish_with_message("Anomaly injection complete");
11777        }
11778
11779        let mut by_type = HashMap::new();
11780        for label in &result.labels {
11781            *by_type
11782                .entry(format!("{:?}", label.anomaly_type))
11783                .or_insert(0) += 1;
11784        }
11785
11786        Ok(AnomalyLabels {
11787            labels: result.labels,
11788            summary: Some(result.summary),
11789            by_type,
11790        })
11791    }
11792
11793    /// Validate journal entries using running balance tracker.
11794    ///
11795    /// Applies all entries to the balance tracker and validates:
11796    /// - Each entry is internally balanced (debits = credits)
11797    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
11798    ///
11799    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
11800    /// excluded from balance validation as they may be intentionally unbalanced.
11801    fn validate_journal_entries(
11802        &mut self,
11803        entries: &[JournalEntry],
11804    ) -> SynthResult<BalanceValidationResult> {
11805        // Filter out entries with human errors as they may be intentionally unbalanced
11806        let clean_entries: Vec<&JournalEntry> = entries
11807            .iter()
11808            .filter(|e| {
11809                e.header
11810                    .header_text
11811                    .as_ref()
11812                    .map(|t| !t.contains("[HUMAN_ERROR:"))
11813                    .unwrap_or(true)
11814            })
11815            .collect();
11816
11817        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
11818
11819        // Configure tracker to not fail on errors (collect them instead)
11820        let config = BalanceTrackerConfig {
11821            validate_on_each_entry: false,   // We'll validate at the end
11822            track_history: false,            // Skip history for performance
11823            fail_on_validation_error: false, // Collect errors, don't fail
11824            ..Default::default()
11825        };
11826        let validation_currency = self
11827            .config
11828            .companies
11829            .first()
11830            .map(|c| c.currency.clone())
11831            .unwrap_or_else(|| "USD".to_string());
11832
11833        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
11834
11835        // Apply clean entries (without human errors)
11836        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
11837        let errors = tracker.apply_entries(&clean_refs);
11838
11839        if let Some(pb) = &pb {
11840            pb.inc(entries.len() as u64);
11841        }
11842
11843        // Check if any entries were unbalanced
11844        // Note: When fail_on_validation_error is false, errors are stored in tracker
11845        let has_unbalanced = tracker
11846            .get_validation_errors()
11847            .iter()
11848            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
11849
11850        // Validate balance sheet for each company
11851        // Include both returned errors and collected validation errors
11852        let mut all_errors = errors;
11853        all_errors.extend(tracker.get_validation_errors().iter().cloned());
11854        let company_codes: Vec<String> = self
11855            .config
11856            .companies
11857            .iter()
11858            .map(|c| c.code.clone())
11859            .collect();
11860
11861        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11862            .map(|d| d + chrono::Months::new(self.config.global.period_months))
11863            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11864
11865        for company_code in &company_codes {
11866            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
11867                all_errors.push(e);
11868            }
11869        }
11870
11871        // Get statistics after all mutable operations are done
11872        let stats = tracker.get_statistics();
11873
11874        // Determine if balanced overall
11875        let is_balanced = all_errors.is_empty();
11876
11877        if let Some(pb) = pb {
11878            let msg = if is_balanced {
11879                "Balance validation passed"
11880            } else {
11881                "Balance validation completed with errors"
11882            };
11883            pb.finish_with_message(msg);
11884        }
11885
11886        Ok(BalanceValidationResult {
11887            validated: true,
11888            is_balanced,
11889            entries_processed: stats.entries_processed,
11890            total_debits: stats.total_debits,
11891            total_credits: stats.total_credits,
11892            accounts_tracked: stats.accounts_tracked,
11893            companies_tracked: stats.companies_tracked,
11894            validation_errors: all_errors,
11895            has_unbalanced_entries: has_unbalanced,
11896        })
11897    }
11898
11899    /// Inject data quality variations into journal entries.
11900    ///
11901    /// Applies typos, missing values, and format variations to make
11902    /// the synthetic data more realistic for testing data cleaning pipelines.
11903    fn inject_data_quality(
11904        &mut self,
11905        entries: &mut [JournalEntry],
11906    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
11907        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
11908
11909        // Build config from user-specified schema settings when data_quality is enabled;
11910        // otherwise fall back to the low-rate minimal() preset.
11911        let config = if self.config.data_quality.enabled {
11912            let dq = &self.config.data_quality;
11913            // Propagate per-field rates and protected fields from the schema
11914            // so users can dial in real-production NULL profiles per field
11915            // (e.g. CostCenter 96.5% NULL, Invoice_Reference 100% NULL).
11916            let field_rates = dq.missing_values.field_rates.clone();
11917            let mut required_fields: std::collections::HashSet<String> =
11918                dq.missing_values.protected_fields.iter().cloned().collect();
11919            // Always preserve audit-critical identifiers regardless of
11920            // user config — losing these breaks downstream joins.
11921            for f in [
11922                "document_id",
11923                "company_code",
11924                "posting_date",
11925                "fiscal_year",
11926                "fiscal_period",
11927                "gl_account",
11928                "line_number",
11929                "transaction_id",
11930            ] {
11931                required_fields.insert(f.to_string());
11932            }
11933            DataQualityConfig {
11934                enable_missing_values: dq.missing_values.enabled,
11935                missing_values: datasynth_generators::MissingValueConfig {
11936                    global_rate: dq.effective_missing_rate(),
11937                    field_rates,
11938                    required_fields,
11939                    ..Default::default()
11940                },
11941                enable_format_variations: dq.format_variations.enabled,
11942                format_variations: datasynth_generators::FormatVariationConfig {
11943                    date_variation_rate: dq.format_variations.dates.rate,
11944                    amount_variation_rate: dq.format_variations.amounts.rate,
11945                    identifier_variation_rate: dq.format_variations.identifiers.rate,
11946                    ..Default::default()
11947                },
11948                enable_duplicates: dq.duplicates.enabled,
11949                duplicates: datasynth_generators::DuplicateConfig {
11950                    duplicate_rate: dq.effective_duplicate_rate(),
11951                    ..Default::default()
11952                },
11953                enable_typos: dq.typos.enabled,
11954                typos: datasynth_generators::TypoConfig {
11955                    char_error_rate: dq.effective_typo_rate(),
11956                    ..Default::default()
11957                },
11958                enable_encoding_issues: dq.encoding_issues.enabled,
11959                encoding_issue_rate: dq.encoding_issues.rate,
11960                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
11961                track_statistics: true,
11962            }
11963        } else {
11964            DataQualityConfig::minimal()
11965        };
11966        let mut injector = DataQualityInjector::new(config);
11967
11968        // Wire country pack for locale-aware format baselines
11969        injector.set_country_pack(self.primary_pack().clone());
11970
11971        // Build context for missing value decisions
11972        let context = HashMap::new();
11973
11974        for entry in entries.iter_mut() {
11975            // Process header_text field (common target for typos)
11976            if let Some(text) = &entry.header.header_text {
11977                let processed = injector.process_text_field(
11978                    "header_text",
11979                    text,
11980                    &entry.header.document_id.to_string(),
11981                    &context,
11982                );
11983                match processed {
11984                    Some(new_text) if new_text != *text => {
11985                        entry.header.header_text = Some(new_text);
11986                    }
11987                    None => {
11988                        entry.header.header_text = None; // Missing value
11989                    }
11990                    _ => {}
11991                }
11992            }
11993
11994            // Process reference field
11995            if let Some(ref_text) = &entry.header.reference {
11996                let processed = injector.process_text_field(
11997                    "reference",
11998                    ref_text,
11999                    &entry.header.document_id.to_string(),
12000                    &context,
12001                );
12002                match processed {
12003                    Some(new_text) if new_text != *ref_text => {
12004                        entry.header.reference = Some(new_text);
12005                    }
12006                    None => {
12007                        entry.header.reference = None;
12008                    }
12009                    _ => {}
12010                }
12011            }
12012
12013            // Process user_persona field (potential for typos in user IDs)
12014            let user_persona = entry.header.user_persona.clone();
12015            if let Some(processed) = injector.process_text_field(
12016                "user_persona",
12017                &user_persona,
12018                &entry.header.document_id.to_string(),
12019                &context,
12020            ) {
12021                if processed != user_persona {
12022                    entry.header.user_persona = processed;
12023                }
12024            }
12025
12026            // Process line items
12027            for line in &mut entry.lines {
12028                // Process line description if present
12029                if let Some(ref text) = line.line_text {
12030                    let processed = injector.process_text_field(
12031                        "line_text",
12032                        text,
12033                        &entry.header.document_id.to_string(),
12034                        &context,
12035                    );
12036                    match processed {
12037                        Some(new_text) if new_text != *text => {
12038                            line.line_text = Some(new_text);
12039                        }
12040                        None => {
12041                            line.line_text = None;
12042                        }
12043                        _ => {}
12044                    }
12045                }
12046
12047                // Process cost_center if present
12048                if let Some(cc) = &line.cost_center {
12049                    let processed = injector.process_text_field(
12050                        "cost_center",
12051                        cc,
12052                        &entry.header.document_id.to_string(),
12053                        &context,
12054                    );
12055                    match processed {
12056                        Some(new_cc) if new_cc != *cc => {
12057                            line.cost_center = Some(new_cc);
12058                        }
12059                        None => {
12060                            line.cost_center = None;
12061                        }
12062                        _ => {}
12063                    }
12064                }
12065
12066                // Extended field coverage (v5.6+): apply NULL injection to
12067                // every Option<String> on the line so users can match
12068                // arbitrary real-production NULL profiles via
12069                // `data_quality.missing_values.field_rates`.
12070                //
12071                // Macro-free helper: process_field returns the new value
12072                // ({Some, None, unchanged}) and we apply it back.
12073                macro_rules! process_opt_field {
12074                    ($field_name:expr, $opt:expr) => {
12075                        if let Some(val) = $opt.as_ref() {
12076                            match injector.process_text_field(
12077                                $field_name,
12078                                val,
12079                                &entry.header.document_id.to_string(),
12080                                &context,
12081                            ) {
12082                                Some(new_val) if new_val != *val => {
12083                                    *$opt = Some(new_val);
12084                                }
12085                                None => {
12086                                    *$opt = None;
12087                                }
12088                                _ => {}
12089                            }
12090                        }
12091                    };
12092                }
12093
12094                process_opt_field!("profit_center", &mut line.profit_center);
12095                process_opt_field!("assignment", &mut line.assignment);
12096                process_opt_field!("tax_code", &mut line.tax_code);
12097                process_opt_field!("account_description", &mut line.account_description);
12098                process_opt_field!(
12099                    "auxiliary_account_number",
12100                    &mut line.auxiliary_account_number
12101                );
12102                process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12103                process_opt_field!("lettrage", &mut line.lettrage);
12104            }
12105
12106            if let Some(pb) = &pb {
12107                pb.inc(1);
12108            }
12109        }
12110
12111        if let Some(pb) = pb {
12112            pb.finish_with_message("Data quality injection complete");
12113        }
12114
12115        let quality_issues = injector.issues().to_vec();
12116        Ok((injector.stats().clone(), quality_issues))
12117    }
12118
12119    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12120    ///
12121    /// Creates complete audit documentation for each company in the configuration,
12122    /// following ISA standards:
12123    /// - ISA 210/220: Engagement acceptance and terms
12124    /// - ISA 230: Audit documentation (workpapers)
12125    /// - ISA 265: Control deficiencies (findings)
12126    /// - ISA 315/330: Risk assessment and response
12127    /// - ISA 500: Audit evidence
12128    /// - ISA 200: Professional judgment
12129    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12130        // Check if FSM-driven audit generation is enabled
12131        let use_fsm = self
12132            .config
12133            .audit
12134            .fsm
12135            .as_ref()
12136            .map(|f| f.enabled)
12137            .unwrap_or(false);
12138
12139        if use_fsm {
12140            return self.generate_audit_data_with_fsm(entries);
12141        }
12142
12143        // --- Legacy (non-FSM) audit generation follows ---
12144        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12145            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12146        let fiscal_year = start_date.year() as u16;
12147        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12148
12149        // Calculate rough total revenue from entries for materiality
12150        let total_revenue: rust_decimal::Decimal = entries
12151            .iter()
12152            .flat_map(|e| e.lines.iter())
12153            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12154            .map(|l| l.credit_amount)
12155            .sum();
12156
12157        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12158        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12159
12160        let mut snapshot = AuditSnapshot::default();
12161
12162        // Initialize generators
12163        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12164        // v3.3.2: thread the user-facing audit schema config into the
12165        // engagement generator (team size range).
12166        engagement_gen.set_team_config(&self.config.audit.team);
12167
12168        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12169        // v3.3.2: thread workpaper + review workflow schema config into
12170        // the workpaper generator (per-section count range + review
12171        // delay ranges).
12172        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12173        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12174        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12175        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12176        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12177        finding_gen.set_template_provider(self.template_provider.clone());
12178        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12179        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12180        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12181        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12182        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12183        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12184        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12185
12186        // Get list of accounts from CoA for risk assessment
12187        let accounts: Vec<String> = self
12188            .coa
12189            .as_ref()
12190            .map(|coa| {
12191                coa.get_postable_accounts()
12192                    .iter()
12193                    .map(|acc| acc.account_code().to_string())
12194                    .collect()
12195            })
12196            .unwrap_or_default();
12197
12198        // Generate engagements for each company
12199        for (i, company) in self.config.companies.iter().enumerate() {
12200            // Calculate company-specific revenue (proportional to volume weight)
12201            let company_revenue = total_revenue
12202                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12203
12204            // Generate engagements for this company
12205            let engagements_for_company =
12206                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12207            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12208                1
12209            } else {
12210                0
12211            };
12212
12213            for _eng_idx in 0..(engagements_for_company + extra) {
12214                // v3.3.2: draw engagement type from the user-configured
12215                // distribution instead of always using the default
12216                // (AnnualAudit). Falls back to the default when all
12217                // probabilities are zero.
12218                let eng_type =
12219                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12220
12221                // Generate the engagement
12222                let mut engagement = engagement_gen.generate_engagement(
12223                    &company.code,
12224                    &company.name,
12225                    fiscal_year,
12226                    period_end,
12227                    company_revenue,
12228                    Some(eng_type),
12229                );
12230
12231                // Replace synthetic team IDs with real employee IDs from master data
12232                if !self.master_data.employees.is_empty() {
12233                    let emp_count = self.master_data.employees.len();
12234                    // Use employee IDs deterministically based on engagement index
12235                    let base = (i * 10 + _eng_idx) % emp_count;
12236                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
12237                        .employee_id
12238                        .clone();
12239                    engagement.engagement_manager_id = self.master_data.employees
12240                        [(base + 1) % emp_count]
12241                        .employee_id
12242                        .clone();
12243                    let real_team: Vec<String> = engagement
12244                        .team_member_ids
12245                        .iter()
12246                        .enumerate()
12247                        .map(|(j, _)| {
12248                            self.master_data.employees[(base + 2 + j) % emp_count]
12249                                .employee_id
12250                                .clone()
12251                        })
12252                        .collect();
12253                    engagement.team_member_ids = real_team;
12254                }
12255
12256                if let Some(pb) = &pb {
12257                    pb.inc(1);
12258                }
12259
12260                // Get team members from the engagement
12261                let team_members: Vec<String> = engagement.team_member_ids.clone();
12262
12263                // Generate workpapers for the engagement.
12264                // v3.3.2: honor `audit.generate_workpapers` — when false,
12265                // workpapers (and dependent evidence) are skipped while
12266                // the engagement itself, risk assessments, findings, etc.
12267                // still generate normally.
12268                let workpapers = if self.config.audit.generate_workpapers {
12269                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
12270                } else {
12271                    Vec::new()
12272                };
12273
12274                for wp in &workpapers {
12275                    if let Some(pb) = &pb {
12276                        pb.inc(1);
12277                    }
12278
12279                    // Generate evidence for each workpaper
12280                    let evidence = evidence_gen.generate_evidence_for_workpaper(
12281                        wp,
12282                        &team_members,
12283                        wp.preparer_date,
12284                    );
12285
12286                    for _ in &evidence {
12287                        if let Some(pb) = &pb {
12288                            pb.inc(1);
12289                        }
12290                    }
12291
12292                    snapshot.evidence.extend(evidence);
12293                }
12294
12295                // Generate risk assessments for the engagement
12296                let risks =
12297                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
12298
12299                for _ in &risks {
12300                    if let Some(pb) = &pb {
12301                        pb.inc(1);
12302                    }
12303                }
12304                snapshot.risk_assessments.extend(risks);
12305
12306                // Generate findings for the engagement
12307                let findings = finding_gen.generate_findings_for_engagement(
12308                    &engagement,
12309                    &workpapers,
12310                    &team_members,
12311                );
12312
12313                for _ in &findings {
12314                    if let Some(pb) = &pb {
12315                        pb.inc(1);
12316                    }
12317                }
12318                snapshot.findings.extend(findings);
12319
12320                // Generate professional judgments for the engagement
12321                let judgments =
12322                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
12323
12324                for _ in &judgments {
12325                    if let Some(pb) = &pb {
12326                        pb.inc(1);
12327                    }
12328                }
12329                snapshot.judgments.extend(judgments);
12330
12331                // ISA 505: External confirmations and responses
12332                let (confs, resps) =
12333                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
12334                snapshot.confirmations.extend(confs);
12335                snapshot.confirmation_responses.extend(resps);
12336
12337                // ISA 330: Procedure steps per workpaper
12338                let team_pairs: Vec<(String, String)> = team_members
12339                    .iter()
12340                    .map(|id| {
12341                        let name = self
12342                            .master_data
12343                            .employees
12344                            .iter()
12345                            .find(|e| e.employee_id == *id)
12346                            .map(|e| e.display_name.clone())
12347                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
12348                        (id.clone(), name)
12349                    })
12350                    .collect();
12351                for wp in &workpapers {
12352                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
12353                    snapshot.procedure_steps.extend(steps);
12354                }
12355
12356                // ISA 530: Samples per workpaper
12357                for wp in &workpapers {
12358                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
12359                        snapshot.samples.push(sample);
12360                    }
12361                }
12362
12363                // ISA 520: Analytical procedures
12364                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
12365                snapshot.analytical_results.extend(analytical);
12366
12367                // ISA 610: Internal audit function and reports
12368                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
12369                snapshot.ia_functions.push(ia_func);
12370                snapshot.ia_reports.extend(ia_reports);
12371
12372                // ISA 550: Related parties and transactions
12373                let vendor_names: Vec<String> = self
12374                    .master_data
12375                    .vendors
12376                    .iter()
12377                    .map(|v| v.name.clone())
12378                    .collect();
12379                let customer_names: Vec<String> = self
12380                    .master_data
12381                    .customers
12382                    .iter()
12383                    .map(|c| c.name.clone())
12384                    .collect();
12385                let (parties, rp_txns) =
12386                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
12387                snapshot.related_parties.extend(parties);
12388                snapshot.related_party_transactions.extend(rp_txns);
12389
12390                // Add workpapers after findings since findings need them
12391                snapshot.workpapers.extend(workpapers);
12392
12393                // Generate audit scope record for this engagement (one per engagement)
12394                {
12395                    let scope_id = format!(
12396                        "SCOPE-{}-{}",
12397                        engagement.engagement_id.simple(),
12398                        &engagement.client_entity_id
12399                    );
12400                    let scope = datasynth_core::models::audit::AuditScope::new(
12401                        scope_id.clone(),
12402                        engagement.engagement_id.to_string(),
12403                        engagement.client_entity_id.clone(),
12404                        engagement.materiality,
12405                    );
12406                    // Wire scope_id back to engagement
12407                    let mut eng = engagement;
12408                    eng.scope_id = Some(scope_id);
12409                    snapshot.audit_scopes.push(scope);
12410                    snapshot.engagements.push(eng);
12411                }
12412            }
12413        }
12414
12415        // ----------------------------------------------------------------
12416        // ISA 600: Group audit — component auditors, plan, instructions, reports
12417        // ----------------------------------------------------------------
12418        if self.config.companies.len() > 1 {
12419            // Use materiality from the first engagement if available, otherwise
12420            // derive a reasonable figure from total revenue.
12421            let group_materiality = snapshot
12422                .engagements
12423                .first()
12424                .map(|e| e.materiality)
12425                .unwrap_or_else(|| {
12426                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
12427                    total_revenue * pct
12428                });
12429
12430            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
12431            let group_engagement_id = snapshot
12432                .engagements
12433                .first()
12434                .map(|e| e.engagement_id.to_string())
12435                .unwrap_or_else(|| "GROUP-ENG".to_string());
12436
12437            let component_snapshot = component_gen.generate(
12438                &self.config.companies,
12439                group_materiality,
12440                &group_engagement_id,
12441                period_end,
12442            );
12443
12444            snapshot.component_auditors = component_snapshot.component_auditors;
12445            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
12446            snapshot.component_instructions = component_snapshot.component_instructions;
12447            snapshot.component_reports = component_snapshot.component_reports;
12448
12449            info!(
12450                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
12451                snapshot.component_auditors.len(),
12452                snapshot.component_instructions.len(),
12453                snapshot.component_reports.len(),
12454            );
12455        }
12456
12457        // ----------------------------------------------------------------
12458        // ISA 210: Engagement letters — one per engagement
12459        // ----------------------------------------------------------------
12460        {
12461            let applicable_framework = self
12462                .config
12463                .accounting_standards
12464                .framework
12465                .as_ref()
12466                .map(|f| format!("{f:?}"))
12467                .unwrap_or_else(|| "IFRS".to_string());
12468
12469            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
12470            let entity_count = self.config.companies.len();
12471
12472            for engagement in &snapshot.engagements {
12473                let company = self
12474                    .config
12475                    .companies
12476                    .iter()
12477                    .find(|c| c.code == engagement.client_entity_id);
12478                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
12479                let letter_date = engagement.planning_start;
12480                let letter = letter_gen.generate(
12481                    &engagement.engagement_id.to_string(),
12482                    &engagement.client_name,
12483                    entity_count,
12484                    engagement.period_end_date,
12485                    currency,
12486                    &applicable_framework,
12487                    letter_date,
12488                );
12489                snapshot.engagement_letters.push(letter);
12490            }
12491
12492            info!(
12493                "ISA 210 engagement letters: {} generated",
12494                snapshot.engagement_letters.len()
12495            );
12496        }
12497
12498        // ----------------------------------------------------------------
12499        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
12500        // ----------------------------------------------------------------
12501        if self.phase_config.generate_legal_documents {
12502            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
12503            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
12504            for engagement in &snapshot.engagements {
12505                // Build an employee name list for signatory drawing —
12506                // prefer employees from the engaged entity, fall back to
12507                // all employees.
12508                let employee_names: Vec<String> = self
12509                    .master_data
12510                    .employees
12511                    .iter()
12512                    .filter(|e| e.company_code == engagement.client_entity_id)
12513                    .map(|e| e.display_name.clone())
12514                    .collect();
12515                let names_to_use = if !employee_names.is_empty() {
12516                    employee_names
12517                } else {
12518                    self.master_data
12519                        .employees
12520                        .iter()
12521                        .take(10)
12522                        .map(|e| e.display_name.clone())
12523                        .collect()
12524                };
12525                let docs = legal_gen.generate(
12526                    &engagement.client_entity_id,
12527                    engagement.fiscal_year as i32,
12528                    &names_to_use,
12529                );
12530                snapshot.legal_documents.extend(docs);
12531            }
12532            info!(
12533                "v3.3.0 legal documents: {} emitted across {} engagements",
12534                snapshot.legal_documents.len(),
12535                snapshot.engagements.len()
12536            );
12537        }
12538
12539        // ----------------------------------------------------------------
12540        // v3.3.0: IT general controls — access logs + change records
12541        //
12542        // `ItControlsGenerator` runs one pass per company (not per
12543        // engagement) so employee sets and system catalogs stay
12544        // coherent. We derive the period from the earliest engagement's
12545        // planning_start through the latest engagement's period_end_date
12546        // for each company.
12547        // ----------------------------------------------------------------
12548        if self.phase_config.generate_it_controls {
12549            use datasynth_generators::it_controls_generator::ItControlsGenerator;
12550            use std::collections::HashMap;
12551            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
12552
12553            // Group engagements by company to produce one IT-controls
12554            // window per entity.
12555            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
12556                HashMap::new();
12557            for engagement in &snapshot.engagements {
12558                let entry = by_company
12559                    .entry(engagement.client_entity_id.clone())
12560                    .or_insert((engagement.planning_start, engagement.period_end_date));
12561                if engagement.planning_start < entry.0 {
12562                    entry.0 = engagement.planning_start;
12563                }
12564                if engagement.period_end_date > entry.1 {
12565                    entry.1 = engagement.period_end_date;
12566                }
12567            }
12568
12569            // Standard system catalog — populated from known ERP / app
12570            // names. Keeps the generator's data shape stable when the
12571            // user hasn't configured IT-system naming separately.
12572            let systems: Vec<String> = vec![
12573                "SAP ECC",
12574                "SAP S/4 HANA",
12575                "Oracle EBS",
12576                "Workday",
12577                "NetSuite",
12578                "Active Directory",
12579                "SharePoint",
12580                "Salesforce",
12581                "ServiceNow",
12582                "Jira",
12583                "GitHub Enterprise",
12584                "AWS Console",
12585                "Okta",
12586            ]
12587            .into_iter()
12588            .map(String::from)
12589            .collect();
12590
12591            for (company_code, (start, end)) in by_company {
12592                let emps: Vec<(String, String)> = self
12593                    .master_data
12594                    .employees
12595                    .iter()
12596                    .filter(|e| e.company_code == company_code)
12597                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
12598                    .collect();
12599                if emps.is_empty() {
12600                    continue;
12601                }
12602                // Approximate the period in 30-day months: (days / 30) + 1,
12603                // min 1 (an exact multiple of 30 days counts one extra month).
12604                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
12605                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
12606                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
12607                snapshot.it_controls_access_logs.extend(access_logs);
12608                snapshot.it_controls_change_records.extend(change_records);
12609            }
12610
12611            info!(
12612                "v3.3.0 IT controls: {} access logs, {} change records",
12613                snapshot.it_controls_access_logs.len(),
12614                snapshot.it_controls_change_records.len()
12615            );
12616        }
12617
12618        // ----------------------------------------------------------------
12619        // ISA 560 / IAS 10: Subsequent events
12620        // ----------------------------------------------------------------
12621        {
12622            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
12623            let entity_codes: Vec<String> = self
12624                .config
12625                .companies
12626                .iter()
12627                .map(|c| c.code.clone())
12628                .collect();
12629            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
12630            info!(
12631                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
12632                subsequent.len(),
12633                subsequent
12634                    .iter()
12635                    .filter(|e| matches!(
12636                        e.classification,
12637                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
12638                    ))
12639                    .count(),
12640                subsequent
12641                    .iter()
12642                    .filter(|e| matches!(
12643                        e.classification,
12644                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
12645                    ))
12646                    .count(),
12647            );
12648            snapshot.subsequent_events = subsequent;
12649        }
12650
12651        // ----------------------------------------------------------------
12652        // ISA 402: Service organization controls
12653        // ----------------------------------------------------------------
12654        {
12655            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
12656            let entity_codes: Vec<String> = self
12657                .config
12658                .companies
12659                .iter()
12660                .map(|c| c.code.clone())
12661                .collect();
12662            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
12663            info!(
12664                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
12665                soc_snapshot.service_organizations.len(),
12666                soc_snapshot.soc_reports.len(),
12667                soc_snapshot.user_entity_controls.len(),
12668            );
12669            snapshot.service_organizations = soc_snapshot.service_organizations;
12670            snapshot.soc_reports = soc_snapshot.soc_reports;
12671            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
12672        }
12673
12674        // ----------------------------------------------------------------
12675        // ISA 570: Going concern assessments
12676        // ----------------------------------------------------------------
12677        {
12678            use datasynth_generators::audit::going_concern_generator::{
12679                GoingConcernGenerator, GoingConcernInput,
12680            };
12681            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
12682            let entity_codes: Vec<String> = self
12683                .config
12684                .companies
12685                .iter()
12686                .map(|c| c.code.clone())
12687                .collect();
12688            // Assessment date = period end + 75 days (typical sign-off window).
12689            let assessment_date = period_end + chrono::Duration::days(75);
12690            let period_label = format!("FY{}", period_end.year());
12691
12692            // Build financial inputs from actual journal entries.
12693            //
12694            // We derive approximate P&L, working capital, and operating cash flow
12695            // by aggregating GL account balances from the journal entry population.
12696            // Account ranges used (standard chart):
12697            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
12698            //   Expenses:        6xxx (debit-normal)
12699            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
12700            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
12701            //   Operating CF:    net income used as-is, no D&A adjustment (rough proxy)
12702            let gc_inputs: Vec<GoingConcernInput> = self
12703                .config
12704                .companies
12705                .iter()
12706                .map(|company| {
12707                    let code = &company.code;
12708                    let mut revenue = rust_decimal::Decimal::ZERO;
12709                    let mut expenses = rust_decimal::Decimal::ZERO;
12710                    let mut current_assets = rust_decimal::Decimal::ZERO;
12711                    let mut current_liabs = rust_decimal::Decimal::ZERO;
12712                    let mut total_debt = rust_decimal::Decimal::ZERO;
12713
12714                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
12715                        for line in &je.lines {
12716                            let acct = line.gl_account.as_str();
12717                            let net = line.debit_amount - line.credit_amount;
12718                            if acct.starts_with('4') {
12719                                // Revenue accounts: credit-normal, so negative net = revenue earned
12720                                revenue -= net;
12721                            } else if acct.starts_with('6') {
12722                                // Expense accounts: debit-normal
12723                                expenses += net;
12724                            }
12725                            // Balance sheet accounts for working capital
12726                            if acct.starts_with('1') {
12727                                // Current asset accounts (1000–1499)
12728                                if let Ok(n) = acct.parse::<u32>() {
12729                                    if (1000..=1499).contains(&n) {
12730                                        current_assets += net;
12731                                    }
12732                                }
12733                            } else if acct.starts_with('2') {
12734                                if let Ok(n) = acct.parse::<u32>() {
12735                                    if (2000..=2499).contains(&n) {
12736                                        // Current liabilities
12737                                        current_liabs -= net; // credit-normal
12738                                    } else if (2500..=2999).contains(&n) {
12739                                        // Long-term debt
12740                                        total_debt -= net;
12741                                    }
12742                                }
12743                            }
12744                        }
12745                    }
12746
12747                    let net_income = revenue - expenses;
12748                    let working_capital = current_assets - current_liabs;
12749                    // Rough operating CF proxy: net income (full accrual CF calculation
12750                    // is done separately in the cash flow statement generator)
12751                    let operating_cash_flow = net_income;
12752
12753                    GoingConcernInput {
12754                        entity_code: code.clone(),
12755                        net_income,
12756                        working_capital,
12757                        operating_cash_flow,
12758                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
12759                        assessment_date,
12760                    }
12761                })
12762                .collect();
12763
12764            let assessments = if gc_inputs.is_empty() {
12765                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
12766            } else {
12767                gc_gen.generate_for_entities_with_inputs(
12768                    &entity_codes,
12769                    &gc_inputs,
12770                    assessment_date,
12771                    &period_label,
12772                )
12773            };
12774            info!(
12775                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
12776                assessments.len(),
12777                assessments.iter().filter(|a| matches!(
12778                    a.auditor_conclusion,
12779                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
12780                )).count(),
12781                assessments.iter().filter(|a| matches!(
12782                    a.auditor_conclusion,
12783                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
12784                )).count(),
12785                assessments.iter().filter(|a| matches!(
12786                    a.auditor_conclusion,
12787                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
12788                )).count(),
12789            );
12790            snapshot.going_concern_assessments = assessments;
12791        }
12792
12793        // ----------------------------------------------------------------
12794        // ISA 540: Accounting estimates
12795        // ----------------------------------------------------------------
12796        {
12797            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
12798            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
12799            let entity_codes: Vec<String> = self
12800                .config
12801                .companies
12802                .iter()
12803                .map(|c| c.code.clone())
12804                .collect();
12805            let estimates = est_gen.generate_for_entities(&entity_codes);
12806            info!(
12807                "ISA 540 accounting estimates: {} estimates across {} entities \
12808                 ({} with retrospective reviews, {} with auditor point estimates)",
12809                estimates.len(),
12810                entity_codes.len(),
12811                estimates
12812                    .iter()
12813                    .filter(|e| e.retrospective_review.is_some())
12814                    .count(),
12815                estimates
12816                    .iter()
12817                    .filter(|e| e.auditor_point_estimate.is_some())
12818                    .count(),
12819            );
12820            snapshot.accounting_estimates = estimates;
12821        }
12822
12823        // ----------------------------------------------------------------
12824        // ISA 700/701/705/706: Audit opinions (one per engagement)
12825        // ----------------------------------------------------------------
12826        {
12827            use datasynth_generators::audit::audit_opinion_generator::{
12828                AuditOpinionGenerator, AuditOpinionInput,
12829            };
12830
12831            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
12832
12833            // Build inputs — one per engagement, linking findings and going concern.
12834            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
12835                .engagements
12836                .iter()
12837                .map(|eng| {
12838                    // Collect findings for this engagement.
12839                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12840                        .findings
12841                        .iter()
12842                        .filter(|f| f.engagement_id == eng.engagement_id)
12843                        .cloned()
12844                        .collect();
12845
12846                    // Going concern for this entity.
12847                    let gc = snapshot
12848                        .going_concern_assessments
12849                        .iter()
12850                        .find(|g| g.entity_code == eng.client_entity_id)
12851                        .cloned();
12852
12853                    // Component reports relevant to this engagement.
12854                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
12855                        snapshot.component_reports.clone();
12856
12857                    let auditor = self
12858                        .master_data
12859                        .employees
12860                        .first()
12861                        .map(|e| e.display_name.clone())
12862                        .unwrap_or_else(|| "Global Audit LLP".into());
12863
12864                    let partner = self
12865                        .master_data
12866                        .employees
12867                        .get(1)
12868                        .map(|e| e.display_name.clone())
12869                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
12870
12871                    AuditOpinionInput {
12872                        entity_code: eng.client_entity_id.clone(),
12873                        entity_name: eng.client_name.clone(),
12874                        engagement_id: eng.engagement_id,
12875                        period_end: eng.period_end_date,
12876                        findings: eng_findings,
12877                        going_concern: gc,
12878                        component_reports: comp_reports,
12879                        // Mark as US-listed when audit standards include PCAOB.
12880                        is_us_listed: {
12881                            let fw = &self.config.audit_standards.isa_compliance.framework;
12882                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
12883                        },
12884                        auditor_name: auditor,
12885                        engagement_partner: partner,
12886                    }
12887                })
12888                .collect();
12889
12890            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
12891
12892            for go in &generated_opinions {
12893                snapshot
12894                    .key_audit_matters
12895                    .extend(go.key_audit_matters.clone());
12896            }
12897            snapshot.audit_opinions = generated_opinions
12898                .into_iter()
12899                .map(|go| go.opinion)
12900                .collect();
12901
12902            info!(
12903                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
12904                snapshot.audit_opinions.len(),
12905                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
12906                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
12907                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
12908                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
12909            );
12910        }
12911
12912        // ----------------------------------------------------------------
12913        // SOX 302 / 404 assessments
12914        // ----------------------------------------------------------------
12915        {
12916            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
12917
12918            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
12919
12920            for (i, company) in self.config.companies.iter().enumerate() {
12921                // Collect findings for this company's engagements.
12922                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
12923                    .engagements
12924                    .iter()
12925                    .filter(|e| e.client_entity_id == company.code)
12926                    .map(|e| e.engagement_id)
12927                    .collect();
12928
12929                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
12930                    .findings
12931                    .iter()
12932                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
12933                    .cloned()
12934                    .collect();
12935
12936                // Derive executive names from employee list.
12937                let emp_count = self.master_data.employees.len();
12938                let ceo_name = if emp_count > 0 {
12939                    self.master_data.employees[i % emp_count]
12940                        .display_name
12941                        .clone()
12942                } else {
12943                    format!("CEO of {}", company.name)
12944                };
12945                let cfo_name = if emp_count > 1 {
12946                    self.master_data.employees[(i + 1) % emp_count]
12947                        .display_name
12948                        .clone()
12949                } else {
12950                    format!("CFO of {}", company.name)
12951                };
12952
12953                // Use engagement materiality if available.
12954                let materiality = snapshot
12955                    .engagements
12956                    .iter()
12957                    .find(|e| e.client_entity_id == company.code)
12958                    .map(|e| e.materiality)
12959                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
12960
12961                let input = SoxGeneratorInput {
12962                    company_code: company.code.clone(),
12963                    company_name: company.name.clone(),
12964                    fiscal_year,
12965                    period_end,
12966                    findings: company_findings,
12967                    ceo_name,
12968                    cfo_name,
12969                    materiality_threshold: materiality,
12970                    revenue_percent: rust_decimal::Decimal::from(100),
12971                    assets_percent: rust_decimal::Decimal::from(100),
12972                    significant_accounts: vec![
12973                        "Revenue".into(),
12974                        "Accounts Receivable".into(),
12975                        "Inventory".into(),
12976                        "Fixed Assets".into(),
12977                        "Accounts Payable".into(),
12978                    ],
12979                };
12980
12981                let (certs, assessment) = sox_gen.generate(&input);
12982                snapshot.sox_302_certifications.extend(certs);
12983                snapshot.sox_404_assessments.push(assessment);
12984            }
12985
12986            info!(
12987                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
12988                snapshot.sox_302_certifications.len(),
12989                snapshot.sox_404_assessments.len(),
12990                snapshot
12991                    .sox_404_assessments
12992                    .iter()
12993                    .filter(|a| a.icfr_effective)
12994                    .count(),
12995                snapshot
12996                    .sox_404_assessments
12997                    .iter()
12998                    .filter(|a| !a.icfr_effective)
12999                    .count(),
13000            );
13001        }
13002
13003        // ----------------------------------------------------------------
13004        // ISA 320: Materiality calculations (one per entity)
13005        // ----------------------------------------------------------------
13006        {
13007            use datasynth_generators::audit::materiality_generator::{
13008                MaterialityGenerator, MaterialityInput,
13009            };
13010
13011            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13012
13013            // Compute per-company financials from JEs.
13014            // Asset accounts start with '1', revenue with '4',
13015            // expense accounts with '5' or '6'.
13016            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13017
13018            for company in &self.config.companies {
13019                let company_code = company.code.clone();
13020
13021                // Revenue: credit-side entries on 4xxx accounts
13022                let company_revenue: rust_decimal::Decimal = entries
13023                    .iter()
13024                    .filter(|e| e.company_code() == company_code)
13025                    .flat_map(|e| e.lines.iter())
13026                    .filter(|l| l.account_code.starts_with('4'))
13027                    .map(|l| l.credit_amount)
13028                    .sum();
13029
13030                // Total assets: debit balances on 1xxx accounts
13031                let total_assets: rust_decimal::Decimal = entries
13032                    .iter()
13033                    .filter(|e| e.company_code() == company_code)
13034                    .flat_map(|e| e.lines.iter())
13035                    .filter(|l| l.account_code.starts_with('1'))
13036                    .map(|l| l.debit_amount)
13037                    .sum();
13038
13039                // Expenses: debit-side entries on 5xxx/6xxx accounts
13040                let total_expenses: rust_decimal::Decimal = entries
13041                    .iter()
13042                    .filter(|e| e.company_code() == company_code)
13043                    .flat_map(|e| e.lines.iter())
13044                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13045                    .map(|l| l.debit_amount)
13046                    .sum();
13047
13048                // Equity: credit balances on 3xxx accounts
13049                let equity: rust_decimal::Decimal = entries
13050                    .iter()
13051                    .filter(|e| e.company_code() == company_code)
13052                    .flat_map(|e| e.lines.iter())
13053                    .filter(|l| l.account_code.starts_with('3'))
13054                    .map(|l| l.credit_amount)
13055                    .sum();
13056
13057                let pretax_income = company_revenue - total_expenses;
13058
13059                // If no company-specific data, fall back to proportional share
13060                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13061                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
13062                        .unwrap_or(rust_decimal::Decimal::ONE);
13063                    (
13064                        total_revenue * w,
13065                        total_revenue * w * rust_decimal::Decimal::from(3),
13066                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
13067                        total_revenue * w * rust_decimal::Decimal::from(2),
13068                    )
13069                } else {
13070                    (company_revenue, total_assets, pretax_income, equity)
13071                };
13072
13073                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
13074
13075                materiality_inputs.push(MaterialityInput {
13076                    entity_code: company_code,
13077                    period: format!("FY{}", fiscal_year),
13078                    revenue: rev,
13079                    pretax_income: pti,
13080                    total_assets: assets,
13081                    equity: eq,
13082                    gross_profit,
13083                });
13084            }
13085
13086            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13087
13088            info!(
13089                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13090                 {} total assets, {} equity benchmarks)",
13091                snapshot.materiality_calculations.len(),
13092                snapshot
13093                    .materiality_calculations
13094                    .iter()
13095                    .filter(|m| matches!(
13096                        m.benchmark,
13097                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13098                    ))
13099                    .count(),
13100                snapshot
13101                    .materiality_calculations
13102                    .iter()
13103                    .filter(|m| matches!(
13104                        m.benchmark,
13105                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13106                    ))
13107                    .count(),
13108                snapshot
13109                    .materiality_calculations
13110                    .iter()
13111                    .filter(|m| matches!(
13112                        m.benchmark,
13113                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13114                    ))
13115                    .count(),
13116                snapshot
13117                    .materiality_calculations
13118                    .iter()
13119                    .filter(|m| matches!(
13120                        m.benchmark,
13121                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13122                    ))
13123                    .count(),
13124            );
13125        }
13126
13127        // ----------------------------------------------------------------
13128        // ISA 315: Combined Risk Assessments (per entity, per account area)
13129        // ----------------------------------------------------------------
13130        {
13131            use datasynth_generators::audit::cra_generator::CraGenerator;
13132
13133            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13134
13135            // Build entity → scope_id map from already-generated scopes
13136            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13137                .audit_scopes
13138                .iter()
13139                .map(|s| (s.entity_code.clone(), s.id.clone()))
13140                .collect();
13141
13142            for company in &self.config.companies {
13143                let cras = cra_gen.generate_for_entity(&company.code, None);
13144                let scope_id = entity_scope_map.get(&company.code).cloned();
13145                let cras_with_scope: Vec<_> = cras
13146                    .into_iter()
13147                    .map(|mut cra| {
13148                        cra.scope_id = scope_id.clone();
13149                        cra
13150                    })
13151                    .collect();
13152                snapshot.combined_risk_assessments.extend(cras_with_scope);
13153            }
13154
13155            let significant_count = snapshot
13156                .combined_risk_assessments
13157                .iter()
13158                .filter(|c| c.significant_risk)
13159                .count();
13160            let high_cra_count = snapshot
13161                .combined_risk_assessments
13162                .iter()
13163                .filter(|c| {
13164                    matches!(
13165                        c.combined_risk,
13166                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13167                    )
13168                })
13169                .count();
13170
13171            info!(
13172                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13173                snapshot.combined_risk_assessments.len(),
13174                significant_count,
13175                high_cra_count,
13176            );
13177        }
13178
13179        // ----------------------------------------------------------------
13180        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13181        // ----------------------------------------------------------------
13182        {
13183            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13184
13185            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13186
13187            // Group CRAs by entity and use per-entity tolerable error from materiality
13188            for company in &self.config.companies {
13189                let entity_code = company.code.clone();
13190
13191                // Find tolerable error for this entity (= performance materiality)
13192                let tolerable_error = snapshot
13193                    .materiality_calculations
13194                    .iter()
13195                    .find(|m| m.entity_code == entity_code)
13196                    .map(|m| m.tolerable_error);
13197
13198                // Collect CRAs for this entity
13199                let entity_cras: Vec<_> = snapshot
13200                    .combined_risk_assessments
13201                    .iter()
13202                    .filter(|c| c.entity_code == entity_code)
13203                    .cloned()
13204                    .collect();
13205
13206                if !entity_cras.is_empty() {
13207                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13208                    snapshot.sampling_plans.extend(plans);
13209                    snapshot.sampled_items.extend(items);
13210                }
13211            }
13212
13213            let misstatement_count = snapshot
13214                .sampled_items
13215                .iter()
13216                .filter(|i| i.misstatement_found)
13217                .count();
13218
13219            info!(
13220                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13221                snapshot.sampling_plans.len(),
13222                snapshot.sampled_items.len(),
13223                misstatement_count,
13224            );
13225        }
13226
13227        // ----------------------------------------------------------------
13228        // ISA 315: Significant Classes of Transactions (SCOTS)
13229        // ----------------------------------------------------------------
13230        {
13231            use datasynth_generators::audit::scots_generator::{
13232                ScotsGenerator, ScotsGeneratorConfig,
13233            };
13234
13235            let ic_enabled = self.config.intercompany.enabled;
13236
13237            let config = ScotsGeneratorConfig {
13238                intercompany_enabled: ic_enabled,
13239                ..ScotsGeneratorConfig::default()
13240            };
13241            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
13242
13243            for company in &self.config.companies {
13244                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
13245                snapshot
13246                    .significant_transaction_classes
13247                    .extend(entity_scots);
13248            }
13249
13250            let estimation_count = snapshot
13251                .significant_transaction_classes
13252                .iter()
13253                .filter(|s| {
13254                    matches!(
13255                        s.transaction_type,
13256                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
13257                    )
13258                })
13259                .count();
13260
13261            info!(
13262                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
13263                snapshot.significant_transaction_classes.len(),
13264                estimation_count,
13265            );
13266        }
13267
13268        // ----------------------------------------------------------------
13269        // ISA 520: Unusual Item Markers
13270        // ----------------------------------------------------------------
13271        {
13272            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
13273
13274            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
13275            let entity_codes: Vec<String> = self
13276                .config
13277                .companies
13278                .iter()
13279                .map(|c| c.code.clone())
13280                .collect();
13281            let unusual_flags =
13282                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
13283            info!(
13284                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
13285                unusual_flags.len(),
13286                unusual_flags
13287                    .iter()
13288                    .filter(|f| matches!(
13289                        f.severity,
13290                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
13291                    ))
13292                    .count(),
13293                unusual_flags
13294                    .iter()
13295                    .filter(|f| matches!(
13296                        f.severity,
13297                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
13298                    ))
13299                    .count(),
13300                unusual_flags
13301                    .iter()
13302                    .filter(|f| matches!(
13303                        f.severity,
13304                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
13305                    ))
13306                    .count(),
13307            );
13308            snapshot.unusual_items = unusual_flags;
13309        }
13310
13311        // ----------------------------------------------------------------
13312        // ISA 520: Analytical Relationships
13313        // ----------------------------------------------------------------
13314        {
13315            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
13316
13317            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
13318            let entity_codes: Vec<String> = self
13319                .config
13320                .companies
13321                .iter()
13322                .map(|c| c.code.clone())
13323                .collect();
13324            let current_period_label = format!("FY{fiscal_year}");
13325            let prior_period_label = format!("FY{}", fiscal_year - 1);
13326            let analytical_rels = ar_gen.generate_for_entities(
13327                &entity_codes,
13328                entries,
13329                &current_period_label,
13330                &prior_period_label,
13331            );
13332            let out_of_range = analytical_rels
13333                .iter()
13334                .filter(|r| !r.within_expected_range)
13335                .count();
13336            info!(
13337                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
13338                analytical_rels.len(),
13339                out_of_range,
13340            );
13341            snapshot.analytical_relationships = analytical_rels;
13342        }
13343
13344        if let Some(pb) = pb {
13345            pb.finish_with_message(format!(
13346                "Audit data: {} engagements, {} workpapers, {} evidence, \
13347                 {} confirmations, {} procedure steps, {} samples, \
13348                 {} analytical, {} IA funcs, {} related parties, \
13349                 {} component auditors, {} letters, {} subsequent events, \
13350                 {} service orgs, {} going concern, {} accounting estimates, \
13351                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
13352                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
13353                 {} unusual items, {} analytical relationships",
13354                snapshot.engagements.len(),
13355                snapshot.workpapers.len(),
13356                snapshot.evidence.len(),
13357                snapshot.confirmations.len(),
13358                snapshot.procedure_steps.len(),
13359                snapshot.samples.len(),
13360                snapshot.analytical_results.len(),
13361                snapshot.ia_functions.len(),
13362                snapshot.related_parties.len(),
13363                snapshot.component_auditors.len(),
13364                snapshot.engagement_letters.len(),
13365                snapshot.subsequent_events.len(),
13366                snapshot.service_organizations.len(),
13367                snapshot.going_concern_assessments.len(),
13368                snapshot.accounting_estimates.len(),
13369                snapshot.audit_opinions.len(),
13370                snapshot.key_audit_matters.len(),
13371                snapshot.sox_302_certifications.len(),
13372                snapshot.sox_404_assessments.len(),
13373                snapshot.materiality_calculations.len(),
13374                snapshot.combined_risk_assessments.len(),
13375                snapshot.sampling_plans.len(),
13376                snapshot.significant_transaction_classes.len(),
13377                snapshot.unusual_items.len(),
13378                snapshot.analytical_relationships.len(),
13379            ));
13380        }
13381
13382        // ----------------------------------------------------------------
13383        // PCAOB-ISA cross-reference mappings
13384        // ----------------------------------------------------------------
13385        // Always include the standard PCAOB-ISA mappings when audit generation is
13386        // enabled. These are static reference data (no randomness required) so we
13387        // call standard_mappings() directly.
13388        {
13389            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13390            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13391            debug!(
13392                "PCAOB-ISA mappings generated: {} mappings",
13393                snapshot.isa_pcaob_mappings.len()
13394            );
13395        }
13396
13397        // ----------------------------------------------------------------
13398        // ISA standard reference entries
13399        // ----------------------------------------------------------------
13400        // Emit flat ISA standard reference data (number, title, series) so
13401        // consumers get a machine-readable listing of all 34 ISA standards in
13402        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
13403        {
13404            use datasynth_standards::audit::isa_reference::IsaStandard;
13405            snapshot.isa_mappings = IsaStandard::standard_entries();
13406            debug!(
13407                "ISA standard entries generated: {} standards",
13408                snapshot.isa_mappings.len()
13409            );
13410        }
13411
13412        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
13413        // For each RPT, find the chronologically closest JE for the engagement's entity.
13414        {
13415            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
13416                .engagements
13417                .iter()
13418                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
13419                .collect();
13420
13421            for rpt in &mut snapshot.related_party_transactions {
13422                if rpt.journal_entry_id.is_some() {
13423                    continue; // already set
13424                }
13425                let entity = engagement_by_id
13426                    .get(&rpt.engagement_id.to_string())
13427                    .copied()
13428                    .unwrap_or("");
13429
13430                // Find closest JE by date in the entity's company
13431                let best_je = entries
13432                    .iter()
13433                    .filter(|je| je.header.company_code == entity)
13434                    .min_by_key(|je| {
13435                        (je.header.posting_date - rpt.transaction_date)
13436                            .num_days()
13437                            .abs()
13438                    });
13439
13440                if let Some(je) = best_je {
13441                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
13442                }
13443            }
13444
13445            let linked = snapshot
13446                .related_party_transactions
13447                .iter()
13448                .filter(|t| t.journal_entry_id.is_some())
13449                .count();
13450            debug!(
13451                "Linked {}/{} related party transactions to journal entries",
13452                linked,
13453                snapshot.related_party_transactions.len()
13454            );
13455        }
13456
13457        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
13458        // One opinion per engagement, derived from that engagement's findings,
13459        // going-concern assessment, and any component-auditor reports. Fills
13460        // `audit_opinions` + a flattened `key_audit_matters` for downstream
13461        // export.
13462        if !snapshot.engagements.is_empty() {
13463            use datasynth_generators::audit_opinion_generator::{
13464                AuditOpinionGenerator, AuditOpinionInput,
13465            };
13466
13467            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
13468            let inputs: Vec<AuditOpinionInput> = snapshot
13469                .engagements
13470                .iter()
13471                .map(|eng| {
13472                    let findings = snapshot
13473                        .findings
13474                        .iter()
13475                        .filter(|f| f.engagement_id == eng.engagement_id)
13476                        .cloned()
13477                        .collect();
13478                    let going_concern = snapshot
13479                        .going_concern_assessments
13480                        .iter()
13481                        .find(|gc| gc.entity_code == eng.client_entity_id)
13482                        .cloned();
13483                    // ComponentAuditorReport doesn't carry an engagement id, but
13484                    // component scope is keyed by `entity_code`, so filter on that.
13485                    let component_reports = snapshot
13486                        .component_reports
13487                        .iter()
13488                        .filter(|r| r.entity_code == eng.client_entity_id)
13489                        .cloned()
13490                        .collect();
13491
13492                    AuditOpinionInput {
13493                        entity_code: eng.client_entity_id.clone(),
13494                        entity_name: eng.client_name.clone(),
13495                        engagement_id: eng.engagement_id,
13496                        period_end: eng.period_end_date,
13497                        findings,
13498                        going_concern,
13499                        component_reports,
13500                        is_us_listed: matches!(
13501                            eng.engagement_type,
13502                            datasynth_core::audit::EngagementType::IntegratedAudit
13503                                | datasynth_core::audit::EngagementType::Sox404
13504                        ),
13505                        auditor_name: "DataSynth Audit LLP".to_string(),
13506                        engagement_partner: "Engagement Partner".to_string(),
13507                    }
13508                })
13509                .collect();
13510
13511            let generated = opinion_gen.generate_batch(&inputs);
13512            for g in generated {
13513                snapshot.key_audit_matters.extend(g.key_audit_matters);
13514                snapshot.audit_opinions.push(g.opinion);
13515            }
13516            debug!(
13517                "Generated {} audit opinions with {} key audit matters",
13518                snapshot.audit_opinions.len(),
13519                snapshot.key_audit_matters.len()
13520            );
13521        }
13522
13523        Ok(snapshot)
13524    }
13525
13526    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
13527    ///
13528    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
13529    /// from the current orchestrator state, runs the FSM engine, and maps the
13530    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
13531    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
13532    fn generate_audit_data_with_fsm(
13533        &mut self,
13534        entries: &[JournalEntry],
13535    ) -> SynthResult<AuditSnapshot> {
13536        use datasynth_audit_fsm::{
13537            context::EngagementContext,
13538            engine::AuditFsmEngine,
13539            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
13540        };
13541        use rand::SeedableRng;
13542        use rand_chacha::ChaCha8Rng;
13543
13544        info!("Audit FSM: generating audit data via FSM engine");
13545
13546        let fsm_config = self
13547            .config
13548            .audit
13549            .fsm
13550            .as_ref()
13551            .expect("FSM config must be present when FSM is enabled");
13552
13553        // 1. Load blueprint from config string.
13554        let bwp = match fsm_config.blueprint.as_str() {
13555            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
13556            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
13557            _ => {
13558                warn!(
13559                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
13560                    fsm_config.blueprint
13561                );
13562                BlueprintWithPreconditions::load_builtin_fsa()
13563            }
13564        }
13565        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
13566
13567        // 2. Load overlay from config string.
13568        let overlay = match fsm_config.overlay.as_str() {
13569            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
13570            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
13571            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
13572            _ => {
13573                warn!(
13574                    "Unknown FSM overlay '{}', falling back to builtin:default",
13575                    fsm_config.overlay
13576                );
13577                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
13578            }
13579        }
13580        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
13581
13582        // 3. Build EngagementContext from orchestrator state.
13583        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
13584            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
13585        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
13586
13587        // Determine the engagement entity early so we can filter JEs.
13588        let company = self.config.companies.first();
13589        let company_code = company
13590            .map(|c| c.code.clone())
13591            .unwrap_or_else(|| "UNKNOWN".to_string());
13592        let company_name = company
13593            .map(|c| c.name.clone())
13594            .unwrap_or_else(|| "Unknown Company".to_string());
13595        let currency = company
13596            .map(|c| c.currency.clone())
13597            .unwrap_or_else(|| "USD".to_string());
13598
13599        // Filter JEs to the engagement entity for single-company coherence.
13600        let entity_entries: Vec<_> = entries
13601            .iter()
13602            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
13603            .cloned()
13604            .collect();
13605        let entries = &entity_entries; // Shadow the parameter for remaining usage
13606
13607        // Financial aggregates from journal entries.
13608        let total_revenue: rust_decimal::Decimal = entries
13609            .iter()
13610            .flat_map(|e| e.lines.iter())
13611            .filter(|l| l.account_code.starts_with('4'))
13612            .map(|l| l.credit_amount - l.debit_amount)
13613            .sum();
13614
13615        let total_assets: rust_decimal::Decimal = entries
13616            .iter()
13617            .flat_map(|e| e.lines.iter())
13618            .filter(|l| l.account_code.starts_with('1'))
13619            .map(|l| l.debit_amount - l.credit_amount)
13620            .sum();
13621
13622        let total_expenses: rust_decimal::Decimal = entries
13623            .iter()
13624            .flat_map(|e| e.lines.iter())
13625            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13626            .map(|l| l.debit_amount)
13627            .sum();
13628
13629        let equity: rust_decimal::Decimal = entries
13630            .iter()
13631            .flat_map(|e| e.lines.iter())
13632            .filter(|l| l.account_code.starts_with('3'))
13633            .map(|l| l.credit_amount - l.debit_amount)
13634            .sum();
13635
13636        let total_debt: rust_decimal::Decimal = entries
13637            .iter()
13638            .flat_map(|e| e.lines.iter())
13639            .filter(|l| l.account_code.starts_with('2'))
13640            .map(|l| l.credit_amount - l.debit_amount)
13641            .sum();
13642
13643        let pretax_income = total_revenue - total_expenses;
13644
13645        let cogs: rust_decimal::Decimal = entries
13646            .iter()
13647            .flat_map(|e| e.lines.iter())
13648            .filter(|l| l.account_code.starts_with('5'))
13649            .map(|l| l.debit_amount)
13650            .sum();
13651        let gross_profit = total_revenue - cogs;
13652
13653        let current_assets: rust_decimal::Decimal = entries
13654            .iter()
13655            .flat_map(|e| e.lines.iter())
13656            .filter(|l| {
13657                l.account_code.starts_with("10")
13658                    || l.account_code.starts_with("11")
13659                    || l.account_code.starts_with("12")
13660                    || l.account_code.starts_with("13")
13661            })
13662            .map(|l| l.debit_amount - l.credit_amount)
13663            .sum();
13664        let current_liabilities: rust_decimal::Decimal = entries
13665            .iter()
13666            .flat_map(|e| e.lines.iter())
13667            .filter(|l| {
13668                l.account_code.starts_with("20")
13669                    || l.account_code.starts_with("21")
13670                    || l.account_code.starts_with("22")
13671            })
13672            .map(|l| l.credit_amount - l.debit_amount)
13673            .sum();
13674        let working_capital = current_assets - current_liabilities;
13675
13676        let depreciation: rust_decimal::Decimal = entries
13677            .iter()
13678            .flat_map(|e| e.lines.iter())
13679            .filter(|l| l.account_code.starts_with("60"))
13680            .map(|l| l.debit_amount)
13681            .sum();
13682        let operating_cash_flow = pretax_income + depreciation;
13683
13684        // GL accounts for reference data.
13685        let accounts: Vec<String> = self
13686            .coa
13687            .as_ref()
13688            .map(|coa| {
13689                coa.get_postable_accounts()
13690                    .iter()
13691                    .map(|acc| acc.account_code().to_string())
13692                    .collect()
13693            })
13694            .unwrap_or_default();
13695
13696        // Team member IDs and display names from master data.
13697        let team_member_ids: Vec<String> = self
13698            .master_data
13699            .employees
13700            .iter()
13701            .take(8) // Cap team size
13702            .map(|e| e.employee_id.clone())
13703            .collect();
13704        let team_member_pairs: Vec<(String, String)> = self
13705            .master_data
13706            .employees
13707            .iter()
13708            .take(8)
13709            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13710            .collect();
13711
13712        let vendor_names: Vec<String> = self
13713            .master_data
13714            .vendors
13715            .iter()
13716            .map(|v| v.name.clone())
13717            .collect();
13718        let customer_names: Vec<String> = self
13719            .master_data
13720            .customers
13721            .iter()
13722            .map(|c| c.name.clone())
13723            .collect();
13724
13725        let entity_codes: Vec<String> = self
13726            .config
13727            .companies
13728            .iter()
13729            .map(|c| c.code.clone())
13730            .collect();
13731
13732        // Journal entry IDs for evidence tracing (sample up to 50).
13733        let journal_entry_ids: Vec<String> = entries
13734            .iter()
13735            .take(50)
13736            .map(|e| e.header.document_id.to_string())
13737            .collect();
13738
13739        // Account balances for risk weighting (aggregate debit - credit per account).
13740        let mut account_balances = std::collections::HashMap::<String, f64>::new();
13741        for entry in entries {
13742            for line in &entry.lines {
13743                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
13744                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
13745                *account_balances
13746                    .entry(line.account_code.clone())
13747                    .or_insert(0.0) += debit_f64 - credit_f64;
13748            }
13749        }
13750
13751        // Internal control IDs and anomaly refs are populated by the
13752        // caller when available; here we default to empty because the
13753        // orchestrator state may not have generated controls/anomalies
13754        // yet at this point in the pipeline.
13755        let control_ids: Vec<String> = Vec::new();
13756        let anomaly_refs: Vec<String> = Vec::new();
13757
13758        let mut context = EngagementContext {
13759            company_code,
13760            company_name,
13761            fiscal_year: start_date.year(),
13762            currency,
13763            total_revenue,
13764            total_assets,
13765            engagement_start: start_date,
13766            report_date: period_end,
13767            pretax_income,
13768            equity,
13769            gross_profit,
13770            working_capital,
13771            operating_cash_flow,
13772            total_debt,
13773            team_member_ids,
13774            team_member_pairs,
13775            accounts,
13776            vendor_names,
13777            customer_names,
13778            journal_entry_ids,
13779            account_balances,
13780            control_ids,
13781            anomaly_refs,
13782            journal_entries: entries.to_vec(),
13783            is_us_listed: false,
13784            entity_codes,
13785            auditor_firm_name: "DataSynth Audit LLP".into(),
13786            accounting_framework: self
13787                .config
13788                .accounting_standards
13789                .framework
13790                .map(|f| match f {
13791                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
13792                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
13793                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
13794                        "French GAAP"
13795                    }
13796                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
13797                        "German GAAP"
13798                    }
13799                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
13800                        "Dual Reporting"
13801                    }
13802                })
13803                .unwrap_or("IFRS")
13804                .into(),
13805        };
13806
13807        // 4. Create and run the FSM engine.
13808        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
13809        let rng = ChaCha8Rng::seed_from_u64(seed);
13810        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
13811
13812        let mut result = engine
13813            .run_engagement(&context)
13814            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
13815
13816        info!(
13817            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
13818             {} phases completed, duration {:.1}h",
13819            result.event_log.len(),
13820            result.artifacts.total_artifacts(),
13821            result.anomalies.len(),
13822            result.phases_completed.len(),
13823            result.total_duration_hours,
13824        );
13825
13826        // 4b. Populate financial data in the artifact bag for downstream consumers.
13827        let tb_entity = context.company_code.clone();
13828        let tb_fy = context.fiscal_year;
13829        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
13830        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
13831            entries,
13832            &tb_entity,
13833            tb_fy,
13834            self.coa.as_ref().map(|c| c.as_ref()),
13835        );
13836
13837        // 5. Map ArtifactBag fields to AuditSnapshot.
13838        let bag = result.artifacts;
13839        let mut snapshot = AuditSnapshot {
13840            engagements: bag.engagements,
13841            engagement_letters: bag.engagement_letters,
13842            materiality_calculations: bag.materiality_calculations,
13843            risk_assessments: bag.risk_assessments,
13844            combined_risk_assessments: bag.combined_risk_assessments,
13845            workpapers: bag.workpapers,
13846            evidence: bag.evidence,
13847            findings: bag.findings,
13848            judgments: bag.judgments,
13849            sampling_plans: bag.sampling_plans,
13850            sampled_items: bag.sampled_items,
13851            analytical_results: bag.analytical_results,
13852            going_concern_assessments: bag.going_concern_assessments,
13853            subsequent_events: bag.subsequent_events,
13854            audit_opinions: bag.audit_opinions,
13855            key_audit_matters: bag.key_audit_matters,
13856            procedure_steps: bag.procedure_steps,
13857            samples: bag.samples,
13858            confirmations: bag.confirmations,
13859            confirmation_responses: bag.confirmation_responses,
13860            // Store the event trail for downstream export.
13861            fsm_event_trail: Some(result.event_log),
13862            // Fields not produced by the FSM engine remain at their defaults.
13863            ..Default::default()
13864        };
13865
13866        // 6. Add static reference data (same as legacy path).
13867        {
13868            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
13869            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
13870        }
13871        {
13872            use datasynth_standards::audit::isa_reference::IsaStandard;
13873            snapshot.isa_mappings = IsaStandard::standard_entries();
13874        }
13875
13876        info!(
13877            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
13878             {} risk assessments, {} findings, {} materiality calcs",
13879            snapshot.engagements.len(),
13880            snapshot.workpapers.len(),
13881            snapshot.evidence.len(),
13882            snapshot.risk_assessments.len(),
13883            snapshot.findings.len(),
13884            snapshot.materiality_calculations.len(),
13885        );
13886
13887        Ok(snapshot)
13888    }
13889
13890    /// Export journal entries as graph data for ML training and network reconstruction.
13891    ///
13892    /// Builds a transaction graph where:
13893    /// - Nodes are GL accounts
13894    /// - Edges are money flows from credit to debit accounts
13895    /// - Edge attributes include amount, date, business process, anomaly flags
13896    fn export_graphs(
13897        &mut self,
13898        entries: &[JournalEntry],
13899        _coa: &Arc<ChartOfAccounts>,
13900        stats: &mut EnhancedGenerationStatistics,
13901    ) -> SynthResult<GraphExportSnapshot> {
13902        let pb = self.create_progress_bar(100, "Exporting Graphs");
13903
13904        let mut snapshot = GraphExportSnapshot::default();
13905
13906        // Get output directory
13907        let output_dir = self
13908            .output_path
13909            .clone()
13910            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
13911        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
13912
13913        // Process each graph type configuration
13914        for graph_type in &self.config.graph_export.graph_types {
13915            if let Some(pb) = &pb {
13916                pb.inc(10);
13917            }
13918
13919            // Build transaction graph
13920            let graph_config = TransactionGraphConfig {
13921                include_vendors: false,
13922                include_customers: false,
13923                create_debit_credit_edges: true,
13924                include_document_nodes: graph_type.include_document_nodes,
13925                min_edge_weight: graph_type.min_edge_weight,
13926                aggregate_parallel_edges: graph_type.aggregate_edges,
13927                framework: None,
13928            };
13929
13930            let mut builder = TransactionGraphBuilder::new(graph_config);
13931            builder.add_journal_entries(entries);
13932            let graph = builder.build();
13933
13934            // Update stats
13935            stats.graph_node_count += graph.node_count();
13936            stats.graph_edge_count += graph.edge_count();
13937
13938            if let Some(pb) = &pb {
13939                pb.inc(40);
13940            }
13941
13942            // Export to each configured format
13943            for format in &self.config.graph_export.formats {
13944                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
13945
13946                // Create output directory
13947                if let Err(e) = std::fs::create_dir_all(&format_dir) {
13948                    warn!("Failed to create graph output directory: {}", e);
13949                    continue;
13950                }
13951
13952                match format {
13953                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
13954                        let pyg_config = PyGExportConfig {
13955                            common: datasynth_graph::CommonExportConfig {
13956                                export_node_features: true,
13957                                export_edge_features: true,
13958                                export_node_labels: true,
13959                                export_edge_labels: true,
13960                                export_masks: true,
13961                                train_ratio: self.config.graph_export.train_ratio,
13962                                val_ratio: self.config.graph_export.validation_ratio,
13963                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
13964                            },
13965                            one_hot_categoricals: false,
13966                        };
13967
13968                        let exporter = PyGExporter::new(pyg_config);
13969                        match exporter.export(&graph, &format_dir) {
13970                            Ok(metadata) => {
13971                                snapshot.exports.insert(
13972                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
13973                                    GraphExportInfo {
13974                                        name: graph_type.name.clone(),
13975                                        format: "pytorch_geometric".to_string(),
13976                                        output_path: format_dir.clone(),
13977                                        node_count: metadata.num_nodes,
13978                                        edge_count: metadata.num_edges,
13979                                    },
13980                                );
13981                                snapshot.graph_count += 1;
13982                            }
13983                            Err(e) => {
13984                                warn!("Failed to export PyTorch Geometric graph: {}", e);
13985                            }
13986                        }
13987                    }
13988                    datasynth_config::schema::GraphExportFormat::Neo4j => {
13989                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
13990
13991                        let neo4j_config = Neo4jExportConfig {
13992                            export_node_properties: true,
13993                            export_edge_properties: true,
13994                            export_features: true,
13995                            generate_cypher: true,
13996                            generate_admin_import: true,
13997                            database_name: "synth".to_string(),
13998                            cypher_batch_size: 1000,
13999                        };
14000
14001                        let exporter = Neo4jExporter::new(neo4j_config);
14002                        match exporter.export(&graph, &format_dir) {
14003                            Ok(metadata) => {
14004                                snapshot.exports.insert(
14005                                    format!("{}_{}", graph_type.name, "neo4j"),
14006                                    GraphExportInfo {
14007                                        name: graph_type.name.clone(),
14008                                        format: "neo4j".to_string(),
14009                                        output_path: format_dir.clone(),
14010                                        node_count: metadata.num_nodes,
14011                                        edge_count: metadata.num_edges,
14012                                    },
14013                                );
14014                                snapshot.graph_count += 1;
14015                            }
14016                            Err(e) => {
14017                                warn!("Failed to export Neo4j graph: {}", e);
14018                            }
14019                        }
14020                    }
14021                    datasynth_config::schema::GraphExportFormat::Dgl => {
14022                        use datasynth_graph::{DGLExportConfig, DGLExporter};
14023
14024                        let dgl_config = DGLExportConfig {
14025                            common: datasynth_graph::CommonExportConfig {
14026                                export_node_features: true,
14027                                export_edge_features: true,
14028                                export_node_labels: true,
14029                                export_edge_labels: true,
14030                                export_masks: true,
14031                                train_ratio: self.config.graph_export.train_ratio,
14032                                val_ratio: self.config.graph_export.validation_ratio,
14033                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14034                            },
14035                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
14036                            include_pickle_script: true, // DGL ecosystem standard helper
14037                        };
14038
14039                        let exporter = DGLExporter::new(dgl_config);
14040                        match exporter.export(&graph, &format_dir) {
14041                            Ok(metadata) => {
14042                                snapshot.exports.insert(
14043                                    format!("{}_{}", graph_type.name, "dgl"),
14044                                    GraphExportInfo {
14045                                        name: graph_type.name.clone(),
14046                                        format: "dgl".to_string(),
14047                                        output_path: format_dir.clone(),
14048                                        node_count: metadata.common.num_nodes,
14049                                        edge_count: metadata.common.num_edges,
14050                                    },
14051                                );
14052                                snapshot.graph_count += 1;
14053                            }
14054                            Err(e) => {
14055                                warn!("Failed to export DGL graph: {}", e);
14056                            }
14057                        }
14058                    }
14059                    datasynth_config::schema::GraphExportFormat::RustGraph => {
14060                        use datasynth_graph::{
14061                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14062                        };
14063
14064                        let rustgraph_config = RustGraphExportConfig {
14065                            include_features: true,
14066                            include_temporal: true,
14067                            include_labels: true,
14068                            source_name: "datasynth".to_string(),
14069                            batch_id: None,
14070                            output_format: RustGraphOutputFormat::JsonLines,
14071                            export_node_properties: true,
14072                            export_edge_properties: true,
14073                            pretty_print: false,
14074                        };
14075
14076                        let exporter = RustGraphExporter::new(rustgraph_config);
14077                        match exporter.export(&graph, &format_dir) {
14078                            Ok(metadata) => {
14079                                snapshot.exports.insert(
14080                                    format!("{}_{}", graph_type.name, "rustgraph"),
14081                                    GraphExportInfo {
14082                                        name: graph_type.name.clone(),
14083                                        format: "rustgraph".to_string(),
14084                                        output_path: format_dir.clone(),
14085                                        node_count: metadata.num_nodes,
14086                                        edge_count: metadata.num_edges,
14087                                    },
14088                                );
14089                                snapshot.graph_count += 1;
14090                            }
14091                            Err(e) => {
14092                                warn!("Failed to export RustGraph: {}", e);
14093                            }
14094                        }
14095                    }
14096                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14097                        // Hypergraph export is handled separately in Phase 10b
14098                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14099                    }
14100                }
14101            }
14102
14103            if let Some(pb) = &pb {
14104                pb.inc(40);
14105            }
14106        }
14107
14108        stats.graph_export_count = snapshot.graph_count;
14109        snapshot.exported = snapshot.graph_count > 0;
14110
14111        if let Some(pb) = pb {
14112            pb.finish_with_message(format!(
14113                "Graphs exported: {} graphs ({} nodes, {} edges)",
14114                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14115            ));
14116        }
14117
14118        Ok(snapshot)
14119    }
14120
14121    /// Build additional graph types (banking, approval, entity) when relevant data
14122    /// is available. These run as a late phase because the data they need (banking
14123    /// snapshot, intercompany snapshot) is only generated after the main graph
14124    /// export phase.
14125    fn build_additional_graphs(
14126        &self,
14127        banking: &BankingSnapshot,
14128        intercompany: &IntercompanySnapshot,
14129        entries: &[JournalEntry],
14130        stats: &mut EnhancedGenerationStatistics,
14131    ) {
14132        let output_dir = self
14133            .output_path
14134            .clone()
14135            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14136        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14137
14138        // Banking graph: build when banking customers and transactions exist
14139        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14140            info!("Phase 10c: Building banking network graph");
14141            let config = BankingGraphConfig::default();
14142            let mut builder = BankingGraphBuilder::new(config);
14143            builder.add_customers(&banking.customers);
14144            builder.add_accounts(&banking.accounts, &banking.customers);
14145            builder.add_transactions(&banking.transactions);
14146            let graph = builder.build();
14147
14148            let node_count = graph.node_count();
14149            let edge_count = graph.edge_count();
14150            stats.graph_node_count += node_count;
14151            stats.graph_edge_count += edge_count;
14152
14153            // Export as PyG if configured
14154            for format in &self.config.graph_export.formats {
14155                if matches!(
14156                    format,
14157                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14158                ) {
14159                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14160                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14161                        warn!("Failed to create banking graph output dir: {}", e);
14162                        continue;
14163                    }
14164                    let pyg_config = PyGExportConfig::default();
14165                    let exporter = PyGExporter::new(pyg_config);
14166                    if let Err(e) = exporter.export(&graph, &format_dir) {
14167                        warn!("Failed to export banking graph as PyG: {}", e);
14168                    } else {
14169                        info!(
14170                            "Banking network graph exported: {} nodes, {} edges",
14171                            node_count, edge_count
14172                        );
14173                    }
14174                }
14175            }
14176        }
14177
14178        // Approval graph: build from journal entry approval workflows
14179        let approval_entries: Vec<_> = entries
14180            .iter()
14181            .filter(|je| je.header.approval_workflow.is_some())
14182            .collect();
14183
14184        if !approval_entries.is_empty() {
14185            info!(
14186                "Phase 10c: Building approval network graph ({} entries with approvals)",
14187                approval_entries.len()
14188            );
14189            let config = ApprovalGraphConfig::default();
14190            let mut builder = ApprovalGraphBuilder::new(config);
14191
14192            for je in &approval_entries {
14193                if let Some(ref wf) = je.header.approval_workflow {
14194                    for action in &wf.actions {
14195                        let record = datasynth_core::models::ApprovalRecord {
14196                            approval_id: format!(
14197                                "APR-{}-{}",
14198                                je.header.document_id, action.approval_level
14199                            ),
14200                            document_number: je.header.document_id.to_string(),
14201                            document_type: "JE".to_string(),
14202                            company_code: je.company_code().to_string(),
14203                            requester_id: wf.preparer_id.clone(),
14204                            requester_name: Some(wf.preparer_name.clone()),
14205                            approver_id: action.actor_id.clone(),
14206                            approver_name: action.actor_name.clone(),
14207                            approval_date: je.posting_date(),
14208                            action: format!("{:?}", action.action),
14209                            amount: wf.amount,
14210                            approval_limit: None,
14211                            comments: action.comments.clone(),
14212                            delegation_from: None,
14213                            is_auto_approved: false,
14214                        };
14215                        builder.add_approval(&record);
14216                    }
14217                }
14218            }
14219
14220            let graph = builder.build();
14221            let node_count = graph.node_count();
14222            let edge_count = graph.edge_count();
14223            stats.graph_node_count += node_count;
14224            stats.graph_edge_count += edge_count;
14225
14226            // Export as PyG if configured
14227            for format in &self.config.graph_export.formats {
14228                if matches!(
14229                    format,
14230                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14231                ) {
14232                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14233                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14234                        warn!("Failed to create approval graph output dir: {}", e);
14235                        continue;
14236                    }
14237                    let pyg_config = PyGExportConfig::default();
14238                    let exporter = PyGExporter::new(pyg_config);
14239                    if let Err(e) = exporter.export(&graph, &format_dir) {
14240                        warn!("Failed to export approval graph as PyG: {}", e);
14241                    } else {
14242                        info!(
14243                            "Approval network graph exported: {} nodes, {} edges",
14244                            node_count, edge_count
14245                        );
14246                    }
14247                }
14248            }
14249        }
14250
14251        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
14252        if self.config.companies.len() >= 2 {
14253            info!(
14254                "Phase 10c: Building entity relationship graph ({} companies)",
14255                self.config.companies.len()
14256            );
14257
14258            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14259                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
14260
14261            // Map CompanyConfig → Company objects
14262            let parent_code = &self.config.companies[0].code;
14263            let mut companies: Vec<datasynth_core::models::Company> =
14264                Vec::with_capacity(self.config.companies.len());
14265
14266            // First company is the parent
14267            let first = &self.config.companies[0];
14268            companies.push(datasynth_core::models::Company::parent(
14269                &first.code,
14270                &first.name,
14271                &first.country,
14272                &first.currency,
14273            ));
14274
14275            // Remaining companies are subsidiaries (100% owned by parent)
14276            for cc in self.config.companies.iter().skip(1) {
14277                companies.push(datasynth_core::models::Company::subsidiary(
14278                    &cc.code,
14279                    &cc.name,
14280                    &cc.country,
14281                    &cc.currency,
14282                    parent_code,
14283                    rust_decimal::Decimal::from(100),
14284                ));
14285            }
14286
14287            // Build IntercompanyRelationship records (same logic as phase_intercompany)
14288            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
14289                self.config
14290                    .companies
14291                    .iter()
14292                    .skip(1)
14293                    .enumerate()
14294                    .map(|(i, cc)| {
14295                        let mut rel =
14296                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
14297                                format!("REL{:03}", i + 1),
14298                                parent_code.clone(),
14299                                cc.code.clone(),
14300                                rust_decimal::Decimal::from(100),
14301                                start_date,
14302                            );
14303                        rel.functional_currency = cc.currency.clone();
14304                        rel
14305                    })
14306                    .collect();
14307
14308            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
14309            builder.add_companies(&companies);
14310            builder.add_ownership_relationships(&relationships);
14311
14312            // Thread IC matched-pair transaction edges into the entity graph
14313            for pair in &intercompany.matched_pairs {
14314                builder.add_intercompany_edge(
14315                    &pair.seller_company,
14316                    &pair.buyer_company,
14317                    pair.amount,
14318                    &format!("{:?}", pair.transaction_type),
14319                );
14320            }
14321
14322            let graph = builder.build();
14323            let node_count = graph.node_count();
14324            let edge_count = graph.edge_count();
14325            stats.graph_node_count += node_count;
14326            stats.graph_edge_count += edge_count;
14327
14328            // Export as PyG if configured
14329            for format in &self.config.graph_export.formats {
14330                if matches!(
14331                    format,
14332                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14333                ) {
14334                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
14335                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14336                        warn!("Failed to create entity graph output dir: {}", e);
14337                        continue;
14338                    }
14339                    let pyg_config = PyGExportConfig::default();
14340                    let exporter = PyGExporter::new(pyg_config);
14341                    if let Err(e) = exporter.export(&graph, &format_dir) {
14342                        warn!("Failed to export entity graph as PyG: {}", e);
14343                    } else {
14344                        info!(
14345                            "Entity relationship graph exported: {} nodes, {} edges",
14346                            node_count, edge_count
14347                        );
14348                    }
14349                }
14350            }
14351        } else {
14352            debug!(
14353                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
14354                self.config.companies.len()
14355            );
14356        }
14357    }
14358
14359    /// Export a multi-layer hypergraph for RustGraph integration.
14360    ///
14361    /// Builds a 3-layer hypergraph:
14362    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
14363    /// - Layer 2: Process Events (all process family document flows + OCPM events)
14364    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
14365    #[allow(clippy::too_many_arguments)]
14366    fn export_hypergraph(
14367        &self,
14368        coa: &Arc<ChartOfAccounts>,
14369        entries: &[JournalEntry],
14370        document_flows: &DocumentFlowSnapshot,
14371        sourcing: &SourcingSnapshot,
14372        hr: &HrSnapshot,
14373        manufacturing: &ManufacturingSnapshot,
14374        banking: &BankingSnapshot,
14375        audit: &AuditSnapshot,
14376        financial_reporting: &FinancialReportingSnapshot,
14377        ocpm: &OcpmSnapshot,
14378        compliance: &ComplianceRegulationsSnapshot,
14379        stats: &mut EnhancedGenerationStatistics,
14380    ) -> SynthResult<HypergraphExportInfo> {
14381        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
14382        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
14383        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
14384        use datasynth_graph::models::hypergraph::AggregationStrategy;
14385
14386        let hg_settings = &self.config.graph_export.hypergraph;
14387
14388        // Parse aggregation strategy from config string
14389        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
14390            "truncate" => AggregationStrategy::Truncate,
14391            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
14392            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
14393            "importance_sample" => AggregationStrategy::ImportanceSample,
14394            _ => AggregationStrategy::PoolByCounterparty,
14395        };
14396
14397        let builder_config = HypergraphConfig {
14398            max_nodes: hg_settings.max_nodes,
14399            aggregation_strategy,
14400            include_coso: hg_settings.governance_layer.include_coso,
14401            include_controls: hg_settings.governance_layer.include_controls,
14402            include_sox: hg_settings.governance_layer.include_sox,
14403            include_vendors: hg_settings.governance_layer.include_vendors,
14404            include_customers: hg_settings.governance_layer.include_customers,
14405            include_employees: hg_settings.governance_layer.include_employees,
14406            include_p2p: hg_settings.process_layer.include_p2p,
14407            include_o2c: hg_settings.process_layer.include_o2c,
14408            include_s2c: hg_settings.process_layer.include_s2c,
14409            include_h2r: hg_settings.process_layer.include_h2r,
14410            include_mfg: hg_settings.process_layer.include_mfg,
14411            include_bank: hg_settings.process_layer.include_bank,
14412            include_audit: hg_settings.process_layer.include_audit,
14413            include_r2r: hg_settings.process_layer.include_r2r,
14414            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
14415            docs_per_counterparty_threshold: hg_settings
14416                .process_layer
14417                .docs_per_counterparty_threshold,
14418            include_accounts: hg_settings.accounting_layer.include_accounts,
14419            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
14420            include_cross_layer_edges: hg_settings.cross_layer.enabled,
14421            include_compliance: self.config.compliance_regulations.enabled,
14422            include_tax: true,
14423            include_treasury: true,
14424            include_esg: true,
14425            include_project: true,
14426            include_intercompany: true,
14427            include_temporal_events: true,
14428        };
14429
14430        let mut builder = HypergraphBuilder::new(builder_config);
14431
14432        // Layer 1: Governance & Controls
14433        builder.add_coso_framework();
14434
14435        // Add controls if available (generated during JE generation)
14436        // Controls are generated per-company; we use the standard set
14437        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
14438            let controls = InternalControl::standard_controls();
14439            builder.add_controls(&controls);
14440        }
14441
14442        // Add master data
14443        builder.add_vendors(&self.master_data.vendors);
14444        builder.add_customers(&self.master_data.customers);
14445        builder.add_employees(&self.master_data.employees);
14446
14447        // Layer 2: Process Events (all process families)
14448        builder.add_p2p_documents(
14449            &document_flows.purchase_orders,
14450            &document_flows.goods_receipts,
14451            &document_flows.vendor_invoices,
14452            &document_flows.payments,
14453        );
14454        builder.add_o2c_documents(
14455            &document_flows.sales_orders,
14456            &document_flows.deliveries,
14457            &document_flows.customer_invoices,
14458        );
14459        builder.add_s2c_documents(
14460            &sourcing.sourcing_projects,
14461            &sourcing.qualifications,
14462            &sourcing.rfx_events,
14463            &sourcing.bids,
14464            &sourcing.bid_evaluations,
14465            &sourcing.contracts,
14466        );
14467        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
14468        builder.add_mfg_documents(
14469            &manufacturing.production_orders,
14470            &manufacturing.quality_inspections,
14471            &manufacturing.cycle_counts,
14472        );
14473        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
14474        builder.add_audit_documents(
14475            &audit.engagements,
14476            &audit.workpapers,
14477            &audit.findings,
14478            &audit.evidence,
14479            &audit.risk_assessments,
14480            &audit.judgments,
14481            &audit.materiality_calculations,
14482            &audit.audit_opinions,
14483            &audit.going_concern_assessments,
14484        );
14485        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
14486
14487        // OCPM events as hyperedges
14488        if let Some(ref event_log) = ocpm.event_log {
14489            builder.add_ocpm_events(event_log);
14490        }
14491
14492        // Compliance regulations as cross-layer nodes
14493        if self.config.compliance_regulations.enabled
14494            && hg_settings.governance_layer.include_controls
14495        {
14496            // Reconstruct ComplianceStandard objects from the registry
14497            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14498            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
14499                .standard_records
14500                .iter()
14501                .filter_map(|r| {
14502                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
14503                    registry.get(&sid).cloned()
14504                })
14505                .collect();
14506
14507            builder.add_compliance_regulations(
14508                &standards,
14509                &compliance.findings,
14510                &compliance.filings,
14511            );
14512        }
14513
14514        // Layer 3: Accounting Network
14515        builder.add_accounts(coa);
14516        builder.add_journal_entries_as_hyperedges(entries);
14517
14518        // Build the hypergraph
14519        let hypergraph = builder.build();
14520
14521        // Export
14522        let output_dir = self
14523            .output_path
14524            .clone()
14525            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14526        let hg_dir = output_dir
14527            .join(&self.config.graph_export.output_subdirectory)
14528            .join(&hg_settings.output_subdirectory);
14529
14530        // Branch on output format
14531        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
14532            "unified" => {
14533                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14534                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14535                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
14536                })?;
14537                (
14538                    metadata.num_nodes,
14539                    metadata.num_edges,
14540                    metadata.num_hyperedges,
14541                )
14542            }
14543            _ => {
14544                // "native" or any unrecognized format → use existing exporter
14545                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
14546                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
14547                    SynthError::generation(format!("Hypergraph export failed: {e}"))
14548                })?;
14549                (
14550                    metadata.num_nodes,
14551                    metadata.num_edges,
14552                    metadata.num_hyperedges,
14553                )
14554            }
14555        };
14556
14557        // Stream to RustGraph ingest endpoint if configured
14558        #[cfg(feature = "streaming")]
14559        if let Some(ref target_url) = hg_settings.stream_target {
14560            use crate::stream_client::{StreamClient, StreamConfig};
14561            use std::io::Write as _;
14562
14563            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
14564            let stream_config = StreamConfig {
14565                target_url: target_url.clone(),
14566                batch_size: hg_settings.stream_batch_size,
14567                api_key,
14568                ..StreamConfig::default()
14569            };
14570
14571            match StreamClient::new(stream_config) {
14572                Ok(mut client) => {
14573                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
14574                    match exporter.export_to_writer(&hypergraph, &mut client) {
14575                        Ok(_) => {
14576                            if let Err(e) = client.flush() {
14577                                warn!("Failed to flush stream client: {}", e);
14578                            } else {
14579                                info!("Streamed {} records to {}", client.total_sent(), target_url);
14580                            }
14581                        }
14582                        Err(e) => {
14583                            warn!("Streaming export failed: {}", e);
14584                        }
14585                    }
14586                }
14587                Err(e) => {
14588                    warn!("Failed to create stream client: {}", e);
14589                }
14590            }
14591        }
14592
14593        // Update stats
14594        stats.graph_node_count += num_nodes;
14595        stats.graph_edge_count += num_edges;
14596        stats.graph_export_count += 1;
14597
14598        Ok(HypergraphExportInfo {
14599            node_count: num_nodes,
14600            edge_count: num_edges,
14601            hyperedge_count: num_hyperedges,
14602            output_path: hg_dir,
14603        })
14604    }
14605
14606    /// Generate banking KYC/AML data.
14607    ///
14608    /// Creates banking customers, accounts, and transactions with AML typology injection.
14609    /// Uses the BankingOrchestrator from synth-banking crate.
14610    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
14611        let pb = self.create_progress_bar(100, "Generating Banking Data");
14612
14613        // Build the banking orchestrator from config
14614        let orchestrator = BankingOrchestratorBuilder::new()
14615            .config(self.config.banking.clone())
14616            .seed(self.seed + 9000)
14617            .country_pack(self.primary_pack().clone())
14618            .build();
14619
14620        if let Some(pb) = &pb {
14621            pb.inc(10);
14622        }
14623
14624        // Generate the banking data
14625        let result = orchestrator.generate();
14626
14627        if let Some(pb) = &pb {
14628            pb.inc(90);
14629            pb.finish_with_message(format!(
14630                "Banking: {} customers, {} transactions",
14631                result.customers.len(),
14632                result.transactions.len()
14633            ));
14634        }
14635
14636        // Cross-reference banking customers with core master data so that
14637        // banking customer names align with the enterprise customer list.
14638        // We rotate through core customers, overlaying their name and country
14639        // onto the generated banking customers where possible.
14640        let mut banking_customers = result.customers;
14641        let core_customers = &self.master_data.customers;
14642        if !core_customers.is_empty() {
14643            for (i, bc) in banking_customers.iter_mut().enumerate() {
14644                let core = &core_customers[i % core_customers.len()];
14645                bc.name = CustomerName::business(&core.name);
14646                bc.residence_country = core.country.clone();
14647                bc.enterprise_customer_id = Some(core.customer_id.clone());
14648            }
14649            debug!(
14650                "Cross-referenced {} banking customers with {} core customers",
14651                banking_customers.len(),
14652                core_customers.len()
14653            );
14654        }
14655
14656        Ok(BankingSnapshot {
14657            customers: banking_customers,
14658            accounts: result.accounts,
14659            transactions: result.transactions,
14660            transaction_labels: result.transaction_labels,
14661            customer_labels: result.customer_labels,
14662            account_labels: result.account_labels,
14663            relationship_labels: result.relationship_labels,
14664            narratives: result.narratives,
14665            suspicious_count: result.stats.suspicious_count,
14666            scenario_count: result.scenarios.len(),
14667        })
14668    }
14669
14670    /// Calculate total transactions to generate.
14671    fn calculate_total_transactions(&self) -> u64 {
14672        let months = self.config.global.period_months as f64;
14673        self.config
14674            .companies
14675            .iter()
14676            .map(|c| {
14677                let annual = c.annual_transaction_volume.count() as f64;
14678                let weighted = annual * c.volume_weight;
14679                (weighted * months / 12.0) as u64
14680            })
14681            .sum()
14682    }
14683
14684    /// Create a progress bar if progress display is enabled.
14685    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
14686        if !self.phase_config.show_progress {
14687            return None;
14688        }
14689
14690        let pb = if let Some(mp) = &self.multi_progress {
14691            mp.add(ProgressBar::new(total))
14692        } else {
14693            ProgressBar::new(total)
14694        };
14695
14696        pb.set_style(
14697            ProgressStyle::default_bar()
14698                .template(&format!(
14699                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
14700                ))
14701                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
14702                .progress_chars("#>-"),
14703        );
14704
14705        Some(pb)
14706    }
14707
14708    /// Get the generated chart of accounts.
14709    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
14710        self.coa.clone()
14711    }
14712
14713    /// Get the generated master data.
14714    pub fn get_master_data(&self) -> &MasterDataSnapshot {
14715        &self.master_data
14716    }
14717
14718    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
14719    fn phase_compliance_regulations(
14720        &mut self,
14721        _stats: &mut EnhancedGenerationStatistics,
14722    ) -> SynthResult<ComplianceRegulationsSnapshot> {
14723        if !self.phase_config.generate_compliance_regulations {
14724            return Ok(ComplianceRegulationsSnapshot::default());
14725        }
14726
14727        info!("Phase: Generating Compliance Regulations Data");
14728
14729        let cr_config = &self.config.compliance_regulations;
14730
14731        // Determine jurisdictions: from config or inferred from companies
14732        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
14733            self.config
14734                .companies
14735                .iter()
14736                .map(|c| c.country.clone())
14737                .collect::<std::collections::HashSet<_>>()
14738                .into_iter()
14739                .collect()
14740        } else {
14741            cr_config.jurisdictions.clone()
14742        };
14743
14744        // Determine reference date
14745        let fallback_date =
14746            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
14747        let reference_date = cr_config
14748            .reference_date
14749            .as_ref()
14750            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
14751            .unwrap_or_else(|| {
14752                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14753                    .unwrap_or(fallback_date)
14754            });
14755
14756        // Generate standards registry data
14757        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
14758        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
14759        let cross_reference_records = reg_gen.generate_cross_reference_records();
14760        let jurisdiction_records =
14761            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
14762
14763        info!(
14764            "  Standards: {} records, {} cross-references, {} jurisdictions",
14765            standard_records.len(),
14766            cross_reference_records.len(),
14767            jurisdiction_records.len()
14768        );
14769
14770        // Generate audit procedures (if enabled)
14771        let audit_procedures = if cr_config.audit_procedures.enabled {
14772            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
14773                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
14774                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
14775                confidence_level: cr_config.audit_procedures.confidence_level,
14776                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
14777            };
14778            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
14779                self.seed + 9000,
14780                proc_config,
14781            );
14782            let registry = reg_gen.registry();
14783            let mut all_procs = Vec::new();
14784            for jurisdiction in &jurisdictions {
14785                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
14786                all_procs.extend(procs);
14787            }
14788            info!("  Audit procedures: {}", all_procs.len());
14789            all_procs
14790        } else {
14791            Vec::new()
14792        };
14793
14794        // Generate compliance findings (if enabled)
14795        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
14796            let finding_config =
14797                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
14798                    finding_rate: cr_config.findings.finding_rate,
14799                    material_weakness_rate: cr_config.findings.material_weakness_rate,
14800                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
14801                    generate_remediation: cr_config.findings.generate_remediation,
14802                };
14803            let mut finding_gen =
14804                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
14805                    self.seed + 9100,
14806                    finding_config,
14807                );
14808            let mut all_findings = Vec::new();
14809            for company in &self.config.companies {
14810                let company_findings =
14811                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
14812                all_findings.extend(company_findings);
14813            }
14814            info!("  Compliance findings: {}", all_findings.len());
14815            all_findings
14816        } else {
14817            Vec::new()
14818        };
14819
14820        // Generate regulatory filings (if enabled)
14821        let filings = if cr_config.filings.enabled {
14822            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
14823                filing_types: cr_config.filings.filing_types.clone(),
14824                generate_status_progression: cr_config.filings.generate_status_progression,
14825            };
14826            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
14827                self.seed + 9200,
14828                filing_config,
14829            );
14830            let company_codes: Vec<String> = self
14831                .config
14832                .companies
14833                .iter()
14834                .map(|c| c.code.clone())
14835                .collect();
14836            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14837                .unwrap_or(fallback_date);
14838            let filings = filing_gen.generate_filings(
14839                &company_codes,
14840                &jurisdictions,
14841                start_date,
14842                self.config.global.period_months,
14843            );
14844            info!("  Regulatory filings: {}", filings.len());
14845            filings
14846        } else {
14847            Vec::new()
14848        };
14849
14850        // Build compliance graph (if enabled)
14851        let compliance_graph = if cr_config.graph.enabled {
14852            let graph_config = datasynth_graph::ComplianceGraphConfig {
14853                include_standard_nodes: cr_config.graph.include_compliance_nodes,
14854                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
14855                include_cross_references: cr_config.graph.include_cross_references,
14856                include_supersession_edges: cr_config.graph.include_supersession_edges,
14857                include_account_links: cr_config.graph.include_account_links,
14858                include_control_links: cr_config.graph.include_control_links,
14859                include_company_links: cr_config.graph.include_company_links,
14860            };
14861            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
14862
14863            // Add standard nodes
14864            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
14865                .iter()
14866                .map(|r| datasynth_graph::StandardNodeInput {
14867                    standard_id: r.standard_id.clone(),
14868                    title: r.title.clone(),
14869                    category: r.category.clone(),
14870                    domain: r.domain.clone(),
14871                    is_active: r.is_active,
14872                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
14873                    applicable_account_types: r.applicable_account_types.clone(),
14874                    applicable_processes: r.applicable_processes.clone(),
14875                })
14876                .collect();
14877            builder.add_standards(&standard_inputs);
14878
14879            // Add jurisdiction nodes
14880            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
14881                jurisdiction_records
14882                    .iter()
14883                    .map(|r| datasynth_graph::JurisdictionNodeInput {
14884                        country_code: r.country_code.clone(),
14885                        country_name: r.country_name.clone(),
14886                        framework: r.accounting_framework.clone(),
14887                        standard_count: r.standard_count,
14888                        tax_rate: r.statutory_tax_rate,
14889                    })
14890                    .collect();
14891            builder.add_jurisdictions(&jurisdiction_inputs);
14892
14893            // Add cross-reference edges
14894            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
14895                cross_reference_records
14896                    .iter()
14897                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
14898                        from_standard: r.from_standard.clone(),
14899                        to_standard: r.to_standard.clone(),
14900                        relationship: r.relationship.clone(),
14901                        convergence_level: r.convergence_level,
14902                    })
14903                    .collect();
14904            builder.add_cross_references(&xref_inputs);
14905
14906            // Add jurisdiction→standard mappings
14907            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
14908                .iter()
14909                .map(|r| datasynth_graph::JurisdictionMappingInput {
14910                    country_code: r.jurisdiction.clone(),
14911                    standard_id: r.standard_id.clone(),
14912                })
14913                .collect();
14914            builder.add_jurisdiction_mappings(&mapping_inputs);
14915
14916            // Add procedure nodes
14917            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
14918                .iter()
14919                .map(|p| datasynth_graph::ProcedureNodeInput {
14920                    procedure_id: p.procedure_id.clone(),
14921                    standard_id: p.standard_id.clone(),
14922                    procedure_type: p.procedure_type.clone(),
14923                    sample_size: p.sample_size,
14924                    confidence_level: p.confidence_level,
14925                })
14926                .collect();
14927            builder.add_procedures(&proc_inputs);
14928
14929            // Add finding nodes
14930            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
14931                .iter()
14932                .map(|f| datasynth_graph::FindingNodeInput {
14933                    finding_id: f.finding_id.to_string(),
14934                    standard_id: f
14935                        .related_standards
14936                        .first()
14937                        .map(|s| s.as_str().to_string())
14938                        .unwrap_or_default(),
14939                    severity: f.severity.to_string(),
14940                    deficiency_level: f.deficiency_level.to_string(),
14941                    severity_score: f.deficiency_level.severity_score(),
14942                    control_id: f.control_id.clone(),
14943                    affected_accounts: f.affected_accounts.clone(),
14944                })
14945                .collect();
14946            builder.add_findings(&finding_inputs);
14947
14948            // Cross-domain: link standards to accounts from chart of accounts
14949            if cr_config.graph.include_account_links {
14950                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
14951                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
14952                for std_record in &standard_records {
14953                    if let Some(std_obj) =
14954                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
14955                            &std_record.standard_id,
14956                        ))
14957                    {
14958                        for acct_type in &std_obj.applicable_account_types {
14959                            account_links.push(datasynth_graph::AccountLinkInput {
14960                                standard_id: std_record.standard_id.clone(),
14961                                account_code: acct_type.clone(),
14962                                account_name: acct_type.clone(),
14963                            });
14964                        }
14965                    }
14966                }
14967                builder.add_account_links(&account_links);
14968            }
14969
14970            // Cross-domain: link standards to internal controls
14971            if cr_config.graph.include_control_links {
14972                let mut control_links = Vec::new();
14973                // SOX/PCAOB standards link to all controls
14974                let sox_like_ids: Vec<String> = standard_records
14975                    .iter()
14976                    .filter(|r| {
14977                        r.standard_id.starts_with("SOX")
14978                            || r.standard_id.starts_with("PCAOB-AS-2201")
14979                    })
14980                    .map(|r| r.standard_id.clone())
14981                    .collect();
14982                // Get control IDs from config (C001-C060 standard controls)
14983                let control_ids = [
14984                    ("C001", "Cash Controls"),
14985                    ("C002", "Large Transaction Approval"),
14986                    ("C010", "PO Approval"),
14987                    ("C011", "Three-Way Match"),
14988                    ("C020", "Revenue Recognition"),
14989                    ("C021", "Credit Check"),
14990                    ("C030", "Manual JE Approval"),
14991                    ("C031", "Period Close Review"),
14992                    ("C032", "Account Reconciliation"),
14993                    ("C040", "Payroll Processing"),
14994                    ("C050", "Fixed Asset Capitalization"),
14995                    ("C060", "Intercompany Elimination"),
14996                ];
14997                for sox_id in &sox_like_ids {
14998                    for (ctrl_id, ctrl_name) in &control_ids {
14999                        control_links.push(datasynth_graph::ControlLinkInput {
15000                            standard_id: sox_id.clone(),
15001                            control_id: ctrl_id.to_string(),
15002                            control_name: ctrl_name.to_string(),
15003                        });
15004                    }
15005                }
15006                builder.add_control_links(&control_links);
15007            }
15008
15009            // Cross-domain: filing nodes with company links
15010            if cr_config.graph.include_company_links {
15011                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15012                    .iter()
15013                    .enumerate()
15014                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
15015                        filing_id: format!("F{:04}", i + 1),
15016                        filing_type: f.filing_type.to_string(),
15017                        company_code: f.company_code.clone(),
15018                        jurisdiction: f.jurisdiction.clone(),
15019                        status: format!("{:?}", f.status),
15020                    })
15021                    .collect();
15022                builder.add_filings(&filing_inputs);
15023            }
15024
15025            let graph = builder.build();
15026            info!(
15027                "  Compliance graph: {} nodes, {} edges",
15028                graph.nodes.len(),
15029                graph.edges.len()
15030            );
15031            Some(graph)
15032        } else {
15033            None
15034        };
15035
15036        self.check_resources_with_log("post-compliance-regulations")?;
15037
15038        Ok(ComplianceRegulationsSnapshot {
15039            standard_records,
15040            cross_reference_records,
15041            jurisdiction_records,
15042            audit_procedures,
15043            findings,
15044            filings,
15045            compliance_graph,
15046        })
15047    }
15048
15049    /// Build a lineage graph describing config → phase → output relationships.
15050    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15051        use super::lineage::LineageGraphBuilder;
15052
15053        let mut builder = LineageGraphBuilder::new();
15054
15055        // Config sections
15056        builder.add_config_section("config:global", "Global Config");
15057        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15058        builder.add_config_section("config:transactions", "Transaction Config");
15059
15060        // Generator phases
15061        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15062        builder.add_generator_phase("phase:je", "Journal Entry Generation");
15063
15064        // Config → phase edges
15065        builder.configured_by("phase:coa", "config:chart_of_accounts");
15066        builder.configured_by("phase:je", "config:transactions");
15067
15068        // Output files
15069        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15070        builder.produced_by("output:je", "phase:je");
15071
15072        // Optional phases based on config
15073        if self.phase_config.generate_master_data {
15074            builder.add_config_section("config:master_data", "Master Data Config");
15075            builder.add_generator_phase("phase:master_data", "Master Data Generation");
15076            builder.configured_by("phase:master_data", "config:master_data");
15077            builder.input_to("phase:master_data", "phase:je");
15078        }
15079
15080        if self.phase_config.generate_document_flows {
15081            builder.add_config_section("config:document_flows", "Document Flow Config");
15082            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15083            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15084            builder.configured_by("phase:p2p", "config:document_flows");
15085            builder.configured_by("phase:o2c", "config:document_flows");
15086
15087            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15088            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15089            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15090            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15091            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15092
15093            builder.produced_by("output:po", "phase:p2p");
15094            builder.produced_by("output:gr", "phase:p2p");
15095            builder.produced_by("output:vi", "phase:p2p");
15096            builder.produced_by("output:so", "phase:o2c");
15097            builder.produced_by("output:ci", "phase:o2c");
15098        }
15099
15100        if self.phase_config.inject_anomalies {
15101            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15102            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15103            builder.configured_by("phase:anomaly", "config:fraud");
15104            builder.add_output_file(
15105                "output:labels",
15106                "Anomaly Labels",
15107                "labels/anomaly_labels.csv",
15108            );
15109            builder.produced_by("output:labels", "phase:anomaly");
15110        }
15111
15112        if self.phase_config.generate_audit {
15113            builder.add_config_section("config:audit", "Audit Config");
15114            builder.add_generator_phase("phase:audit", "Audit Data Generation");
15115            builder.configured_by("phase:audit", "config:audit");
15116        }
15117
15118        if self.phase_config.generate_banking {
15119            builder.add_config_section("config:banking", "Banking Config");
15120            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15121            builder.configured_by("phase:banking", "config:banking");
15122        }
15123
15124        if self.config.llm.enabled {
15125            builder.add_config_section("config:llm", "LLM Enrichment Config");
15126            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15127            builder.configured_by("phase:llm_enrichment", "config:llm");
15128        }
15129
15130        if self.config.diffusion.enabled {
15131            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15132            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15133            builder.configured_by("phase:diffusion", "config:diffusion");
15134        }
15135
15136        if self.config.causal.enabled {
15137            builder.add_config_section("config:causal", "Causal Generation Config");
15138            builder.add_generator_phase("phase:causal", "Causal Overlay");
15139            builder.configured_by("phase:causal", "config:causal");
15140        }
15141
15142        builder.build()
15143    }
15144
15145    // -----------------------------------------------------------------------
15146    // Trial-balance helpers used to replace hardcoded proxy values
15147    // -----------------------------------------------------------------------
15148
15149    /// Compute total revenue for a company from its journal entries.
15150    ///
15151    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15152    /// net credits on all revenue-account lines filtered to `company_code`.
15153    fn compute_company_revenue(
15154        entries: &[JournalEntry],
15155        company_code: &str,
15156    ) -> rust_decimal::Decimal {
15157        use rust_decimal::Decimal;
15158        let mut revenue = Decimal::ZERO;
15159        for je in entries {
15160            if je.header.company_code != company_code {
15161                continue;
15162            }
15163            for line in &je.lines {
15164                if line.gl_account.starts_with('4') {
15165                    // Revenue is credit-normal
15166                    revenue += line.credit_amount - line.debit_amount;
15167                }
15168            }
15169        }
15170        revenue.max(Decimal::ZERO)
15171    }
15172
15173    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15174    ///
15175    /// Asset accounts start with "1"; liability accounts start with "2".
15176    fn compute_entity_net_assets(
15177        entries: &[JournalEntry],
15178        entity_code: &str,
15179    ) -> rust_decimal::Decimal {
15180        use rust_decimal::Decimal;
15181        let mut asset_net = Decimal::ZERO;
15182        let mut liability_net = Decimal::ZERO;
15183        for je in entries {
15184            if je.header.company_code != entity_code {
15185                continue;
15186            }
15187            for line in &je.lines {
15188                if line.gl_account.starts_with('1') {
15189                    asset_net += line.debit_amount - line.credit_amount;
15190                } else if line.gl_account.starts_with('2') {
15191                    liability_net += line.credit_amount - line.debit_amount;
15192                }
15193            }
15194        }
15195        asset_net - liability_net
15196    }
15197
15198    /// v3.5.1+: Run the statistical validation suite configured in
15199    /// `distributions.validation.tests` over the final amount
15200    /// distribution.  Collects every non-zero line-level amount (debit +
15201    /// credit) and hands it to the runners in
15202    /// `datasynth_core::distributions::validation`.
15203    ///
15204    /// Returns `Ok(None)` when validation is disabled (the default).
15205    /// When `reporting.fail_on_error = true` and any test fails, returns
15206    /// `Err` with a concise message; otherwise attaches the report to
15207    /// the result and lets callers inspect it.
15208    fn phase_statistical_validation(
15209        &self,
15210        entries: &[JournalEntry],
15211    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15212        use datasynth_config::schema::StatisticalTestConfig;
15213        use datasynth_core::distributions::{
15214            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15215            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15216        };
15217        use rust_decimal::prelude::ToPrimitive;
15218
15219        let cfg = &self.config.distributions.validation;
15220        if !cfg.enabled {
15221            return Ok(None);
15222        }
15223
15224        // Collect per-line positive amounts (debit + credit is zero on the
15225        // non-posting side, so this naturally picks the magnitude).
15226        let amounts: Vec<rust_decimal::Decimal> = entries
15227            .iter()
15228            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15229            .filter(|a| *a > rust_decimal::Decimal::ZERO)
15230            .collect();
15231
15232        // v4.1.0+ paired (amount, line_count) per entry for correlation
15233        // checks. Amount per entry is the debit-side total (= credit-side
15234        // total for a balanced entry).
15235        let paired_amount_linecount: Vec<(f64, f64)> = entries
15236            .iter()
15237            .filter_map(|je| {
15238                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
15239                if amt > rust_decimal::Decimal::ZERO {
15240                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
15241                } else {
15242                    None
15243                }
15244            })
15245            .collect();
15246
15247        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
15248        for test_cfg in &cfg.tests {
15249            match test_cfg {
15250                StatisticalTestConfig::BenfordFirstDigit {
15251                    threshold_mad,
15252                    warning_mad,
15253                } => {
15254                    results.push(run_benford_first_digit(
15255                        &amounts,
15256                        *threshold_mad,
15257                        *warning_mad,
15258                    ));
15259                }
15260                StatisticalTestConfig::ChiSquared { bins, significance } => {
15261                    results.push(run_chi_squared(&amounts, *bins, *significance));
15262                }
15263                StatisticalTestConfig::DistributionFit {
15264                    target: _,
15265                    ks_significance,
15266                    method: _,
15267                } => {
15268                    // v3.5.1+: log-uniformity KS check. Target-specific
15269                    // fits against Normal / Exponential land in v4.1.1+.
15270                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
15271                }
15272                StatisticalTestConfig::AndersonDarling {
15273                    target: _,
15274                    significance,
15275                } => {
15276                    // v4.1.0+: A*² statistic against log-normal on the
15277                    // log-scale. Other targets follow the same pattern.
15278                    results.push(run_anderson_darling(&amounts, *significance));
15279                }
15280                StatisticalTestConfig::CorrelationCheck {
15281                    expected_correlations,
15282                } => {
15283                    // v4.1.0+: (amount, line_count) is tracked today.
15284                    // Other pairs resolve to Skipped pending richer
15285                    // per-entry attribute collection.
15286                    if expected_correlations.is_empty() {
15287                        results.push(StatisticalTestResult {
15288                            name: "correlation_check".to_string(),
15289                            outcome: TestOutcome::Skipped,
15290                            statistic: 0.0,
15291                            threshold: 0.0,
15292                            message: "no expected correlations declared".to_string(),
15293                        });
15294                    } else {
15295                        for ec in expected_correlations {
15296                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
15297                            let is_amount_linecount = (ec.field1 == "amount"
15298                                && ec.field2 == "line_count")
15299                                || (ec.field1 == "line_count" && ec.field2 == "amount");
15300                            if is_amount_linecount {
15301                                let xs: Vec<f64> =
15302                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
15303                                let ys: Vec<f64> =
15304                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
15305                                results.push(run_correlation_check(
15306                                    &pair_key,
15307                                    &xs,
15308                                    &ys,
15309                                    ec.expected_r,
15310                                    ec.tolerance,
15311                                ));
15312                            } else {
15313                                results.push(StatisticalTestResult {
15314                                    name: format!("correlation_check_{pair_key}"),
15315                                    outcome: TestOutcome::Skipped,
15316                                    statistic: 0.0,
15317                                    threshold: ec.tolerance,
15318                                    message: format!(
15319                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
15320                                        ec.field1, ec.field2
15321                                    ),
15322                                });
15323                            }
15324                        }
15325                    }
15326                }
15327            }
15328        }
15329
15330        let report = StatisticalValidationReport {
15331            sample_count: amounts.len(),
15332            results,
15333        };
15334
15335        if cfg.reporting.fail_on_error && !report.all_passed() {
15336            let failed = report.failed_names().join(", ");
15337            return Err(SynthError::validation(format!(
15338                "statistical validation failed: {failed}"
15339            )));
15340        }
15341
15342        Ok(Some(report))
15343    }
15344
15345    /// v3.3.0: analytics-metadata phase.
15346    ///
15347    /// Runs AFTER all JE-adding phases (including Phase 20b's
15348    /// fraud-bias sweep). Four sub-generators fire in sequence, each
15349    /// gated by an individual `analytics_metadata.<flag>` toggle:
15350    ///
15351    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
15352    ///    current-period account balances.
15353    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
15354    ///    configured `global.industry`.
15355    /// 3. `ManagementReportGenerator` — management-report artefacts.
15356    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
15357    fn phase_analytics_metadata(
15358        &mut self,
15359        entries: &[JournalEntry],
15360    ) -> SynthResult<AnalyticsMetadataSnapshot> {
15361        use datasynth_generators::drift_event_generator::DriftEventGenerator;
15362        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
15363        use datasynth_generators::management_report_generator::ManagementReportGenerator;
15364        use datasynth_generators::prior_year_generator::PriorYearGenerator;
15365        use std::collections::BTreeMap;
15366
15367        let mut snap = AnalyticsMetadataSnapshot::default();
15368
15369        if !self.phase_config.generate_analytics_metadata {
15370            return Ok(snap);
15371        }
15372
15373        let cfg = &self.config.analytics_metadata;
15374        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15375            .map(|d| d.year())
15376            .unwrap_or(2025);
15377
15378        // ---- 1. Prior-year comparatives ----
15379        if cfg.prior_year {
15380            let mut gen = PriorYearGenerator::new(self.seed + 9100);
15381            for company in &self.config.companies {
15382                // Aggregate current-period balances per account code +
15383                // account name from the entries slice.
15384                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
15385                    BTreeMap::new();
15386                for je in entries {
15387                    if je.header.company_code != company.code {
15388                        continue;
15389                    }
15390                    for line in &je.lines {
15391                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
15392                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
15393                        });
15394                        entry.1 += line.debit_amount - line.credit_amount;
15395                    }
15396                }
15397                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
15398                    .into_iter()
15399                    .filter(|(_, (_, bal))| !bal.is_zero())
15400                    .map(|(code, (name, bal))| (code, name, bal))
15401                    .collect();
15402                if !current.is_empty() {
15403                    let comparatives =
15404                        gen.generate_comparatives(&company.code, fiscal_year, &current);
15405                    snap.prior_year_comparatives.extend(comparatives);
15406                }
15407            }
15408            info!(
15409                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
15410                snap.prior_year_comparatives.len(),
15411                self.config.companies.len()
15412            );
15413        }
15414
15415        // ---- 2. Industry benchmarks ----
15416        if cfg.industry_benchmark {
15417            use datasynth_core::models::IndustrySector;
15418            let industry = match self.config.global.industry {
15419                IndustrySector::Manufacturing => "manufacturing",
15420                IndustrySector::Retail => "retail",
15421                IndustrySector::FinancialServices => "financial_services",
15422                IndustrySector::Technology => "technology",
15423                IndustrySector::Healthcare => "healthcare",
15424                _ => "other",
15425            };
15426            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
15427            let benchmarks = gen.generate(industry, fiscal_year);
15428            info!(
15429                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
15430                benchmarks.len()
15431            );
15432            snap.industry_benchmarks = benchmarks;
15433        }
15434
15435        // ---- 3. Management reports ----
15436        if cfg.management_reports {
15437            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
15438            let period_months = self.config.global.period_months;
15439            for company in &self.config.companies {
15440                let reports =
15441                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
15442                snap.management_reports.extend(reports);
15443            }
15444            info!(
15445                "v3.3.0 analytics: {} management reports across {} companies",
15446                snap.management_reports.len(),
15447                self.config.companies.len()
15448            );
15449        }
15450
15451        // ---- 4. Drift-event labels ----
15452        if cfg.drift_events {
15453            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
15454                .expect("hardcoded NaiveDate 2025-01-01 is valid");
15455            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15456                .unwrap_or(fallback_start);
15457            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
15458            let mut gen = DriftEventGenerator::new(self.seed + 9400);
15459            let drifts = gen.generate_standalone_drifts(start_date, end_date);
15460            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
15461            snap.drift_events = drifts;
15462        }
15463        // `entries` parameter reserved for future JE-aware drift detection
15464        let _ = entries;
15465
15466        Ok(snap)
15467    }
15468}
15469
15470/// Get the directory name for a graph export format.
15471fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
15472    match format {
15473        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
15474        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
15475        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
15476        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
15477        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
15478    }
15479}
15480
15481/// Aggregate journal entry lines into per-account trial balance rows.
15482///
15483/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
15484/// debit/credit totals and a net balance (debit minus credit).
15485fn compute_trial_balance_entries(
15486    entries: &[JournalEntry],
15487    entity_code: &str,
15488    fiscal_year: i32,
15489    coa: Option<&ChartOfAccounts>,
15490) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
15491    use std::collections::BTreeMap;
15492
15493    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
15494        BTreeMap::new();
15495
15496    for je in entries {
15497        for line in &je.lines {
15498            let entry = balances.entry(line.account_code.clone()).or_default();
15499            entry.0 += line.debit_amount;
15500            entry.1 += line.credit_amount;
15501        }
15502    }
15503
15504    balances
15505        .into_iter()
15506        .map(
15507            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
15508                account_description: coa
15509                    .and_then(|c| c.get_account(&account_code))
15510                    .map(|a| a.description().to_string())
15511                    .unwrap_or_else(|| account_code.clone()),
15512                account_code,
15513                debit_balance: debit,
15514                credit_balance: credit,
15515                net_balance: debit - credit,
15516                entity_code: entity_code.to_string(),
15517                period: format!("FY{}", fiscal_year),
15518            },
15519        )
15520        .collect()
15521}
15522
15523#[cfg(test)]
15524#[allow(clippy::unwrap_used)]
15525mod tests {
15526    use super::*;
15527    use datasynth_config::schema::*;
15528
15529    fn create_test_config() -> GeneratorConfig {
15530        GeneratorConfig {
15531            global: GlobalConfig {
15532                industry: IndustrySector::Manufacturing,
15533                start_date: "2024-01-01".to_string(),
15534                period_months: 1,
15535                seed: Some(42),
15536                parallel: false,
15537                group_currency: "USD".to_string(),
15538                presentation_currency: None,
15539                worker_threads: 0,
15540                memory_limit_mb: 0,
15541                fiscal_year_months: None,
15542            },
15543            companies: vec![CompanyConfig {
15544                code: "1000".to_string(),
15545                name: "Test Company".to_string(),
15546                currency: "USD".to_string(),
15547                functional_currency: None,
15548                country: "US".to_string(),
15549                annual_transaction_volume: TransactionVolume::TenK,
15550                volume_weight: 1.0,
15551                fiscal_year_variant: "K4".to_string(),
15552            }],
15553            chart_of_accounts: ChartOfAccountsConfig {
15554                complexity: CoAComplexity::Small,
15555                industry_specific: true,
15556                custom_accounts: None,
15557                min_hierarchy_depth: 2,
15558                max_hierarchy_depth: 4,
15559            },
15560            transactions: TransactionConfig::default(),
15561            output: OutputConfig::default(),
15562            fraud: FraudConfig::default(),
15563            internal_controls: InternalControlsConfig::default(),
15564            business_processes: BusinessProcessConfig::default(),
15565            user_personas: UserPersonaConfig::default(),
15566            templates: TemplateConfig::default(),
15567            approval: ApprovalConfig::default(),
15568            departments: DepartmentConfig::default(),
15569            master_data: MasterDataConfig::default(),
15570            document_flows: DocumentFlowConfig::default(),
15571            intercompany: IntercompanyConfig::default(),
15572            balance: BalanceConfig::default(),
15573            ocpm: OcpmConfig::default(),
15574            audit: AuditGenerationConfig::default(),
15575            banking: datasynth_banking::BankingConfig::default(),
15576            data_quality: DataQualitySchemaConfig::default(),
15577            scenario: ScenarioConfig::default(),
15578            temporal: TemporalDriftConfig::default(),
15579            graph_export: GraphExportConfig::default(),
15580            streaming: StreamingSchemaConfig::default(),
15581            rate_limit: RateLimitSchemaConfig::default(),
15582            temporal_attributes: TemporalAttributeSchemaConfig::default(),
15583            relationships: RelationshipSchemaConfig::default(),
15584            accounting_standards: AccountingStandardsConfig::default(),
15585            audit_standards: AuditStandardsConfig::default(),
15586            distributions: Default::default(),
15587            temporal_patterns: Default::default(),
15588            vendor_network: VendorNetworkSchemaConfig::default(),
15589            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
15590            relationship_strength: RelationshipStrengthSchemaConfig::default(),
15591            cross_process_links: CrossProcessLinksSchemaConfig::default(),
15592            organizational_events: OrganizationalEventsSchemaConfig::default(),
15593            behavioral_drift: BehavioralDriftSchemaConfig::default(),
15594            market_drift: MarketDriftSchemaConfig::default(),
15595            drift_labeling: DriftLabelingSchemaConfig::default(),
15596            anomaly_injection: Default::default(),
15597            industry_specific: Default::default(),
15598            fingerprint_privacy: Default::default(),
15599            quality_gates: Default::default(),
15600            compliance: Default::default(),
15601            webhooks: Default::default(),
15602            llm: Default::default(),
15603            diffusion: Default::default(),
15604            causal: Default::default(),
15605            source_to_pay: Default::default(),
15606            financial_reporting: Default::default(),
15607            hr: Default::default(),
15608            manufacturing: Default::default(),
15609            sales_quotes: Default::default(),
15610            tax: Default::default(),
15611            treasury: Default::default(),
15612            project_accounting: Default::default(),
15613            esg: Default::default(),
15614            country_packs: None,
15615            scenarios: Default::default(),
15616            session: Default::default(),
15617            compliance_regulations: Default::default(),
15618            analytics_metadata: Default::default(),
15619        }
15620    }
15621
15622    #[test]
15623    fn test_enhanced_orchestrator_creation() {
15624        let config = create_test_config();
15625        let orchestrator = EnhancedOrchestrator::with_defaults(config);
15626        assert!(orchestrator.is_ok());
15627    }
15628
15629    #[test]
15630    fn test_minimal_generation() {
15631        let config = create_test_config();
15632        let phase_config = PhaseConfig {
15633            generate_master_data: false,
15634            generate_document_flows: false,
15635            generate_journal_entries: true,
15636            inject_anomalies: false,
15637            show_progress: false,
15638            ..Default::default()
15639        };
15640
15641        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15642        let result = orchestrator.generate();
15643
15644        assert!(result.is_ok());
15645        let result = result.unwrap();
15646        assert!(!result.journal_entries.is_empty());
15647    }
15648
15649    #[test]
15650    fn test_master_data_generation() {
15651        let config = create_test_config();
15652        let phase_config = PhaseConfig {
15653            generate_master_data: true,
15654            generate_document_flows: false,
15655            generate_journal_entries: false,
15656            inject_anomalies: false,
15657            show_progress: false,
15658            vendors_per_company: 5,
15659            customers_per_company: 5,
15660            materials_per_company: 10,
15661            assets_per_company: 5,
15662            employees_per_company: 10,
15663            ..Default::default()
15664        };
15665
15666        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15667        let result = orchestrator.generate().unwrap();
15668
15669        assert!(!result.master_data.vendors.is_empty());
15670        assert!(!result.master_data.customers.is_empty());
15671        assert!(!result.master_data.materials.is_empty());
15672    }
15673
15674    #[test]
15675    fn test_document_flow_generation() {
15676        let config = create_test_config();
15677        let phase_config = PhaseConfig {
15678            generate_master_data: true,
15679            generate_document_flows: true,
15680            generate_journal_entries: false,
15681            inject_anomalies: false,
15682            inject_data_quality: false,
15683            validate_balances: false,
15684            validate_coa_coverage_strict: false,
15685            generate_ocpm_events: false,
15686            show_progress: false,
15687            vendors_per_company: 5,
15688            customers_per_company: 5,
15689            materials_per_company: 10,
15690            assets_per_company: 5,
15691            employees_per_company: 10,
15692            p2p_chains: 5,
15693            o2c_chains: 5,
15694            ..Default::default()
15695        };
15696
15697        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15698        let result = orchestrator.generate().unwrap();
15699
15700        // Should have generated P2P and O2C chains
15701        assert!(!result.document_flows.p2p_chains.is_empty());
15702        assert!(!result.document_flows.o2c_chains.is_empty());
15703
15704        // Flattened documents should be populated
15705        assert!(!result.document_flows.purchase_orders.is_empty());
15706        assert!(!result.document_flows.sales_orders.is_empty());
15707    }
15708
15709    #[test]
15710    fn test_anomaly_injection() {
15711        let config = create_test_config();
15712        let phase_config = PhaseConfig {
15713            generate_master_data: false,
15714            generate_document_flows: false,
15715            generate_journal_entries: true,
15716            inject_anomalies: true,
15717            show_progress: false,
15718            ..Default::default()
15719        };
15720
15721        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15722        let result = orchestrator.generate().unwrap();
15723
15724        // Should have journal entries
15725        assert!(!result.journal_entries.is_empty());
15726
15727        // With ~833 entries and 2% rate, expect some anomalies
15728        // Note: This is probabilistic, so we just verify the structure exists
15729        assert!(result.anomaly_labels.summary.is_some());
15730    }
15731
15732    #[test]
15733    fn test_full_generation_pipeline() {
15734        let config = create_test_config();
15735        let phase_config = PhaseConfig {
15736            generate_master_data: true,
15737            generate_document_flows: true,
15738            generate_journal_entries: true,
15739            inject_anomalies: false,
15740            inject_data_quality: false,
15741            validate_balances: true,
15742            validate_coa_coverage_strict: false,
15743            generate_ocpm_events: false,
15744            show_progress: false,
15745            vendors_per_company: 3,
15746            customers_per_company: 3,
15747            materials_per_company: 5,
15748            assets_per_company: 3,
15749            employees_per_company: 5,
15750            p2p_chains: 3,
15751            o2c_chains: 3,
15752            ..Default::default()
15753        };
15754
15755        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15756        let result = orchestrator.generate().unwrap();
15757
15758        // All phases should have results
15759        assert!(!result.master_data.vendors.is_empty());
15760        assert!(!result.master_data.customers.is_empty());
15761        assert!(!result.document_flows.p2p_chains.is_empty());
15762        assert!(!result.document_flows.o2c_chains.is_empty());
15763        assert!(!result.journal_entries.is_empty());
15764        assert!(result.statistics.accounts_count > 0);
15765
15766        // Subledger linking should have run
15767        assert!(!result.subledger.ap_invoices.is_empty());
15768        assert!(!result.subledger.ar_invoices.is_empty());
15769
15770        // Balance validation should have run
15771        assert!(result.balance_validation.validated);
15772        assert!(result.balance_validation.entries_processed > 0);
15773    }
15774
15775    #[test]
15776    fn test_subledger_linking() {
15777        let config = create_test_config();
15778        let phase_config = PhaseConfig {
15779            generate_master_data: true,
15780            generate_document_flows: true,
15781            generate_journal_entries: false,
15782            inject_anomalies: false,
15783            inject_data_quality: false,
15784            validate_balances: false,
15785            validate_coa_coverage_strict: false,
15786            generate_ocpm_events: false,
15787            show_progress: false,
15788            vendors_per_company: 5,
15789            customers_per_company: 5,
15790            materials_per_company: 10,
15791            assets_per_company: 3,
15792            employees_per_company: 5,
15793            p2p_chains: 5,
15794            o2c_chains: 5,
15795            ..Default::default()
15796        };
15797
15798        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15799        let result = orchestrator.generate().unwrap();
15800
15801        // Should have document flows
15802        assert!(!result.document_flows.vendor_invoices.is_empty());
15803        assert!(!result.document_flows.customer_invoices.is_empty());
15804
15805        // Subledger should be linked from document flows
15806        assert!(!result.subledger.ap_invoices.is_empty());
15807        assert!(!result.subledger.ar_invoices.is_empty());
15808
15809        // AP invoices count should match vendor invoices count
15810        assert_eq!(
15811            result.subledger.ap_invoices.len(),
15812            result.document_flows.vendor_invoices.len()
15813        );
15814
15815        // AR invoices count should match customer invoices count
15816        assert_eq!(
15817            result.subledger.ar_invoices.len(),
15818            result.document_flows.customer_invoices.len()
15819        );
15820
15821        // Statistics should reflect subledger counts
15822        assert_eq!(
15823            result.statistics.ap_invoice_count,
15824            result.subledger.ap_invoices.len()
15825        );
15826        assert_eq!(
15827            result.statistics.ar_invoice_count,
15828            result.subledger.ar_invoices.len()
15829        );
15830    }
15831
15832    #[test]
15833    fn test_balance_validation() {
15834        let config = create_test_config();
15835        let phase_config = PhaseConfig {
15836            generate_master_data: false,
15837            generate_document_flows: false,
15838            generate_journal_entries: true,
15839            inject_anomalies: false,
15840            validate_balances: true,
15841            validate_coa_coverage_strict: false,
15842            show_progress: false,
15843            ..Default::default()
15844        };
15845
15846        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15847        let result = orchestrator.generate().unwrap();
15848
15849        // Balance validation should run
15850        assert!(result.balance_validation.validated);
15851        assert!(result.balance_validation.entries_processed > 0);
15852
15853        // Generated JEs should be balanced (no unbalanced entries)
15854        assert!(!result.balance_validation.has_unbalanced_entries);
15855
15856        // Total debits should equal total credits
15857        assert_eq!(
15858            result.balance_validation.total_debits,
15859            result.balance_validation.total_credits
15860        );
15861    }
15862
15863    #[test]
15864    fn test_statistics_accuracy() {
15865        let config = create_test_config();
15866        let phase_config = PhaseConfig {
15867            generate_master_data: true,
15868            generate_document_flows: false,
15869            generate_journal_entries: true,
15870            inject_anomalies: false,
15871            show_progress: false,
15872            vendors_per_company: 10,
15873            customers_per_company: 20,
15874            materials_per_company: 15,
15875            assets_per_company: 5,
15876            employees_per_company: 8,
15877            ..Default::default()
15878        };
15879
15880        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15881        let result = orchestrator.generate().unwrap();
15882
15883        // Statistics should match actual data
15884        assert_eq!(
15885            result.statistics.vendor_count,
15886            result.master_data.vendors.len()
15887        );
15888        assert_eq!(
15889            result.statistics.customer_count,
15890            result.master_data.customers.len()
15891        );
15892        assert_eq!(
15893            result.statistics.material_count,
15894            result.master_data.materials.len()
15895        );
15896        assert_eq!(
15897            result.statistics.total_entries as usize,
15898            result.journal_entries.len()
15899        );
15900    }
15901
15902    #[test]
15903    fn test_phase_config_defaults() {
15904        let config = PhaseConfig::default();
15905        assert!(config.generate_master_data);
15906        assert!(config.generate_document_flows);
15907        assert!(config.generate_journal_entries);
15908        assert!(!config.inject_anomalies);
15909        assert!(config.validate_balances);
15910        assert!(config.show_progress);
15911        assert!(config.vendors_per_company > 0);
15912        assert!(config.customers_per_company > 0);
15913    }
15914
15915    #[test]
15916    fn test_get_coa_before_generation() {
15917        let config = create_test_config();
15918        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
15919
15920        // Before generation, CoA should be None
15921        assert!(orchestrator.get_coa().is_none());
15922    }
15923
15924    #[test]
15925    fn test_get_coa_after_generation() {
15926        let config = create_test_config();
15927        let phase_config = PhaseConfig {
15928            generate_master_data: false,
15929            generate_document_flows: false,
15930            generate_journal_entries: true,
15931            inject_anomalies: false,
15932            show_progress: false,
15933            ..Default::default()
15934        };
15935
15936        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15937        let _ = orchestrator.generate().unwrap();
15938
15939        // After generation, CoA should be available
15940        assert!(orchestrator.get_coa().is_some());
15941    }
15942
15943    #[test]
15944    fn test_get_master_data() {
15945        let config = create_test_config();
15946        let phase_config = PhaseConfig {
15947            generate_master_data: true,
15948            generate_document_flows: false,
15949            generate_journal_entries: false,
15950            inject_anomalies: false,
15951            show_progress: false,
15952            vendors_per_company: 5,
15953            customers_per_company: 5,
15954            materials_per_company: 5,
15955            assets_per_company: 5,
15956            employees_per_company: 5,
15957            ..Default::default()
15958        };
15959
15960        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
15961        let result = orchestrator.generate().unwrap();
15962
15963        // After generate(), master_data is moved into the result
15964        assert!(!result.master_data.vendors.is_empty());
15965    }
15966
15967    #[test]
15968    fn test_with_progress_builder() {
15969        let config = create_test_config();
15970        let orchestrator = EnhancedOrchestrator::with_defaults(config)
15971            .unwrap()
15972            .with_progress(false);
15973
15974        // Should still work without progress
15975        assert!(!orchestrator.phase_config.show_progress);
15976    }
15977
15978    #[test]
15979    fn test_multi_company_generation() {
15980        let mut config = create_test_config();
15981        config.companies.push(CompanyConfig {
15982            code: "2000".to_string(),
15983            name: "Subsidiary".to_string(),
15984            currency: "EUR".to_string(),
15985            functional_currency: None,
15986            country: "DE".to_string(),
15987            annual_transaction_volume: TransactionVolume::TenK,
15988            volume_weight: 0.5,
15989            fiscal_year_variant: "K4".to_string(),
15990        });
15991
15992        let phase_config = PhaseConfig {
15993            generate_master_data: true,
15994            generate_document_flows: false,
15995            generate_journal_entries: true,
15996            inject_anomalies: false,
15997            show_progress: false,
15998            vendors_per_company: 5,
15999            customers_per_company: 5,
16000            materials_per_company: 5,
16001            assets_per_company: 5,
16002            employees_per_company: 5,
16003            ..Default::default()
16004        };
16005
16006        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16007        let result = orchestrator.generate().unwrap();
16008
16009        // Should have master data for both companies
16010        assert!(result.statistics.vendor_count >= 10); // 5 per company
16011        assert!(result.statistics.customer_count >= 10);
16012        assert!(result.statistics.companies_count == 2);
16013    }
16014
16015    #[test]
16016    fn test_empty_master_data_skips_document_flows() {
16017        let config = create_test_config();
16018        let phase_config = PhaseConfig {
16019            generate_master_data: false,   // Skip master data
16020            generate_document_flows: true, // Try to generate flows
16021            generate_journal_entries: false,
16022            inject_anomalies: false,
16023            show_progress: false,
16024            ..Default::default()
16025        };
16026
16027        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16028        let result = orchestrator.generate().unwrap();
16029
16030        // Without master data, document flows should be empty
16031        assert!(result.document_flows.p2p_chains.is_empty());
16032        assert!(result.document_flows.o2c_chains.is_empty());
16033    }
16034
16035    #[test]
16036    fn test_journal_entry_line_item_count() {
16037        let config = create_test_config();
16038        let phase_config = PhaseConfig {
16039            generate_master_data: false,
16040            generate_document_flows: false,
16041            generate_journal_entries: true,
16042            inject_anomalies: false,
16043            show_progress: false,
16044            ..Default::default()
16045        };
16046
16047        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16048        let result = orchestrator.generate().unwrap();
16049
16050        // Total line items should match sum of all entry line counts
16051        let calculated_line_items: u64 = result
16052            .journal_entries
16053            .iter()
16054            .map(|e| e.line_count() as u64)
16055            .sum();
16056        assert_eq!(result.statistics.total_line_items, calculated_line_items);
16057    }
16058
16059    #[test]
16060    fn test_audit_generation() {
16061        let config = create_test_config();
16062        let phase_config = PhaseConfig {
16063            generate_master_data: false,
16064            generate_document_flows: false,
16065            generate_journal_entries: true,
16066            inject_anomalies: false,
16067            show_progress: false,
16068            generate_audit: true,
16069            audit_engagements: 2,
16070            workpapers_per_engagement: 5,
16071            evidence_per_workpaper: 2,
16072            risks_per_engagement: 3,
16073            findings_per_engagement: 2,
16074            judgments_per_engagement: 2,
16075            ..Default::default()
16076        };
16077
16078        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16079        let result = orchestrator.generate().unwrap();
16080
16081        // Should have generated audit data
16082        assert_eq!(result.audit.engagements.len(), 2);
16083        assert!(!result.audit.workpapers.is_empty());
16084        assert!(!result.audit.evidence.is_empty());
16085        assert!(!result.audit.risk_assessments.is_empty());
16086        assert!(!result.audit.findings.is_empty());
16087        assert!(!result.audit.judgments.is_empty());
16088
16089        // New ISA entity collections should also be populated
16090        assert!(
16091            !result.audit.confirmations.is_empty(),
16092            "ISA 505 confirmations should be generated"
16093        );
16094        assert!(
16095            !result.audit.confirmation_responses.is_empty(),
16096            "ISA 505 confirmation responses should be generated"
16097        );
16098        assert!(
16099            !result.audit.procedure_steps.is_empty(),
16100            "ISA 330 procedure steps should be generated"
16101        );
16102        // Samples may or may not be generated depending on workpaper sampling methods
16103        assert!(
16104            !result.audit.analytical_results.is_empty(),
16105            "ISA 520 analytical procedures should be generated"
16106        );
16107        assert!(
16108            !result.audit.ia_functions.is_empty(),
16109            "ISA 610 IA functions should be generated (one per engagement)"
16110        );
16111        assert!(
16112            !result.audit.related_parties.is_empty(),
16113            "ISA 550 related parties should be generated"
16114        );
16115
16116        // Statistics should match
16117        assert_eq!(
16118            result.statistics.audit_engagement_count,
16119            result.audit.engagements.len()
16120        );
16121        assert_eq!(
16122            result.statistics.audit_workpaper_count,
16123            result.audit.workpapers.len()
16124        );
16125        assert_eq!(
16126            result.statistics.audit_evidence_count,
16127            result.audit.evidence.len()
16128        );
16129        assert_eq!(
16130            result.statistics.audit_risk_count,
16131            result.audit.risk_assessments.len()
16132        );
16133        assert_eq!(
16134            result.statistics.audit_finding_count,
16135            result.audit.findings.len()
16136        );
16137        assert_eq!(
16138            result.statistics.audit_judgment_count,
16139            result.audit.judgments.len()
16140        );
16141        assert_eq!(
16142            result.statistics.audit_confirmation_count,
16143            result.audit.confirmations.len()
16144        );
16145        assert_eq!(
16146            result.statistics.audit_confirmation_response_count,
16147            result.audit.confirmation_responses.len()
16148        );
16149        assert_eq!(
16150            result.statistics.audit_procedure_step_count,
16151            result.audit.procedure_steps.len()
16152        );
16153        assert_eq!(
16154            result.statistics.audit_sample_count,
16155            result.audit.samples.len()
16156        );
16157        assert_eq!(
16158            result.statistics.audit_analytical_result_count,
16159            result.audit.analytical_results.len()
16160        );
16161        assert_eq!(
16162            result.statistics.audit_ia_function_count,
16163            result.audit.ia_functions.len()
16164        );
16165        assert_eq!(
16166            result.statistics.audit_ia_report_count,
16167            result.audit.ia_reports.len()
16168        );
16169        assert_eq!(
16170            result.statistics.audit_related_party_count,
16171            result.audit.related_parties.len()
16172        );
16173        assert_eq!(
16174            result.statistics.audit_related_party_transaction_count,
16175            result.audit.related_party_transactions.len()
16176        );
16177    }
16178
16179    #[test]
16180    fn test_new_phases_disabled_by_default() {
16181        let config = create_test_config();
16182        // Verify new config fields default to disabled
16183        assert!(!config.llm.enabled);
16184        assert!(!config.diffusion.enabled);
16185        assert!(!config.causal.enabled);
16186
16187        let phase_config = PhaseConfig {
16188            generate_master_data: false,
16189            generate_document_flows: false,
16190            generate_journal_entries: true,
16191            inject_anomalies: false,
16192            show_progress: false,
16193            ..Default::default()
16194        };
16195
16196        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16197        let result = orchestrator.generate().unwrap();
16198
16199        // All new phase statistics should be zero when disabled
16200        assert_eq!(result.statistics.llm_enrichment_ms, 0);
16201        assert_eq!(result.statistics.llm_vendors_enriched, 0);
16202        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16203        assert_eq!(result.statistics.diffusion_samples_generated, 0);
16204        assert_eq!(result.statistics.causal_generation_ms, 0);
16205        assert_eq!(result.statistics.causal_samples_generated, 0);
16206        assert!(result.statistics.causal_validation_passed.is_none());
16207        assert_eq!(result.statistics.counterfactual_pair_count, 0);
16208        assert!(result.counterfactual_pairs.is_empty());
16209    }
16210
16211    #[test]
16212    fn test_counterfactual_generation_enabled() {
16213        let config = create_test_config();
16214        let phase_config = PhaseConfig {
16215            generate_master_data: false,
16216            generate_document_flows: false,
16217            generate_journal_entries: true,
16218            inject_anomalies: false,
16219            show_progress: false,
16220            generate_counterfactuals: true,
16221            generate_period_close: false, // Disable so entry count matches counterfactual pairs
16222            ..Default::default()
16223        };
16224
16225        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16226        let result = orchestrator.generate().unwrap();
16227
16228        // With JE generation enabled, counterfactual pairs should be generated
16229        if !result.journal_entries.is_empty() {
16230            assert_eq!(
16231                result.counterfactual_pairs.len(),
16232                result.journal_entries.len()
16233            );
16234            assert_eq!(
16235                result.statistics.counterfactual_pair_count,
16236                result.journal_entries.len()
16237            );
16238            // Each pair should have a distinct pair_id
16239            let ids: std::collections::HashSet<_> = result
16240                .counterfactual_pairs
16241                .iter()
16242                .map(|p| p.pair_id.clone())
16243                .collect();
16244            assert_eq!(ids.len(), result.counterfactual_pairs.len());
16245        }
16246    }
16247
16248    #[test]
16249    fn test_llm_enrichment_enabled() {
16250        let mut config = create_test_config();
16251        config.llm.enabled = true;
16252        config.llm.max_vendor_enrichments = 3;
16253
16254        let phase_config = PhaseConfig {
16255            generate_master_data: true,
16256            generate_document_flows: false,
16257            generate_journal_entries: false,
16258            inject_anomalies: false,
16259            show_progress: false,
16260            vendors_per_company: 5,
16261            customers_per_company: 3,
16262            materials_per_company: 3,
16263            assets_per_company: 3,
16264            employees_per_company: 3,
16265            ..Default::default()
16266        };
16267
16268        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16269        let result = orchestrator.generate().unwrap();
16270
16271        // LLM enrichment should have run
16272        assert!(result.statistics.llm_vendors_enriched > 0);
16273        assert!(result.statistics.llm_vendors_enriched <= 3);
16274    }
16275
16276    #[test]
16277    fn test_diffusion_enhancement_enabled() {
16278        let mut config = create_test_config();
16279        config.diffusion.enabled = true;
16280        config.diffusion.n_steps = 50;
16281        config.diffusion.sample_size = 20;
16282
16283        let phase_config = PhaseConfig {
16284            generate_master_data: false,
16285            generate_document_flows: false,
16286            generate_journal_entries: true,
16287            inject_anomalies: false,
16288            show_progress: false,
16289            ..Default::default()
16290        };
16291
16292        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16293        let result = orchestrator.generate().unwrap();
16294
16295        // Diffusion phase should have generated samples
16296        assert_eq!(result.statistics.diffusion_samples_generated, 20);
16297    }
16298
16299    #[test]
16300    fn test_causal_overlay_enabled() {
16301        let mut config = create_test_config();
16302        config.causal.enabled = true;
16303        config.causal.template = "fraud_detection".to_string();
16304        config.causal.sample_size = 100;
16305        config.causal.validate = true;
16306
16307        let phase_config = PhaseConfig {
16308            generate_master_data: false,
16309            generate_document_flows: false,
16310            generate_journal_entries: true,
16311            inject_anomalies: false,
16312            show_progress: false,
16313            ..Default::default()
16314        };
16315
16316        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16317        let result = orchestrator.generate().unwrap();
16318
16319        // Causal phase should have generated samples
16320        assert_eq!(result.statistics.causal_samples_generated, 100);
16321        // Validation should have run
16322        assert!(result.statistics.causal_validation_passed.is_some());
16323    }
16324
16325    #[test]
16326    fn test_causal_overlay_revenue_cycle_template() {
16327        let mut config = create_test_config();
16328        config.causal.enabled = true;
16329        config.causal.template = "revenue_cycle".to_string();
16330        config.causal.sample_size = 50;
16331        config.causal.validate = false;
16332
16333        let phase_config = PhaseConfig {
16334            generate_master_data: false,
16335            generate_document_flows: false,
16336            generate_journal_entries: true,
16337            inject_anomalies: false,
16338            show_progress: false,
16339            ..Default::default()
16340        };
16341
16342        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16343        let result = orchestrator.generate().unwrap();
16344
16345        // Causal phase should have generated samples
16346        assert_eq!(result.statistics.causal_samples_generated, 50);
16347        // Validation was disabled
16348        assert!(result.statistics.causal_validation_passed.is_none());
16349    }
16350
16351    #[test]
16352    fn test_all_new_phases_enabled_together() {
16353        let mut config = create_test_config();
16354        config.llm.enabled = true;
16355        config.llm.max_vendor_enrichments = 2;
16356        config.diffusion.enabled = true;
16357        config.diffusion.n_steps = 20;
16358        config.diffusion.sample_size = 10;
16359        config.causal.enabled = true;
16360        config.causal.sample_size = 50;
16361        config.causal.validate = true;
16362
16363        let phase_config = PhaseConfig {
16364            generate_master_data: true,
16365            generate_document_flows: false,
16366            generate_journal_entries: true,
16367            inject_anomalies: false,
16368            show_progress: false,
16369            vendors_per_company: 5,
16370            customers_per_company: 3,
16371            materials_per_company: 3,
16372            assets_per_company: 3,
16373            employees_per_company: 3,
16374            ..Default::default()
16375        };
16376
16377        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16378        let result = orchestrator.generate().unwrap();
16379
16380        // All three phases should have run
16381        assert!(result.statistics.llm_vendors_enriched > 0);
16382        assert_eq!(result.statistics.diffusion_samples_generated, 10);
16383        assert_eq!(result.statistics.causal_samples_generated, 50);
16384        assert!(result.statistics.causal_validation_passed.is_some());
16385    }
16386
16387    #[test]
16388    fn test_statistics_serialization_with_new_fields() {
16389        let stats = EnhancedGenerationStatistics {
16390            total_entries: 100,
16391            total_line_items: 500,
16392            llm_enrichment_ms: 42,
16393            llm_vendors_enriched: 10,
16394            diffusion_enhancement_ms: 100,
16395            diffusion_samples_generated: 50,
16396            causal_generation_ms: 200,
16397            causal_samples_generated: 100,
16398            causal_validation_passed: Some(true),
16399            ..Default::default()
16400        };
16401
16402        let json = serde_json::to_string(&stats).unwrap();
16403        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
16404
16405        assert_eq!(deserialized.llm_enrichment_ms, 42);
16406        assert_eq!(deserialized.llm_vendors_enriched, 10);
16407        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
16408        assert_eq!(deserialized.diffusion_samples_generated, 50);
16409        assert_eq!(deserialized.causal_generation_ms, 200);
16410        assert_eq!(deserialized.causal_samples_generated, 100);
16411        assert_eq!(deserialized.causal_validation_passed, Some(true));
16412    }
16413
16414    #[test]
16415    fn test_statistics_backward_compat_deserialization() {
16416        // Old JSON without the new fields should still deserialize
16417        let old_json = r#"{
16418            "total_entries": 100,
16419            "total_line_items": 500,
16420            "accounts_count": 50,
16421            "companies_count": 1,
16422            "period_months": 12,
16423            "vendor_count": 10,
16424            "customer_count": 20,
16425            "material_count": 15,
16426            "asset_count": 5,
16427            "employee_count": 8,
16428            "p2p_chain_count": 5,
16429            "o2c_chain_count": 5,
16430            "ap_invoice_count": 5,
16431            "ar_invoice_count": 5,
16432            "ocpm_event_count": 0,
16433            "ocpm_object_count": 0,
16434            "ocpm_case_count": 0,
16435            "audit_engagement_count": 0,
16436            "audit_workpaper_count": 0,
16437            "audit_evidence_count": 0,
16438            "audit_risk_count": 0,
16439            "audit_finding_count": 0,
16440            "audit_judgment_count": 0,
16441            "anomalies_injected": 0,
16442            "data_quality_issues": 0,
16443            "banking_customer_count": 0,
16444            "banking_account_count": 0,
16445            "banking_transaction_count": 0,
16446            "banking_suspicious_count": 0,
16447            "graph_export_count": 0,
16448            "graph_node_count": 0,
16449            "graph_edge_count": 0
16450        }"#;
16451
16452        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
16453
16454        // New fields should default to 0 / None
16455        assert_eq!(stats.llm_enrichment_ms, 0);
16456        assert_eq!(stats.llm_vendors_enriched, 0);
16457        assert_eq!(stats.diffusion_enhancement_ms, 0);
16458        assert_eq!(stats.diffusion_samples_generated, 0);
16459        assert_eq!(stats.causal_generation_ms, 0);
16460        assert_eq!(stats.causal_samples_generated, 0);
16461        assert!(stats.causal_validation_passed.is_none());
16462    }
16463}