Skip to main content

datasynth_runtime/
enhanced_orchestrator.rs

1//! Enhanced generation orchestrator with full feature integration.
2//!
3//! This orchestrator coordinates all generation phases:
4//! 1. Chart of Accounts generation
5//! 2. Master data generation (vendors, customers, materials, assets, employees)
6//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
7//! 4. Journal entry generation
8//! 5. Anomaly injection
9//! 6. Balance validation
10//! 7. Data quality injection
11//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
12//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
13//! 10. Graph export (accounting network for ML training and network reconstruction)
14//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
15//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
16//! 13. Causal overlay (structural causal model generation and validation)
17//! 14. Source-to-Contract (S2C) sourcing data generation
18//! 15. Bank reconciliation generation
19//! 16. Financial statement generation
20//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    EnhancedInjectionConfig,
102    // ESG anomaly labels
103    EsgAnomalyLabel,
104    EvidenceGenerator,
105    // Subledger depreciation schedule generator
106    FaDepreciationScheduleConfig,
107    FaDepreciationScheduleGenerator,
108    // Financial statement generator
109    FinancialStatementGenerator,
110    FindingGenerator,
111    // Inventory valuation generator
112    InventoryValuationGenerator,
113    InventoryValuationGeneratorConfig,
114    JournalEntryGenerator,
115    JudgmentGenerator,
116    LatePaymentDistribution,
117    // Manufacturing cost accounting + warranty provisions
118    ManufacturingCostAccounting,
119    MaterialGenerator,
120    O2CDocumentChain,
121    O2CGenerator,
122    O2CGeneratorConfig,
123    O2CPaymentBehavior,
124    P2PDocumentChain,
125    // Document flow generators
126    P2PGenerator,
127    P2PGeneratorConfig,
128    P2PPaymentBehavior,
129    PaymentReference,
130    // Provisions and contingencies generator (IAS 37 / ASC 450)
131    ProvisionGenerator,
132    QualificationGenerator,
133    RfxGenerator,
134    RiskAssessmentGenerator,
135    // Balance validation
136    RunningBalanceTracker,
137    ScorecardGenerator,
138    // Segment reporting generator (IFRS 8 / ASC 280)
139    SegmentGenerator,
140    SegmentSeed,
141    SourcingProjectGenerator,
142    SpendAnalysisGenerator,
143    ValidationError,
144    // Master data generators
145    VendorGenerator,
146    WarrantyProvisionGenerator,
147    WorkpaperGenerator,
148};
149use datasynth_graph::{
150    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
151    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
152    TransactionGraphConfig,
153};
154use datasynth_ocpm::{
155    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
156    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
157    OcpmUuidFactory, P2pDocuments, S2cDocuments,
158};
159
160use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
161use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
162use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
163use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
164use datasynth_core::models::balance::{
165    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
166    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
167};
168use datasynth_core::models::documents::PaymentMethod;
169use datasynth_core::models::IndustrySector;
170use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
171use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
172use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
173use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
174use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
175use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
176use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
177use datasynth_generators::audit::sample_generator::SampleGenerator;
178use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
179use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
180use datasynth_generators::coa_generator::CoAFramework;
181use rayon::prelude::*;
182use rust_decimal::Decimal;
183
184// ============================================================================
185// Configuration Conversion Functions
186// ============================================================================
187
188/// Convert P2P flow config from schema to generator config.
189/// v4.4.1 — build a `DataQualityStats` with only `total_records`
190/// populated to `n_entries`. Used when the data-quality phase is
191/// skipped (by config or resource pressure) so downstream consumers
192/// can still see the denominator. Before v4.4.1 the writer emitted
193/// `total_records: 0` in those cases, which the SDK team flagged as
194/// indistinguishable from "ran but processed nothing".
195fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
196    #[allow(clippy::field_reassign_with_default)]
197    {
198        let mut s = DataQualityStats::default();
199        s.total_records = n_entries;
200        s.missing_values.total_records = n_entries;
201        s.format_variations.total_processed = n_entries;
202        s.duplicates.total_processed = n_entries;
203        s
204    }
205}
206
207fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
208    let payment_behavior = &schema_config.payment_behavior;
209    let late_dist = &payment_behavior.late_payment_days_distribution;
210
211    P2PGeneratorConfig {
212        three_way_match_rate: schema_config.three_way_match_rate,
213        partial_delivery_rate: schema_config.partial_delivery_rate,
214        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
215        price_variance_rate: schema_config.price_variance_rate,
216        max_price_variance_percent: schema_config.max_price_variance_percent,
217        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
218        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
219        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
220        payment_method_distribution: vec![
221            (PaymentMethod::BankTransfer, 0.60),
222            (PaymentMethod::Check, 0.25),
223            (PaymentMethod::Wire, 0.10),
224            (PaymentMethod::CreditCard, 0.05),
225        ],
226        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
227        payment_behavior: P2PPaymentBehavior {
228            late_payment_rate: payment_behavior.late_payment_rate,
229            late_payment_distribution: LatePaymentDistribution {
230                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
231                late_8_to_14: late_dist.late_8_to_14,
232                very_late_15_to_30: late_dist.very_late_15_to_30,
233                severely_late_31_to_60: late_dist.severely_late_31_to_60,
234                extremely_late_over_60: late_dist.extremely_late_over_60,
235            },
236            partial_payment_rate: payment_behavior.partial_payment_rate,
237            payment_correction_rate: payment_behavior.payment_correction_rate,
238            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
239        },
240    }
241}
242
243/// Convert O2C flow config from schema to generator config.
244fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
245    let payment_behavior = &schema_config.payment_behavior;
246
247    O2CGeneratorConfig {
248        credit_check_failure_rate: schema_config.credit_check_failure_rate,
249        partial_shipment_rate: schema_config.partial_shipment_rate,
250        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
251        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
252        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
253        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
254        bad_debt_rate: schema_config.bad_debt_rate,
255        returns_rate: schema_config.return_rate,
256        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
257        payment_method_distribution: vec![
258            (PaymentMethod::BankTransfer, 0.50),
259            (PaymentMethod::Check, 0.30),
260            (PaymentMethod::Wire, 0.15),
261            (PaymentMethod::CreditCard, 0.05),
262        ],
263        payment_behavior: O2CPaymentBehavior {
264            partial_payment_rate: payment_behavior.partial_payments.rate,
265            short_payment_rate: payment_behavior.short_payments.rate,
266            max_short_percent: payment_behavior.short_payments.max_short_percent,
267            on_account_rate: payment_behavior.on_account_payments.rate,
268            payment_correction_rate: payment_behavior.payment_corrections.rate,
269            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
270        },
271    }
272}
273
/// Switches and volume settings that decide which generation phases run.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Produce master data (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Produce document flows (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the document flows.
    pub generate_ocpm_events: bool,
    /// Produce journal entries.
    pub generate_journal_entries: bool,
    /// Inject anomalies.
    pub inject_anomalies: bool,
    /// Inject data-quality variations (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation is done.
    pub validate_balances: bool,
    /// Require every `gl_account` referenced in generated JEs to exist in the
    /// chart of accounts. Off by default (only a soft warning is emitted);
    /// when true, any orphan account fails the run.
    pub validate_coa_coverage_strict: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors to generate per company.
    pub vendors_per_company: usize,
    /// Customers to generate per company.
    pub customers_per_company: usize,
    /// Materials to generate per company.
    pub materials_per_company: usize,
    /// Assets to generate per company.
    pub assets_per_company: usize,
    /// Employees to generate per company.
    pub employees_per_company: usize,
    /// P2P chains to generate.
    pub p2p_chains: usize,
    /// O2C chains to generate.
    pub o2c_chains: usize,
    /// Produce audit data (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments per engagement.
    pub risks_per_engagement: usize,
    /// Findings per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments per engagement.
    pub judgments_per_engagement: usize,
    /// Produce banking KYC/AML data (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Produce S2C sourcing data (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Produce bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Produce financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Produce accounting standards data (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Produce manufacturing data (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Produce ESG data (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Produce compliance regulations data (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Produce HR data (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Produce treasury data (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Produce project accounting data (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: produce legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings, board
    /// resolutions). Gated by `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: produce IT general controls (access logs, change management
    /// records) per audit engagement. Gated by `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding phases.
    /// Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}

impl Default for PhaseConfig {
    /// Baseline configuration: the core pipeline is on, optional feature
    /// phases are off, and the volume settings carry their stock values.
    fn default() -> Self {
        Self {
            // ---- On by default ----
            generate_master_data: true,
            generate_document_flows: true,
            generate_journal_entries: true,
            validate_balances: true,
            show_progress: true,
            generate_evolution_events: true,
            generate_period_close: true,

            // ---- Off by default ----
            generate_ocpm_events: false,
            inject_anomalies: false,
            // Kept off so test data stays clean.
            inject_data_quality: false,
            validate_coa_coverage_strict: false,
            generate_audit: false,
            generate_banking: false,
            generate_graph_export: false,
            generate_sourcing: false,
            generate_bank_reconciliation: false,
            generate_financial_statements: false,
            generate_accounting_standards: false,
            generate_manufacturing: false,
            generate_sales_kpi_budgets: false,
            generate_tax: false,
            generate_esg: false,
            generate_intercompany: false,
            // Opt-in for ML workloads.
            generate_counterfactuals: false,
            generate_compliance_regulations: false,
            generate_hr: false,
            generate_treasury: false,
            generate_project_accounting: false,
            // v3.3.0 additions, all off by default.
            generate_legal_documents: false,
            generate_it_controls: false,
            generate_analytics_metadata: false,

            // ---- Master data and document-flow volumes ----
            vendors_per_company: 50,
            customers_per_company: 100,
            materials_per_company: 200,
            assets_per_company: 50,
            employees_per_company: 100,
            p2p_chains: 100,
            o2c_chains: 100,

            // ---- Audit volumes ----
            audit_engagements: 5,
            workpapers_per_engagement: 20,
            evidence_per_workpaper: 5,
            risks_per_engagement: 15,
            findings_per_engagement: 8,
            judgments_per_engagement: 10,
        }
    }
}
426
427impl PhaseConfig {
428    /// Derive phase flags from [`GeneratorConfig`].
429    ///
430    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
431    /// CLI flags can override individual fields after calling this method.
432    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
433        Self {
434            // Always-on phases
435            generate_master_data: true,
436            generate_document_flows: true,
437            generate_journal_entries: true,
438            validate_balances: true,
439            validate_coa_coverage_strict: false,
440            generate_period_close: true,
441            generate_evolution_events: true,
442            show_progress: true,
443
444            // Feature-gated phases — derived from config sections
445            generate_audit: cfg.audit.enabled,
446            generate_banking: cfg.banking.enabled,
447            generate_graph_export: cfg.graph_export.enabled,
448            generate_sourcing: cfg.source_to_pay.enabled,
449            generate_intercompany: cfg.intercompany.enabled,
450            generate_financial_statements: cfg.financial_reporting.enabled,
451            generate_bank_reconciliation: cfg.financial_reporting.enabled,
452            generate_accounting_standards: cfg.accounting_standards.enabled,
453            generate_manufacturing: cfg.manufacturing.enabled,
454            generate_sales_kpi_budgets: cfg.sales_quotes.enabled
455                || cfg.financial_reporting.management_kpis.enabled
456                || cfg.financial_reporting.budgets.enabled
457                || cfg.financial_reporting.external_expectations.enabled
458                || cfg.financial_reporting.evidence_anchors.enabled,
459            generate_tax: cfg.tax.enabled,
460            generate_esg: cfg.esg.enabled,
461            generate_ocpm_events: cfg.ocpm.enabled,
462            generate_compliance_regulations: cfg.compliance_regulations.enabled,
463            generate_hr: cfg.hr.enabled,
464            generate_treasury: cfg.treasury.enabled,
465            generate_project_accounting: cfg.project_accounting.enabled,
466
467            // v3.3.0: L1 generator wiring
468            // Legal documents emitted when compliance_regulations is enabled
469            // and the nested legal_documents.enabled flag is set.
470            generate_legal_documents: cfg.compliance_regulations.enabled
471                && cfg.compliance_regulations.legal_documents.enabled,
472            // IT general controls emitted when audit is enabled and the
473            // nested it_controls.enabled flag is set.
474            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
475            // Analytics metadata phase (prior-year, industry benchmarks,
476            // management reports, drift events).
477            generate_analytics_metadata: cfg.analytics_metadata.enabled,
478
479            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
480            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
481
482            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
483            inject_data_quality: cfg.data_quality.enabled,
484
485            // Count defaults (CLI can override after calling this method)
486            vendors_per_company: 50,
487            customers_per_company: 100,
488            materials_per_company: 200,
489            assets_per_company: 50,
490            employees_per_company: 100,
491            p2p_chains: 100,
492            o2c_chains: 100,
493            audit_engagements: 5,
494            workpapers_per_engagement: 20,
495            evidence_per_workpaper: 5,
496            risks_per_engagement: 15,
497            findings_per_engagement: 8,
498            judgments_per_engagement: 10,
499        }
500    }
501}
502
503/// Master data snapshot containing all generated entities.
504#[derive(Debug, Clone, Default)]
505pub struct MasterDataSnapshot {
506    /// Generated vendors.
507    pub vendors: Vec<Vendor>,
508    /// Generated customers.
509    pub customers: Vec<Customer>,
510    /// Generated materials.
511    pub materials: Vec<Material>,
512    /// Generated fixed assets.
513    pub assets: Vec<FixedAsset>,
514    /// Generated employees.
515    pub employees: Vec<Employee>,
516    /// Generated cost center hierarchy (two-level: departments + sub-departments).
517    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
518    /// v5.1: Generated profit centre hierarchy (two-level: top-level
519    /// segment / region / product-group nodes + sub-units).  Emits to
520    /// SAP CEPC alongside `cost_centers` → CSKS.
521    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
522    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
523    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
524    /// v3.3.0+: organizational profiles (one per company) with
525    /// industry / geography / structure / complexity metadata. Emitted
526    /// alongside master data when `generate_master_data = true`.
527    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
528}
529
/// Summary of a hypergraph export that has completed.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Path of the output directory.
    pub output_path: PathBuf,
}
542
543/// Document flow snapshot containing all generated document chains.
544#[derive(Debug, Clone, Default)]
545pub struct DocumentFlowSnapshot {
546    /// P2P document chains.
547    pub p2p_chains: Vec<P2PDocumentChain>,
548    /// O2C document chains.
549    pub o2c_chains: Vec<O2CDocumentChain>,
550    /// All purchase orders (flattened).
551    pub purchase_orders: Vec<documents::PurchaseOrder>,
552    /// All goods receipts (flattened).
553    pub goods_receipts: Vec<documents::GoodsReceipt>,
554    /// All vendor invoices (flattened).
555    pub vendor_invoices: Vec<documents::VendorInvoice>,
556    /// All sales orders (flattened).
557    pub sales_orders: Vec<documents::SalesOrder>,
558    /// All deliveries (flattened).
559    pub deliveries: Vec<documents::Delivery>,
560    /// All customer invoices (flattened).
561    pub customer_invoices: Vec<documents::CustomerInvoice>,
562    /// All payments (flattened).
563    pub payments: Vec<documents::Payment>,
564    /// Cross-document references collected from all document headers
565    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
566    pub document_references: Vec<documents::DocumentReference>,
567}
568
569/// Subledger snapshot containing generated subledger records.
570#[derive(Debug, Clone, Default)]
571pub struct SubledgerSnapshot {
572    /// AP invoices linked from document flow vendor invoices.
573    pub ap_invoices: Vec<APInvoice>,
574    /// AR invoices linked from document flow customer invoices.
575    pub ar_invoices: Vec<ARInvoice>,
576    /// FA subledger records (asset acquisitions from FA generator).
577    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
578    /// Inventory positions from inventory generator.
579    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
580    /// Inventory movements from inventory generator.
581    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
582    /// AR aging reports, one per company, computed after payment settlement.
583    pub ar_aging_reports: Vec<ARAgingReport>,
584    /// AP aging reports, one per company, computed after payment settlement.
585    pub ap_aging_reports: Vec<APAgingReport>,
586    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
587    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
588    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
589    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
590    /// Dunning runs executed after AR aging (one per company per dunning cycle).
591    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
592    /// Dunning letters generated across all dunning runs.
593    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
594}
595
596/// OCPM snapshot containing generated OCPM event log data.
597#[derive(Debug, Clone, Default)]
598pub struct OcpmSnapshot {
599    /// OCPM event log (if generated)
600    pub event_log: Option<OcpmEventLog>,
601    /// Number of events generated
602    pub event_count: usize,
603    /// Number of objects generated
604    pub object_count: usize,
605    /// Number of cases generated
606    pub case_count: usize,
607}
608
609/// Audit data snapshot containing all generated audit-related entities.
610#[derive(Debug, Clone, Default)]
611pub struct AuditSnapshot {
612    /// Audit engagements per ISA 210/220.
613    pub engagements: Vec<AuditEngagement>,
614    /// Workpapers per ISA 230.
615    pub workpapers: Vec<Workpaper>,
616    /// Audit evidence per ISA 500.
617    pub evidence: Vec<AuditEvidence>,
618    /// Risk assessments per ISA 315/330.
619    pub risk_assessments: Vec<RiskAssessment>,
620    /// Audit findings per ISA 265.
621    pub findings: Vec<AuditFinding>,
622    /// Professional judgments per ISA 200.
623    pub judgments: Vec<ProfessionalJudgment>,
624    /// External confirmations per ISA 505.
625    pub confirmations: Vec<ExternalConfirmation>,
626    /// Confirmation responses per ISA 505.
627    pub confirmation_responses: Vec<ConfirmationResponse>,
628    /// Audit procedure steps per ISA 330/530.
629    pub procedure_steps: Vec<AuditProcedureStep>,
630    /// Audit samples per ISA 530.
631    pub samples: Vec<AuditSample>,
632    /// Analytical procedure results per ISA 520.
633    pub analytical_results: Vec<AnalyticalProcedureResult>,
634    /// Internal audit functions per ISA 610.
635    pub ia_functions: Vec<InternalAuditFunction>,
636    /// Internal audit reports per ISA 610.
637    pub ia_reports: Vec<InternalAuditReport>,
638    /// Related parties per ISA 550.
639    pub related_parties: Vec<RelatedParty>,
640    /// Related party transactions per ISA 550.
641    pub related_party_transactions: Vec<RelatedPartyTransaction>,
642    // ---- ISA 600: Group Audits ----
643    /// Component auditors assigned by jurisdiction (ISA 600).
644    pub component_auditors: Vec<ComponentAuditor>,
645    /// Group audit plan with materiality allocations (ISA 600).
646    pub group_audit_plan: Option<GroupAuditPlan>,
647    /// Component instructions issued to component auditors (ISA 600).
648    pub component_instructions: Vec<ComponentInstruction>,
649    /// Reports received from component auditors (ISA 600).
650    pub component_reports: Vec<ComponentAuditorReport>,
651    // ---- ISA 210: Engagement Letters ----
652    /// Engagement letters per ISA 210.
653    pub engagement_letters: Vec<EngagementLetter>,
654    // ---- ISA 560 / IAS 10: Subsequent Events ----
655    /// Subsequent events per ISA 560 / IAS 10.
656    pub subsequent_events: Vec<SubsequentEvent>,
657    // ---- ISA 402: Service Organization Controls ----
658    /// Service organizations identified per ISA 402.
659    pub service_organizations: Vec<ServiceOrganization>,
660    /// SOC reports obtained per ISA 402.
661    pub soc_reports: Vec<SocReport>,
662    /// User entity controls documented per ISA 402.
663    pub user_entity_controls: Vec<UserEntityControl>,
664    // ---- ISA 570: Going Concern ----
665    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
666    pub going_concern_assessments:
667        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
668    // ---- ISA 540: Accounting Estimates ----
669    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
670    pub accounting_estimates:
671        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
672    // ---- ISA 700/701/705/706: Audit Opinions ----
673    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
674    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
675    /// Key Audit Matters per ISA 701 (flattened across all opinions).
676    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
677    // ---- SOX 302 / 404 ----
678    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
679    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
680    /// SOX Section 404 ICFR assessments (one per entity per year).
681    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
682    // ---- ISA 320: Materiality ----
683    /// Materiality calculations per entity per period (ISA 320).
684    pub materiality_calculations:
685        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
686    // ---- ISA 315: Combined Risk Assessments ----
687    /// Combined Risk Assessments per account area / assertion (ISA 315).
688    pub combined_risk_assessments:
689        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
690    // ---- ISA 530: Sampling Plans ----
691    /// Sampling plans per CRA at Moderate or higher (ISA 530).
692    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
693    /// Individual sampled items (key items + representative items) per ISA 530.
694    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
695    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
696    /// Significant classes of transactions per ISA 315 (one set per entity).
697    pub significant_transaction_classes:
698        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
699    // ---- ISA 520: Unusual Item Markers ----
700    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
701    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
702    // ---- ISA 520: Analytical Relationships ----
703    /// Analytical relationships (ratios, trends, correlations) per entity.
704    pub analytical_relationships:
705        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
706    // ---- PCAOB-ISA Cross-Reference ----
707    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
708    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
709    // ---- ISA Standard Reference ----
710    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
711    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
712    // ---- ISA 220 / ISA 300: Audit Scopes ----
713    /// Audit scope records (one per engagement) describing the audit boundary.
714    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
715    // ---- FSM Event Trail ----
716    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
717    /// Contains the ordered sequence of state-transition and procedure-step events
718    /// generated by the audit FSM engine.
719    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
720    // ---- v3.3.0: L1 generator wiring ----
721    /// Legal documents (engagement letters, management reps, legal
722    /// opinions, regulatory filings, board resolutions) per entity.
723    /// Emitted by `LegalDocumentGenerator` when
724    /// `compliance_regulations.legal_documents.enabled = true`.
725    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
726    /// IT general controls — access logs (login/privileged action
727    /// audit trail). Emitted by `ItControlsGenerator` when
728    /// `audit.it_controls.enabled = true`.
729    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
730    /// IT general controls — change management records (code deploys,
731    /// config changes, patches). Emitted by `ItControlsGenerator`.
732    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
733}
734
735/// Banking KYC/AML data snapshot containing all generated banking entities.
736#[derive(Debug, Clone, Default)]
737pub struct BankingSnapshot {
738    /// Banking customers (retail, business, trust).
739    pub customers: Vec<BankingCustomer>,
740    /// Bank accounts.
741    pub accounts: Vec<BankAccount>,
742    /// Bank transactions with AML labels.
743    pub transactions: Vec<BankTransaction>,
744    /// Transaction-level AML labels with features.
745    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
746    /// Customer-level AML labels.
747    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
748    /// Account-level AML labels.
749    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
750    /// Relationship-level AML labels.
751    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
752    /// Case narratives for AML scenarios.
753    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
754    /// Number of suspicious transactions.
755    pub suspicious_count: usize,
756    /// Number of AML scenarios generated.
757    pub scenario_count: usize,
758}
759
760/// Graph export snapshot containing exported graph metadata.
761#[derive(Debug, Clone, Default, Serialize)]
762pub struct GraphExportSnapshot {
763    /// Whether graph export was performed.
764    pub exported: bool,
765    /// Number of graphs exported.
766    pub graph_count: usize,
767    /// Exported graph metadata (by format name).
768    pub exports: HashMap<String, GraphExportInfo>,
769}
770
771/// Information about an exported graph.
772#[derive(Debug, Clone, Serialize)]
773pub struct GraphExportInfo {
774    /// Graph name.
775    pub name: String,
776    /// Export format (pytorch_geometric, neo4j, dgl).
777    pub format: String,
778    /// Output directory path.
779    pub output_path: PathBuf,
780    /// Number of nodes.
781    pub node_count: usize,
782    /// Number of edges.
783    pub edge_count: usize,
784}
785
786/// S2C sourcing data snapshot.
787#[derive(Debug, Clone, Default)]
788pub struct SourcingSnapshot {
789    /// Spend analyses.
790    pub spend_analyses: Vec<SpendAnalysis>,
791    /// Sourcing projects.
792    pub sourcing_projects: Vec<SourcingProject>,
793    /// Supplier qualifications.
794    pub qualifications: Vec<SupplierQualification>,
795    /// RFx events (RFI, RFP, RFQ).
796    pub rfx_events: Vec<RfxEvent>,
797    /// Supplier bids.
798    pub bids: Vec<SupplierBid>,
799    /// Bid evaluations.
800    pub bid_evaluations: Vec<BidEvaluation>,
801    /// Procurement contracts.
802    pub contracts: Vec<ProcurementContract>,
803    /// Catalog items.
804    pub catalog_items: Vec<CatalogItem>,
805    /// Supplier scorecards.
806    pub scorecards: Vec<SupplierScorecard>,
807}
808
809/// A single period's trial balance with metadata.
810///
811/// Used as the orchestrator's in-memory representation while it
812/// builds per-period FS / CF artefacts.  At write time the runtime
813/// converts each `PeriodTrialBalance` to the canonical
814/// [`datasynth_core::models::balance::TrialBalance`] shape via
815/// [`PeriodTrialBalance::into_canonical`] so the on-disk
816/// `period_close/trial_balances.json` matches what the group
817/// aggregate phase loads — see
818/// `crate::output_writer::write_outputs`.
819#[derive(Debug, Clone, Serialize, Deserialize)]
820pub struct PeriodTrialBalance {
821    /// Fiscal year.
822    pub fiscal_year: u16,
823    /// Fiscal period (1-12).
824    pub fiscal_period: u8,
825    /// Period start date.
826    pub period_start: NaiveDate,
827    /// Period end date.
828    pub period_end: NaiveDate,
829    /// Trial balance entries for this period.
830    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
831    /// Framework string for classifier dispatch in
832    /// [`PeriodTrialBalance::into_canonical`] (`"us_gaap"` / `"ifrs"` /
833    /// `"french_gaap"` / `"german_gaap"` / `"dual_reporting"`). Set by
834    /// the orchestrator at TB-emit time; defaults to `"us_gaap"` when
835    /// constructed by ad-hoc callers (e.g. test fixtures).
836    #[serde(default = "default_framework")]
837    pub framework: String,
838}
839
/// Serde fallback for `PeriodTrialBalance::framework`: the `"us_gaap"` identifier.
fn default_framework() -> String {
    String::from("us_gaap")
}
843
844impl PeriodTrialBalance {
845    /// Convert this in-memory period TB into the canonical
846    /// [`datasynth_core::models::balance::TrialBalance`] shape used
847    /// for the on-disk artefact.
848    ///
849    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
850    /// aggregate's `tb_loader` consumes the canonical type directly,
851    /// dropping the v5.0 dual-shape detection that converted from
852    /// `PeriodTrialBalance` JSON on the fly.
853    ///
854    /// v5.33: framework-aware classification — `category` and
855    /// `account_type` are now resolved via
856    /// [`datasynth_core::framework_accounts::FrameworkAccounts`] for the
857    /// framework recorded on `self.framework`, fixing the v5.32-and-prior
858    /// regression where every line was stamped `AccountType::Asset`
859    /// regardless of code (Defect C in the 3-year medium-chain
860    /// FINDINGS doc).
861    ///
862    /// The `is_balanced` / `is_equation_valid` flags are now set to
863    /// `true` with `out_of_balance` / `equation_difference` clamped to
864    /// zero. The interim-TB shape this writer produces is "cumulative
865    /// BS positions + period-only P&L", which is the standard adjusted
866    /// TB layout but has no `Σ debits == Σ credits` invariant — that
867    /// comparison is meaningful only for a gross-flow TB built from
868    /// fully-balanced JEs over a single time window. The integrity that
869    /// IS guaranteed is the underlying per-JE balance invariant
870    /// enforced by [`datasynth_core::models::journal_entry::JournalEntry::new`].
871    /// Downstream consumers that need a real signed-equation check
872    /// (`Σ A = Σ L + Σ E + NI`) should derive it from opening balances
873    /// plus the period-only P&L lines, not from the raw debit/credit
874    /// totals stamped here.
875    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
876        let framework = &self.framework;
877        let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
878        let mut total_debits = Decimal::ZERO;
879        let mut total_credits = Decimal::ZERO;
880        let lines: Vec<TrialBalanceLine> = self
881            .entries
882            .into_iter()
883            .map(|e| {
884                total_debits += e.debit_balance;
885                total_credits += e.credit_balance;
886                let category =
887                    AccountCategory::from_account_code_with_framework(&e.account_code, framework);
888                let account_type = fa.classify_account_type(&e.account_code);
889                TrialBalanceLine {
890                    account_code: e.account_code,
891                    account_description: e.account_name,
892                    category,
893                    account_type,
894                    opening_balance: Decimal::ZERO,
895                    period_debits: e.debit_balance,
896                    period_credits: e.credit_balance,
897                    closing_balance: e.debit_balance - e.credit_balance,
898                    debit_balance: e.debit_balance,
899                    credit_balance: e.credit_balance,
900                    cost_center: None,
901                    profit_center: None,
902                }
903            })
904            .collect();
905        TrialBalance {
906            trial_balance_id: format!(
907                "{company_code}-{:04}{:02}",
908                self.fiscal_year, self.fiscal_period
909            ),
910            company_code: company_code.to_string(),
911            company_name: None,
912            as_of_date: self.period_end,
913            fiscal_year: self.fiscal_year as i32,
914            fiscal_period: self.fiscal_period as u32,
915            currency: currency.to_string(),
916            balance_type: TrialBalanceType::Adjusted,
917            lines,
918            total_debits,
919            total_credits,
920            is_balanced: true,
921            out_of_balance: Decimal::ZERO,
922            is_equation_valid: true,
923            equation_difference: Decimal::ZERO,
924            category_summary: std::collections::HashMap::new(),
925            created_at: self
926                .period_start
927                .and_hms_opt(0, 0, 0)
928                .expect("midnight is a valid time"),
929            created_by: "ORCHESTRATOR".to_string(),
930            approved_by: None,
931            approved_at: None,
932            status: TrialBalanceStatus::Final,
933        }
934    }
935}
936
937/// Financial reporting snapshot (financial statements + bank reconciliations).
938#[derive(Debug, Clone, Default)]
939pub struct FinancialReportingSnapshot {
940    /// Financial statements (balance sheet, income statement, cash flow).
941    /// For multi-entity configs this includes all standalone statements.
942    pub financial_statements: Vec<FinancialStatement>,
943    /// Standalone financial statements keyed by entity code.
944    /// Each entity has its own slice of statements.
945    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
946    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
947    pub consolidated_statements: Vec<FinancialStatement>,
948    /// Consolidation schedules (one per period) showing pre/post elimination detail.
949    pub consolidation_schedules: Vec<ConsolidationSchedule>,
950    /// Bank reconciliations.
951    pub bank_reconciliations: Vec<BankReconciliation>,
952    /// Period-close trial balances (one per period).
953    pub trial_balances: Vec<PeriodTrialBalance>,
954    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
955    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
956    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
957    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
958    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
959    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
960}
961
962/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
963#[derive(Debug, Clone, Default)]
964pub struct HrSnapshot {
965    /// Payroll runs (actual data).
966    pub payroll_runs: Vec<PayrollRun>,
967    /// Payroll line items (actual data).
968    pub payroll_line_items: Vec<PayrollLineItem>,
969    /// Time entries (actual data).
970    pub time_entries: Vec<TimeEntry>,
971    /// Expense reports (actual data).
972    pub expense_reports: Vec<ExpenseReport>,
973    /// Benefit enrollments (actual data).
974    pub benefit_enrollments: Vec<BenefitEnrollment>,
975    /// Defined benefit pension plans (IAS 19 / ASC 715).
976    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
977    /// Pension obligation (DBO) roll-forwards.
978    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
979    /// Plan asset roll-forwards.
980    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
981    /// Pension disclosures.
982    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
983    /// Journal entries generated from pension expense and OCI remeasurements.
984    pub pension_journal_entries: Vec<JournalEntry>,
985    /// Stock grants (ASC 718 / IFRS 2).
986    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
987    /// Stock-based compensation period expense records.
988    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
989    /// Journal entries generated from stock-based compensation expense.
990    pub stock_comp_journal_entries: Vec<JournalEntry>,
991    /// Payroll runs.
992    pub payroll_run_count: usize,
993    /// Payroll line item count.
994    pub payroll_line_item_count: usize,
995    /// Time entry count.
996    pub time_entry_count: usize,
997    /// Expense report count.
998    pub expense_report_count: usize,
999    /// Benefit enrollment count.
1000    pub benefit_enrollment_count: usize,
1001    /// Pension plan count.
1002    pub pension_plan_count: usize,
1003    /// Stock grant count.
1004    pub stock_grant_count: usize,
1005}
1006
1007/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
1008#[derive(Debug, Clone, Default)]
1009pub struct AccountingStandardsSnapshot {
1010    /// Revenue recognition contracts (actual data).
1011    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
1012    /// Impairment tests (actual data).
1013    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
1014    /// Business combinations (IFRS 3 / ASC 805).
1015    pub business_combinations:
1016        Vec<datasynth_core::models::business_combination::BusinessCombination>,
1017    /// Journal entries generated from business combinations (Day 1 + amortization).
1018    pub business_combination_journal_entries: Vec<JournalEntry>,
1019    /// ECL models (IFRS 9 / ASC 326).
1020    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
1021    /// ECL provision movements.
1022    pub ecl_provision_movements:
1023        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
1024    /// Journal entries from ECL provision.
1025    pub ecl_journal_entries: Vec<JournalEntry>,
1026    /// Provisions (IAS 37 / ASC 450).
1027    pub provisions: Vec<datasynth_core::models::provision::Provision>,
1028    /// Provision movement roll-forwards (IAS 37 / ASC 450).
1029    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
1030    /// Contingent liabilities (IAS 37 / ASC 450).
1031    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
1032    /// Journal entries from provisions.
1033    pub provision_journal_entries: Vec<JournalEntry>,
1034    /// IAS 21 functional currency translation results (one per entity per period).
1035    pub currency_translation_results:
1036        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
1037    /// Revenue recognition contract count.
1038    pub revenue_contract_count: usize,
1039    /// Impairment test count.
1040    pub impairment_test_count: usize,
1041    /// Business combination count.
1042    pub business_combination_count: usize,
1043    /// ECL model count.
1044    pub ecl_model_count: usize,
1045    /// Provision count.
1046    pub provision_count: usize,
1047    /// Currency translation result count (IAS 21).
1048    pub currency_translation_count: usize,
1049    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
1050    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
1051    /// ROU asset + lease liability details.
1052    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1053    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
1054    pub fair_value_measurements:
1055        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1056    /// Framework difference records (dual-reporting only).
1057    pub framework_differences:
1058        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1059    /// Per-entity framework reconciliation (dual-reporting only).
1060    pub framework_reconciliations:
1061        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1062    /// Counts for stats logging.
1063    pub lease_count: usize,
1064    pub fair_value_measurement_count: usize,
1065    pub framework_difference_count: usize,
1066}
1067
1068/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
1069#[derive(Debug, Clone, Default)]
1070pub struct ComplianceRegulationsSnapshot {
1071    /// Flattened standard records for output.
1072    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1073    /// Cross-reference records.
1074    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1075    /// Jurisdiction profile records.
1076    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1077    /// Generated audit procedures.
1078    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1079    /// Generated compliance findings.
1080    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1081    /// Generated regulatory filings.
1082    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1083    /// Compliance graph (if graph integration enabled).
1084    pub compliance_graph: Option<datasynth_graph::Graph>,
1085}
1086
1087/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
1088#[derive(Debug, Clone, Default)]
1089pub struct ManufacturingSnapshot {
1090    /// Production orders (actual data).
1091    pub production_orders: Vec<ProductionOrder>,
1092    /// Quality inspections (actual data).
1093    pub quality_inspections: Vec<QualityInspection>,
1094    /// Cycle counts (actual data).
1095    pub cycle_counts: Vec<CycleCount>,
1096    /// BOM components (actual data).
1097    pub bom_components: Vec<BomComponent>,
1098    /// Inventory movements (actual data).
1099    pub inventory_movements: Vec<InventoryMovement>,
1100    /// Production order count.
1101    pub production_order_count: usize,
1102    /// Quality inspection count.
1103    pub quality_inspection_count: usize,
1104    /// Cycle count count.
1105    pub cycle_count_count: usize,
1106    /// BOM component count.
1107    pub bom_component_count: usize,
1108    /// Inventory movement count.
1109    pub inventory_movement_count: usize,
1110}
1111
1112/// Sales, KPI, and budget data snapshot.
1113#[derive(Debug, Clone, Default)]
1114pub struct SalesKpiBudgetsSnapshot {
1115    /// Sales quotes (actual data).
1116    pub sales_quotes: Vec<SalesQuote>,
1117    /// Management KPIs (actual data).
1118    pub kpis: Vec<ManagementKpi>,
1119    /// Budgets (actual data).
1120    pub budgets: Vec<Budget>,
1121    /// External expectations (ISA-520 substantive-analytics layer).
1122    pub external_expectations: Vec<ExternalExpectation>,
1123    /// Evidence anchors (ISA-505 external-corroboration layer).
1124    pub evidence_anchors: Vec<EvidenceAnchor>,
1125    /// Sales quote count.
1126    pub sales_quote_count: usize,
1127    /// Management KPI count.
1128    pub kpi_count: usize,
1129    /// Budget line count.
1130    pub budget_line_count: usize,
1131}
1132
1133/// Anomaly labels generated during injection.
1134#[derive(Debug, Clone, Default)]
1135pub struct AnomalyLabels {
1136    /// All anomaly labels.
1137    pub labels: Vec<LabeledAnomaly>,
1138    /// Summary statistics.
1139    pub summary: Option<AnomalySummary>,
1140    /// Count by anomaly type.
1141    pub by_type: HashMap<String, usize>,
1142    /// Synthetic prior-year carry-forward register (confirmed campaign counterparties) — the
1143    /// confirmation channel the memory arm consumes (§40/§59). Empty unless `fraud.campaigns
1144    /// .carry_forward` is enabled. Written to `labels/carry_forward.json`.
1145    pub carry_forward: Vec<datasynth_generators::anomaly::campaign::CarryForwardRecord>,
1146}
1147
1148/// Balance validation results from running balance tracker.
1149#[derive(Debug, Clone, Default)]
1150pub struct BalanceValidationResult {
1151    /// Whether validation was performed.
1152    pub validated: bool,
1153    /// Whether balance sheet equation is satisfied.
1154    pub is_balanced: bool,
1155    /// Number of entries processed.
1156    pub entries_processed: u64,
1157    /// Total debits across all entries.
1158    pub total_debits: rust_decimal::Decimal,
1159    /// Total credits across all entries.
1160    pub total_credits: rust_decimal::Decimal,
1161    /// Number of accounts tracked.
1162    pub accounts_tracked: usize,
1163    /// Number of companies tracked.
1164    pub companies_tracked: usize,
1165    /// Validation errors encountered.
1166    pub validation_errors: Vec<ValidationError>,
1167    /// Whether any unbalanced entries were found.
1168    pub has_unbalanced_entries: bool,
1169}
1170
1171/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
1172#[derive(Debug, Clone, Default)]
1173pub struct TaxSnapshot {
1174    /// Tax jurisdictions.
1175    pub jurisdictions: Vec<TaxJurisdiction>,
1176    /// Tax codes.
1177    pub codes: Vec<TaxCode>,
1178    /// Tax lines computed on documents.
1179    pub tax_lines: Vec<TaxLine>,
1180    /// Tax returns filed per period.
1181    pub tax_returns: Vec<TaxReturn>,
1182    /// Tax provisions.
1183    pub tax_provisions: Vec<TaxProvision>,
1184    /// Withholding tax records.
1185    pub withholding_records: Vec<WithholdingTaxRecord>,
1186    /// Tax anomaly labels.
1187    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1188    /// Jurisdiction count.
1189    pub jurisdiction_count: usize,
1190    /// Code count.
1191    pub code_count: usize,
1192    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1193    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1194    /// Journal entries posting tax payable/receivable from computed tax lines.
1195    pub tax_posting_journal_entries: Vec<JournalEntry>,
1196}
1197
1198/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1199#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1200pub struct IntercompanySnapshot {
1201    /// Group ownership structure (parent/subsidiary/associate relationships).
1202    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1203    /// IC matched pairs (transaction pairs between related entities).
1204    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1205    /// IC journal entries generated from matched pairs (seller side).
1206    pub seller_journal_entries: Vec<JournalEntry>,
1207    /// IC journal entries generated from matched pairs (buyer side).
1208    pub buyer_journal_entries: Vec<JournalEntry>,
1209    /// Elimination entries for consolidation.
1210    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1211    /// NCI measurements derived from group structure ownership percentages.
1212    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1213    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
1214    #[serde(skip)]
1215    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1216    /// IC matched pair count.
1217    pub matched_pair_count: usize,
1218    /// IC elimination entry count.
1219    pub elimination_entry_count: usize,
1220    /// IC matching rate (0.0 to 1.0).
1221    pub match_rate: f64,
1222}
1223
1224/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1225#[derive(Debug, Clone, Default)]
1226pub struct EsgSnapshot {
1227    /// Emission records (scope 1, 2, 3).
1228    pub emissions: Vec<EmissionRecord>,
1229    /// Energy consumption records.
1230    pub energy: Vec<EnergyConsumption>,
1231    /// Water usage records.
1232    pub water: Vec<WaterUsage>,
1233    /// Waste records.
1234    pub waste: Vec<WasteRecord>,
1235    /// Workforce diversity metrics.
1236    pub diversity: Vec<WorkforceDiversityMetric>,
1237    /// Pay equity metrics.
1238    pub pay_equity: Vec<PayEquityMetric>,
1239    /// Safety incidents.
1240    pub safety_incidents: Vec<SafetyIncident>,
1241    /// Safety metrics.
1242    pub safety_metrics: Vec<SafetyMetric>,
1243    /// Governance metrics.
1244    pub governance: Vec<GovernanceMetric>,
1245    /// Supplier ESG assessments.
1246    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1247    /// Materiality assessments.
1248    pub materiality: Vec<MaterialityAssessment>,
1249    /// ESG disclosures.
1250    pub disclosures: Vec<EsgDisclosure>,
1251    /// Climate scenarios.
1252    pub climate_scenarios: Vec<ClimateScenario>,
1253    /// ESG anomaly labels.
1254    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1255    /// Total emission record count.
1256    pub emission_count: usize,
1257    /// Total disclosure count.
1258    pub disclosure_count: usize,
1259}
1260
1261/// Treasury data snapshot (cash management, hedging, debt, pooling).
1262#[derive(Debug, Clone, Default)]
1263pub struct TreasurySnapshot {
1264    /// Cash positions (daily balances per account).
1265    pub cash_positions: Vec<CashPosition>,
1266    /// Cash forecasts.
1267    pub cash_forecasts: Vec<CashForecast>,
1268    /// Cash pools.
1269    pub cash_pools: Vec<CashPool>,
1270    /// Cash pool sweep transactions.
1271    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1272    /// Hedging instruments.
1273    pub hedging_instruments: Vec<HedgingInstrument>,
1274    /// Hedge relationships (ASC 815/IFRS 9 designations).
1275    pub hedge_relationships: Vec<HedgeRelationship>,
1276    /// Debt instruments.
1277    pub debt_instruments: Vec<DebtInstrument>,
1278    /// Bank guarantees and letters of credit.
1279    pub bank_guarantees: Vec<BankGuarantee>,
1280    /// Intercompany netting runs.
1281    pub netting_runs: Vec<NettingRun>,
1282    /// Treasury anomaly labels.
1283    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1284    /// Journal entries generated from treasury instruments (debt interest accruals,
1285    /// hedge MTM, cash pool sweeps).
1286    pub journal_entries: Vec<JournalEntry>,
1287}
1288
1289/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
1290#[derive(Debug, Clone, Default)]
1291pub struct ProjectAccountingSnapshot {
1292    /// Projects with WBS hierarchies.
1293    pub projects: Vec<Project>,
1294    /// Project cost lines (linked from source documents).
1295    pub cost_lines: Vec<ProjectCostLine>,
1296    /// Revenue recognition records.
1297    pub revenue_records: Vec<ProjectRevenue>,
1298    /// Earned value metrics.
1299    pub earned_value_metrics: Vec<EarnedValueMetric>,
1300    /// Change orders.
1301    pub change_orders: Vec<ChangeOrder>,
1302    /// Project milestones.
1303    pub milestones: Vec<ProjectMilestone>,
1304}
1305
1306/// Complete result of enhanced generation run.
1307#[derive(Debug, Default)]
1308pub struct EnhancedGenerationResult {
1309    /// Generated chart of accounts.
1310    pub chart_of_accounts: ChartOfAccounts,
1311    /// Master data snapshot.
1312    pub master_data: MasterDataSnapshot,
1313    /// Document flow snapshot.
1314    pub document_flows: DocumentFlowSnapshot,
1315    /// Subledger snapshot (linked from document flows).
1316    pub subledger: SubledgerSnapshot,
1317    /// OCPM event log snapshot (if OCPM generation enabled).
1318    pub ocpm: OcpmSnapshot,
1319    /// Audit data snapshot (if audit generation enabled).
1320    pub audit: AuditSnapshot,
1321    /// Banking KYC/AML data snapshot (if banking generation enabled).
1322    pub banking: BankingSnapshot,
1323    /// Graph export snapshot (if graph export enabled).
1324    pub graph_export: GraphExportSnapshot,
1325    /// S2C sourcing data snapshot (if sourcing generation enabled).
1326    pub sourcing: SourcingSnapshot,
1327    /// Financial reporting snapshot (financial statements + bank reconciliations).
1328    pub financial_reporting: FinancialReportingSnapshot,
1329    /// HR data snapshot (payroll, time entries, expenses).
1330    pub hr: HrSnapshot,
1331    /// Accounting standards snapshot (revenue recognition, impairment).
1332    pub accounting_standards: AccountingStandardsSnapshot,
1333    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
1334    pub manufacturing: ManufacturingSnapshot,
1335    /// Sales, KPI, and budget snapshot.
1336    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
1337    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
1338    pub tax: TaxSnapshot,
1339    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
1340    pub esg: EsgSnapshot,
1341    /// Treasury data snapshot (cash management, hedging, debt).
1342    pub treasury: TreasurySnapshot,
1343    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
1344    pub project_accounting: ProjectAccountingSnapshot,
1345    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
1346    pub process_evolution: Vec<ProcessEvolutionEvent>,
1347    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
1348    pub organizational_events: Vec<OrganizationalEvent>,
1349    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
1350    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
1351    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1352    pub intercompany: IntercompanySnapshot,
1353    /// Generated journal entries.
1354    pub journal_entries: Vec<JournalEntry>,
1355    /// Anomaly labels (if injection enabled).
1356    pub anomaly_labels: AnomalyLabels,
1357    /// Balance validation results (if validation enabled).
1358    pub balance_validation: BalanceValidationResult,
1359    /// Data quality statistics (if injection enabled).
1360    pub data_quality_stats: DataQualityStats,
1361    /// Data quality issue records (if injection enabled).
1362    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
1363    /// Generation statistics.
1364    pub statistics: EnhancedGenerationStatistics,
1365    /// Data lineage graph (if tracking enabled).
1366    pub lineage: Option<super::lineage::LineageGraph>,
1367    /// Quality gate evaluation result.
1368    pub gate_result: Option<datasynth_eval::gates::GateResult>,
1369    /// Internal controls (if controls generation enabled).
1370    pub internal_controls: Vec<InternalControl>,
1371    /// SoD (Segregation of Duties) violations identified during control application.
1372    ///
1373    /// Each record corresponds to a journal entry where `sod_violation == true`.
1374    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
1375    /// Opening balances (if opening balance generation enabled).
1376    pub opening_balances: Vec<GeneratedOpeningBalance>,
1377    /// GL-to-subledger reconciliation results (if reconciliation enabled).
1378    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
1379    /// Counterfactual (original, mutated) JE pairs for ML training.
1380    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
1381    /// Fraud red-flag indicators on P2P/O2C documents.
1382    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
1383    /// Collusion rings (coordinated fraud networks).
1384    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
1385    /// Bi-temporal version chains for vendor entities.
1386    pub temporal_vendor_chains:
1387        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
1388    /// Entity relationship graph (nodes + edges with strength scores).
1389    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
1390    /// Cross-process links (P2P ↔ O2C via inventory movements).
1391    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
1392    /// Industry-specific GL accounts and metadata.
1393    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
1394    /// SP5.2 — CoA semantic prior snapshot. When `Some`, `write_journal_entries_csv`
1395    /// builds a secondary lookup from the prior's 3,123 corpus accounts and uses
1396    /// it as a fallback when the synthetic CoA index misses a line's `gl_account`
1397    /// (common when SP3.7's per-source attribute conditional emits corpus account
1398    /// numbers that differ from the synthetic CoA master table's number set).
1399    pub coa_semantic_prior:
1400        Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
1401    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
1402    pub compliance_regulations: ComplianceRegulationsSnapshot,
1403    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
1404    /// industry benchmarks, management reports, drift events). Empty
1405    /// when `analytics_metadata.enabled = false`.
1406    pub analytics_metadata: AnalyticsMetadataSnapshot,
1407    /// v3.5.1+: statistical validation report (Benford, chi-squared,
1408    /// KS) over the generated amount distribution.  `None` when
1409    /// `distributions.validation.enabled = false`.
1410    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
1411    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
1412    /// customer value-segment labels, and industry-specific metadata
1413    /// populated from the previously-inert `vendor_network`,
1414    /// `customer_segmentation`, and `industry_specific` schema
1415    /// sections. Empty when those sections are disabled.
1416    pub interconnectivity: InterconnectivitySnapshot,
1417}
1418
/// v4.1.3+: tier, segment, and industry labels for generated entities.
///
/// Populated when `vendor_network.enabled`, `customer_segmentation.enabled`,
/// or `industry_specific.enabled` are set. Keeping the labels here lets
/// downstream tooling (graph export, risk models) consume them without
/// re-deriving them from scratch.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(vendor_id, tier)` pairs.
    ///
    /// Tier 1 = strategic / primary; Tier 2 = sub-tier suppliers to tier 1;
    /// Tier 3 = sub-sub-tier.
    pub vendor_tiers: Vec<(String, u8)>,
    /// `(vendor_id, cluster_label)` pairs.
    ///
    /// `cluster_label` is one of `"reliable_strategic"`,
    /// `"standard_operational"`, `"transactional"`, or `"problematic"`.
    pub vendor_clusters: Vec<(String, String)>,
    /// `(customer_id, value_segment)` pairs.
    ///
    /// `value_segment` is one of `"enterprise"`, `"mid_market"`, `"smb"`,
    /// or `"consumer"`.
    pub customer_value_segments: Vec<(String, String)>,
    /// `(customer_id, lifecycle_stage)` pairs.
    ///
    /// `lifecycle_stage` is one of `"prospect"`, `"new"`, `"growth"`,
    /// `"mature"`, `"at_risk"`, `"churned"`, or `"won_back"`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Summary of the industry-specific knobs that were applied, if any
    /// (e.g. `"manufacturing.bom_depth=3"`).
    pub industry_metadata: Vec<String>,
}
1444
1445/// v3.3.0: snapshot for the analytics-metadata phase.
1446#[derive(Debug, Clone, Default)]
1447pub struct AnalyticsMetadataSnapshot {
1448    /// Prior-year comparative balances per account, per entity.
1449    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
1450    /// Industry benchmarks for the configured industry.
1451    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
1452    /// Management-report artefacts (dashboards, MDA sections).
1453    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
1454    /// Drift-event labels emitted from the post-generation sweep.
1455    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
1456}
1457
1458/// Enhanced statistics about a generation run.
1459#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1460pub struct EnhancedGenerationStatistics {
1461    /// Total journal entries generated.
1462    pub total_entries: u64,
1463    /// Total line items generated.
1464    pub total_line_items: u64,
1465    /// Number of accounts in CoA.
1466    pub accounts_count: usize,
1467    /// Number of companies.
1468    pub companies_count: usize,
1469    /// Period in months.
1470    pub period_months: u32,
1471    /// Master data counts.
1472    pub vendor_count: usize,
1473    pub customer_count: usize,
1474    pub material_count: usize,
1475    pub asset_count: usize,
1476    pub employee_count: usize,
1477    /// Document flow counts.
1478    pub p2p_chain_count: usize,
1479    pub o2c_chain_count: usize,
1480    /// Subledger counts.
1481    pub ap_invoice_count: usize,
1482    pub ar_invoice_count: usize,
1483    /// OCPM counts.
1484    pub ocpm_event_count: usize,
1485    pub ocpm_object_count: usize,
1486    pub ocpm_case_count: usize,
1487    /// Audit counts.
1488    pub audit_engagement_count: usize,
1489    pub audit_workpaper_count: usize,
1490    pub audit_evidence_count: usize,
1491    pub audit_risk_count: usize,
1492    pub audit_finding_count: usize,
1493    pub audit_judgment_count: usize,
1494    /// ISA 505 confirmation counts.
1495    #[serde(default)]
1496    pub audit_confirmation_count: usize,
1497    #[serde(default)]
1498    pub audit_confirmation_response_count: usize,
1499    /// ISA 330/530 procedure step and sample counts.
1500    #[serde(default)]
1501    pub audit_procedure_step_count: usize,
1502    #[serde(default)]
1503    pub audit_sample_count: usize,
1504    /// ISA 520 analytical procedure counts.
1505    #[serde(default)]
1506    pub audit_analytical_result_count: usize,
1507    /// ISA 610 internal audit counts.
1508    #[serde(default)]
1509    pub audit_ia_function_count: usize,
1510    #[serde(default)]
1511    pub audit_ia_report_count: usize,
1512    /// ISA 550 related party counts.
1513    #[serde(default)]
1514    pub audit_related_party_count: usize,
1515    #[serde(default)]
1516    pub audit_related_party_transaction_count: usize,
1517    /// Anomaly counts.
1518    pub anomalies_injected: usize,
1519    /// Data quality issue counts.
1520    pub data_quality_issues: usize,
1521    /// Banking counts.
1522    pub banking_customer_count: usize,
1523    pub banking_account_count: usize,
1524    pub banking_transaction_count: usize,
1525    pub banking_suspicious_count: usize,
1526    /// Graph export counts.
1527    pub graph_export_count: usize,
1528    pub graph_node_count: usize,
1529    pub graph_edge_count: usize,
1530    /// LLM enrichment timing (milliseconds).
1531    #[serde(default)]
1532    pub llm_enrichment_ms: u64,
1533    /// Number of vendor names enriched by LLM.
1534    #[serde(default)]
1535    pub llm_vendors_enriched: usize,
1536    /// v4.1.1+: number of customer names enriched by LLM.
1537    #[serde(default)]
1538    pub llm_customers_enriched: usize,
1539    /// v4.1.1+: number of material descriptions enriched by LLM.
1540    #[serde(default)]
1541    pub llm_materials_enriched: usize,
1542    /// v4.1.1+: number of audit finding titles enriched by LLM.
1543    #[serde(default)]
1544    pub llm_findings_enriched: usize,
1545    /// Diffusion enhancement timing (milliseconds).
1546    #[serde(default)]
1547    pub diffusion_enhancement_ms: u64,
1548    /// Number of diffusion samples generated.
1549    #[serde(default)]
1550    pub diffusion_samples_generated: usize,
1551    /// Hybrid-diffusion blend weight actually applied (after clamp to \[0,1\]).
1552    /// `None` when the neural/hybrid backend is not active.
1553    #[serde(default, skip_serializing_if = "Option::is_none")]
1554    pub neural_hybrid_weight: Option<f64>,
1555    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
1556    #[serde(default, skip_serializing_if = "Option::is_none")]
1557    pub neural_hybrid_strategy: Option<String>,
1558    /// How many columns were routed through the neural backend.
1559    #[serde(default, skip_serializing_if = "Option::is_none")]
1560    pub neural_routed_column_count: Option<usize>,
1561    /// Causal generation timing (milliseconds).
1562    #[serde(default)]
1563    pub causal_generation_ms: u64,
1564    /// Number of causal samples generated.
1565    #[serde(default)]
1566    pub causal_samples_generated: usize,
1567    /// Whether causal validation passed.
1568    #[serde(default)]
1569    pub causal_validation_passed: Option<bool>,
1570    /// S2C sourcing counts.
1571    #[serde(default)]
1572    pub sourcing_project_count: usize,
1573    #[serde(default)]
1574    pub rfx_event_count: usize,
1575    #[serde(default)]
1576    pub bid_count: usize,
1577    #[serde(default)]
1578    pub contract_count: usize,
1579    #[serde(default)]
1580    pub catalog_item_count: usize,
1581    #[serde(default)]
1582    pub scorecard_count: usize,
1583    /// Financial reporting counts.
1584    #[serde(default)]
1585    pub financial_statement_count: usize,
1586    #[serde(default)]
1587    pub bank_reconciliation_count: usize,
1588    /// HR counts.
1589    #[serde(default)]
1590    pub payroll_run_count: usize,
1591    #[serde(default)]
1592    pub time_entry_count: usize,
1593    #[serde(default)]
1594    pub expense_report_count: usize,
1595    #[serde(default)]
1596    pub benefit_enrollment_count: usize,
1597    #[serde(default)]
1598    pub pension_plan_count: usize,
1599    #[serde(default)]
1600    pub stock_grant_count: usize,
1601    /// Accounting standards counts.
1602    #[serde(default)]
1603    pub revenue_contract_count: usize,
1604    #[serde(default)]
1605    pub impairment_test_count: usize,
1606    #[serde(default)]
1607    pub business_combination_count: usize,
1608    #[serde(default)]
1609    pub ecl_model_count: usize,
1610    #[serde(default)]
1611    pub provision_count: usize,
1612    /// Manufacturing counts.
1613    #[serde(default)]
1614    pub production_order_count: usize,
1615    #[serde(default)]
1616    pub quality_inspection_count: usize,
1617    #[serde(default)]
1618    pub cycle_count_count: usize,
1619    #[serde(default)]
1620    pub bom_component_count: usize,
1621    #[serde(default)]
1622    pub inventory_movement_count: usize,
1623    /// Sales & reporting counts.
1624    #[serde(default)]
1625    pub sales_quote_count: usize,
1626    #[serde(default)]
1627    pub kpi_count: usize,
1628    #[serde(default)]
1629    pub budget_line_count: usize,
1630    /// Tax counts.
1631    #[serde(default)]
1632    pub tax_jurisdiction_count: usize,
1633    #[serde(default)]
1634    pub tax_code_count: usize,
1635    /// ESG counts.
1636    #[serde(default)]
1637    pub esg_emission_count: usize,
1638    #[serde(default)]
1639    pub esg_disclosure_count: usize,
1640    /// Intercompany counts.
1641    #[serde(default)]
1642    pub ic_matched_pair_count: usize,
1643    #[serde(default)]
1644    pub ic_elimination_count: usize,
1645    /// Number of intercompany journal entries (seller + buyer side).
1646    #[serde(default)]
1647    pub ic_transaction_count: usize,
1648    /// Number of fixed asset subledger records.
1649    #[serde(default)]
1650    pub fa_subledger_count: usize,
1651    /// Number of inventory subledger records.
1652    #[serde(default)]
1653    pub inventory_subledger_count: usize,
1654    /// Treasury debt instrument count.
1655    #[serde(default)]
1656    pub treasury_debt_instrument_count: usize,
1657    /// Treasury hedging instrument count.
1658    #[serde(default)]
1659    pub treasury_hedging_instrument_count: usize,
1660    /// Project accounting project count.
1661    #[serde(default)]
1662    pub project_count: usize,
1663    /// Project accounting change order count.
1664    #[serde(default)]
1665    pub project_change_order_count: usize,
1666    /// Tax provision count.
1667    #[serde(default)]
1668    pub tax_provision_count: usize,
1669    /// Opening balance count.
1670    #[serde(default)]
1671    pub opening_balance_count: usize,
1672    /// Subledger reconciliation count.
1673    #[serde(default)]
1674    pub subledger_reconciliation_count: usize,
1675    /// Tax line count.
1676    #[serde(default)]
1677    pub tax_line_count: usize,
1678    /// Project cost line count.
1679    #[serde(default)]
1680    pub project_cost_line_count: usize,
1681    /// Cash position count.
1682    #[serde(default)]
1683    pub cash_position_count: usize,
1684    /// Cash forecast count.
1685    #[serde(default)]
1686    pub cash_forecast_count: usize,
1687    /// Cash pool count.
1688    #[serde(default)]
1689    pub cash_pool_count: usize,
1690    /// Process evolution event count.
1691    #[serde(default)]
1692    pub process_evolution_event_count: usize,
1693    /// Organizational event count.
1694    #[serde(default)]
1695    pub organizational_event_count: usize,
1696    /// Counterfactual pair count.
1697    #[serde(default)]
1698    pub counterfactual_pair_count: usize,
1699    /// Number of fraud red-flag indicators generated.
1700    #[serde(default)]
1701    pub red_flag_count: usize,
1702    /// Number of collusion rings generated.
1703    #[serde(default)]
1704    pub collusion_ring_count: usize,
1705    /// Number of bi-temporal vendor version chains generated.
1706    #[serde(default)]
1707    pub temporal_version_chain_count: usize,
1708    /// Number of nodes in the entity relationship graph.
1709    #[serde(default)]
1710    pub entity_relationship_node_count: usize,
1711    /// Number of edges in the entity relationship graph.
1712    #[serde(default)]
1713    pub entity_relationship_edge_count: usize,
1714    /// Number of cross-process links generated.
1715    #[serde(default)]
1716    pub cross_process_link_count: usize,
1717    /// Number of disruption events generated.
1718    #[serde(default)]
1719    pub disruption_event_count: usize,
1720    /// Number of industry-specific GL accounts generated.
1721    #[serde(default)]
1722    pub industry_gl_account_count: usize,
1723    /// Number of period-close journal entries generated (tax provision + closing entries).
1724    #[serde(default)]
1725    pub period_close_je_count: usize,
1726}
1727
1728/// Enhanced orchestrator with full feature integration.
1729pub struct EnhancedOrchestrator {
1730    config: GeneratorConfig,
1731    phase_config: PhaseConfig,
1732    coa: Option<Arc<ChartOfAccounts>>,
1733    master_data: MasterDataSnapshot,
1734    seed: u64,
1735    multi_progress: Option<MultiProgress>,
1736    /// Resource guard for memory, disk, and CPU monitoring
1737    resource_guard: ResourceGuard,
1738    /// Output path for disk space monitoring
1739    output_path: Option<PathBuf>,
1740    /// Copula generators for preserving correlations (from fingerprint)
1741    copula_generators: Vec<CopulaGeneratorSpec>,
1742    /// Country pack registry for localized data generation
1743    country_pack_registry: datasynth_core::CountryPackRegistry,
1744    /// Optional streaming sink for phase-by-phase output
1745    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1746    /// Shared template provider for user-supplied template packs.
1747    ///
1748    /// Constructed from `config.templates.path` at orchestrator creation
1749    /// time. When the path is `None`, this is still populated with an
1750    /// embedded-only provider so generators can always call trait methods
1751    /// without an `Option<…>` guard. v3.2.0+.
1752    template_provider: datasynth_core::templates::SharedTemplateProvider,
1753    /// v3.4.1+ temporal context for business-day / holiday awareness.
1754    ///
1755    /// Populated only when `temporal_patterns.business_days.enabled`. When
1756    /// `None`, document-flow / HR / treasury / period-close generators keep
1757    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
1758    /// for the same seed).
1759    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1760    /// Optional shard-mode context (set by group-engine shard runners).
1761    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
1762    shard_context: Option<crate::shard_context::ShardContext>,
1763    /// SP3.12 — cached priors, shared between `generate_journal_entries` (which
1764    /// loads them) and `generate_jes_from_document_flows` (which applies padding).
1765    /// Set once after the SP3 opt-in block in `generate_journal_entries`.
1766    cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
1767}
1768
1769impl EnhancedOrchestrator {
1770    /// Create a new enhanced orchestrator.
1771    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1772        datasynth_config::validate_config(&config)?;
1773
1774        let seed = config.global.seed.unwrap_or_else(rand::random);
1775
1776        // Build resource guard from config
1777        let resource_guard = Self::build_resource_guard(&config, None);
1778
1779        // Build country pack registry from config
1780        let country_pack_registry = match &config.country_packs {
1781            Some(cp) => {
1782                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1783                    .map_err(|e| SynthError::config(e.to_string()))?
1784            }
1785            None => datasynth_core::CountryPackRegistry::builtin_only()
1786                .map_err(|e| SynthError::config(e.to_string()))?,
1787        };
1788
1789        // Build the shared template provider from config.templates.path.
1790        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1791        // `Some(path)` → load file/dir and honour `merge_strategy`.
1792        let template_provider = Self::build_template_provider(&config)?;
1793
1794        // v3.4.1: build a shared temporal context when
1795        // `temporal_patterns.business_days.enabled`. `None` preserves the
1796        // raw-RNG date-offset behaviour per-generator.
1797        let temporal_context = Self::build_temporal_context(&config)?;
1798
1799        Ok(Self {
1800            config,
1801            phase_config,
1802            coa: None,
1803            master_data: MasterDataSnapshot::default(),
1804            seed,
1805            multi_progress: None,
1806            resource_guard,
1807            output_path: None,
1808            copula_generators: Vec::new(),
1809            country_pack_registry,
1810            phase_sink: None,
1811            template_provider,
1812            temporal_context,
1813            shard_context: None,
1814            cached_priors: None,
1815        })
1816    }
1817
1818    /// Install shard-mode context.  Called by the group shard runner
1819    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1820    /// entry point).  Has no effect on single-entity runs.
1821    ///
1822    /// See [`crate::shard_context::ShardContext`] for rationale.
1823    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1824        self.shard_context = Some(ctx);
1825    }
1826
1827    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1828    ///
1829    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1830    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1831    /// enabled. Returns `Err` only for unrecoverable config errors.
1832    fn build_temporal_context(
1833        config: &GeneratorConfig,
1834    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1835        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1836
1837        let tp = &config.temporal_patterns;
1838        if !tp.enabled || !tp.business_days.enabled {
1839            return Ok(None);
1840        }
1841
1842        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1843            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1844        let end_date = start_date + chrono::Months::new(config.global.period_months);
1845
1846        let region_code = tp
1847            .calendars
1848            .regions
1849            .first()
1850            .cloned()
1851            .unwrap_or_else(|| "US".to_string());
1852        let region = parse_region_code(&region_code);
1853
1854        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1855    }
1856
1857    /// Build the shared template provider from `config.templates`.
1858    ///
1859    /// Always returns a provider — falls back to embedded-only when
1860    /// `config.templates.path` is `None`. The merge-strategy from config
1861    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1862    /// orchestrator-construction time are fatal (preferable to silently
1863    /// using embedded pools when the user supplied a bad path).
1864    fn build_template_provider(
1865        config: &GeneratorConfig,
1866    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1867        use datasynth_core::templates::{
1868            loader::{MergeStrategy, TemplateLoader},
1869            DefaultTemplateProvider,
1870        };
1871        use std::sync::Arc;
1872
1873        let provider = match &config.templates.path {
1874            None => DefaultTemplateProvider::new(),
1875            Some(path) => {
1876                let data = if path.is_dir() {
1877                    TemplateLoader::load_from_directory(path)
1878                } else {
1879                    TemplateLoader::load_from_file(path)
1880                }
1881                .map_err(|e| {
1882                    SynthError::config(format!(
1883                        "Failed to load templates from {}: {e}",
1884                        path.display()
1885                    ))
1886                })?;
1887                let strategy = match config.templates.merge_strategy {
1888                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1889                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1890                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1891                        MergeStrategy::MergePreferFile
1892                    }
1893                };
1894                DefaultTemplateProvider::with_templates(data, strategy)
1895            }
1896        };
1897        Ok(Arc::new(provider))
1898    }
1899
1900    /// Create with default phase config.
1901    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1902        Self::new(config, PhaseConfig::default())
1903    }
1904
1905    /// Set a streaming phase sink for real-time output (builder pattern).
1906    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1907        self.phase_sink = Some(sink);
1908        self
1909    }
1910
1911    /// Set a streaming phase sink on an existing orchestrator.
1912    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1913        self.phase_sink = Some(sink);
1914    }
1915
1916    /// Emit a batch of items to the phase sink (if configured).
1917    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1918        if let Some(ref sink) = self.phase_sink {
1919            for item in items {
1920                if let Ok(value) = serde_json::to_value(item) {
1921                    if let Err(e) = sink.emit(phase, type_name, &value) {
1922                        warn!(
1923                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1924                        );
1925                    }
1926                }
1927            }
1928            if let Err(e) = sink.phase_complete(phase) {
1929                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1930            }
1931        }
1932    }
1933
1934    /// Enable/disable progress bars.
1935    pub fn with_progress(mut self, show: bool) -> Self {
1936        self.phase_config.show_progress = show;
1937        if show {
1938            self.multi_progress = Some(MultiProgress::new());
1939        }
1940        self
1941    }
1942
1943    /// Set the output path for disk space monitoring.
1944    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1945        let path = path.into();
1946        self.output_path = Some(path.clone());
1947        // Rebuild resource guard with the output path
1948        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1949        self
1950    }
1951
1952    /// Access the country pack registry.
1953    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1954        &self.country_pack_registry
1955    }
1956
1957    /// Look up a country pack by country code string.
1958    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1959        self.country_pack_registry.get_by_str(country)
1960    }
1961
1962    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1963    /// company, defaulting to `"US"` if no companies are configured.
1964    fn primary_country_code(&self) -> &str {
1965        self.config
1966            .companies
1967            .first()
1968            .map(|c| c.country.as_str())
1969            .unwrap_or("US")
1970    }
1971
1972    /// Resolve the country pack for the primary (first) company.
1973    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1974        self.country_pack_for(self.primary_country_code())
1975    }
1976
1977    /// Resolve the CoA framework from config/country-pack.
1978    fn resolve_coa_framework(&self) -> CoAFramework {
1979        if self.config.accounting_standards.enabled {
1980            match self.config.accounting_standards.framework {
1981                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1982                    return CoAFramework::FrenchPcg;
1983                }
1984                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1985                    return CoAFramework::GermanSkr04;
1986                }
1987                _ => {}
1988            }
1989        }
1990        // Fallback: derive from country pack
1991        let pack = self.primary_pack();
1992        match pack.accounting.framework.as_str() {
1993            "french_gaap" => CoAFramework::FrenchPcg,
1994            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1995            _ => CoAFramework::UsGaap,
1996        }
1997    }
1998
1999    /// Resolve the framework string consumed by
2000    /// [`datasynth_core::framework_accounts::FrameworkAccounts::for_framework`].
2001    ///
2002    /// Mirrors [`Self::resolve_coa_framework`] but returns the snake_case
2003    /// label (`"us_gaap"`, `"ifrs"`, `"french_gaap"`, `"german_gaap"`,
2004    /// `"dual_reporting"`) that the framework-aware account classifier
2005    /// expects. Country drives selection because the country pack's CoA
2006    /// loader is what actually picks the numbering convention (SKR04 for
2007    /// DE, PCG for FR) — the entity's `accounting_framework` label can
2008    /// disagree with the chart it's posted against (e.g. a DE entity
2009    /// flagged `accounting_framework: ifrs` still gets SKR04 codes from
2010    /// its country pack).
2011    fn resolve_framework_str(&self) -> &'static str {
2012        // Country first — the chart of accounts loaded for this company
2013        // is keyed by country pack, so the code numbering convention
2014        // follows country, not the framework label.
2015        match self.primary_country_code().to_ascii_uppercase().as_str() {
2016            "DE" | "AT" => "german_gaap",
2017            "FR" | "BE" | "LU" => "french_gaap",
2018            _ => {
2019                // No country override → take the framework label.
2020                if self.config.accounting_standards.enabled {
2021                    match self.config.accounting_standards.framework {
2022                        Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
2023                            return "french_gaap";
2024                        }
2025                        Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
2026                            return "german_gaap";
2027                        }
2028                        Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
2029                            return "ifrs";
2030                        }
2031                        Some(
2032                            datasynth_config::schema::AccountingFrameworkConfig::DualReporting,
2033                        ) => {
2034                            return "dual_reporting";
2035                        }
2036                        Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap)
2037                        | None => {}
2038                    }
2039                }
2040                "us_gaap"
2041            }
2042        }
2043    }
2044
2045    /// Check if copula generators are available.
2046    ///
2047    /// Returns true if the orchestrator has copula generators for preserving
2048    /// correlations (typically from fingerprint-based generation).
2049    pub fn has_copulas(&self) -> bool {
2050        !self.copula_generators.is_empty()
2051    }
2052
2053    /// Get the copula generators.
2054    ///
2055    /// Returns a reference to the copula generators for use during generation.
2056    /// These can be used to generate correlated samples that preserve the
2057    /// statistical relationships from the source data.
2058    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
2059        &self.copula_generators
2060    }
2061
2062    /// Get a mutable reference to the copula generators.
2063    ///
2064    /// Allows generators to sample from copulas during data generation.
2065    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
2066        &mut self.copula_generators
2067    }
2068
2069    /// Sample correlated values from a named copula.
2070    ///
2071    /// Returns None if the copula doesn't exist.
2072    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
2073        self.copula_generators
2074            .iter_mut()
2075            .find(|c| c.name == copula_name)
2076            .map(|c| c.generator.sample())
2077    }
2078
2079    /// Create an orchestrator from a fingerprint file.
2080    ///
2081    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
2082    /// and creates an orchestrator configured to generate data matching
2083    /// the statistical properties of the original data.
2084    ///
2085    /// # Arguments
2086    /// * `fingerprint_path` - Path to the .dsf fingerprint file
2087    /// * `phase_config` - Phase configuration for generation
2088    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2089    ///
2090    /// # Example
2091    /// ```no_run
2092    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
2093    /// use std::path::Path;
2094    ///
2095    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
2096    ///     Path::new("fingerprint.dsf"),
2097    ///     PhaseConfig::default(),
2098    ///     1.0,
2099    /// ).unwrap();
2100    /// ```
2101    pub fn from_fingerprint(
2102        fingerprint_path: &std::path::Path,
2103        phase_config: PhaseConfig,
2104        scale: f64,
2105    ) -> SynthResult<Self> {
2106        info!("Loading fingerprint from: {}", fingerprint_path.display());
2107
2108        // Read the fingerprint
2109        let reader = FingerprintReader::new();
2110        let fingerprint = reader
2111            .read_from_file(fingerprint_path)
2112            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2113
2114        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2115    }
2116
2117    /// Create an orchestrator from a loaded fingerprint.
2118    ///
2119    /// # Arguments
2120    /// * `fingerprint` - The loaded fingerprint
2121    /// * `phase_config` - Phase configuration for generation
2122    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2123    pub fn from_fingerprint_data(
2124        fingerprint: Fingerprint,
2125        phase_config: PhaseConfig,
2126        scale: f64,
2127    ) -> SynthResult<Self> {
2128        info!(
2129            "Synthesizing config from fingerprint (version: {}, tables: {})",
2130            fingerprint.manifest.version,
2131            fingerprint.schema.tables.len()
2132        );
2133
2134        // Generate a seed for the synthesis
2135        let seed: u64 = rand::random();
2136        info!("Fingerprint synthesis seed: {}", seed);
2137
2138        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2139        let options = SynthesisOptions {
2140            scale,
2141            seed: Some(seed),
2142            preserve_correlations: true,
2143            inject_anomalies: true,
2144        };
2145        let synthesizer = ConfigSynthesizer::with_options(options);
2146
2147        // Synthesize full result including copula generators
2148        let synthesis_result = synthesizer
2149            .synthesize_full(&fingerprint, seed)
2150            .map_err(|e| {
2151                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2152            })?;
2153
2154        // Start with a base config from the fingerprint's industry if available
2155        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2156            Self::base_config_for_industry(industry)
2157        } else {
2158            Self::base_config_for_industry("manufacturing")
2159        };
2160
2161        // Apply the synthesized patches
2162        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2163
2164        // Log synthesis results
2165        info!(
2166            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2167            fingerprint.schema.tables.len(),
2168            scale,
2169            synthesis_result.copula_generators.len()
2170        );
2171
2172        if !synthesis_result.copula_generators.is_empty() {
2173            for spec in &synthesis_result.copula_generators {
2174                info!(
2175                    "  Copula '{}' for table '{}': {} columns",
2176                    spec.name,
2177                    spec.table,
2178                    spec.columns.len()
2179                );
2180            }
2181        }
2182
2183        // Create the orchestrator with the synthesized config
2184        let mut orchestrator = Self::new(config, phase_config)?;
2185
2186        // Store copula generators for use during generation
2187        orchestrator.copula_generators = synthesis_result.copula_generators;
2188
2189        Ok(orchestrator)
2190    }
2191
2192    /// Create a base config for a given industry.
2193    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2194        use datasynth_config::presets::create_preset;
2195        use datasynth_config::TransactionVolume;
2196        use datasynth_core::models::{CoAComplexity, IndustrySector};
2197
2198        let sector = match industry.to_lowercase().as_str() {
2199            "manufacturing" => IndustrySector::Manufacturing,
2200            "retail" => IndustrySector::Retail,
2201            "financial" | "financial_services" => IndustrySector::FinancialServices,
2202            "healthcare" => IndustrySector::Healthcare,
2203            "technology" | "tech" => IndustrySector::Technology,
2204            _ => IndustrySector::Manufacturing,
2205        };
2206
2207        // Create a preset with reasonable defaults
2208        create_preset(
2209            sector,
2210            1,  // company count
2211            12, // period months
2212            CoAComplexity::Medium,
2213            TransactionVolume::TenK,
2214        )
2215    }
2216
2217    /// Apply a config patch to a GeneratorConfig.
2218    fn apply_config_patch(
2219        mut config: GeneratorConfig,
2220        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2221    ) -> GeneratorConfig {
2222        use datasynth_fingerprint::synthesis::ConfigValue;
2223
2224        for (key, value) in patch.values() {
2225            match (key.as_str(), value) {
2226                // Transaction count is handled via TransactionVolume enum on companies
2227                // Log it but cannot directly set it (would need to modify company volumes)
2228                ("transactions.count", ConfigValue::Integer(n)) => {
2229                    info!(
2230                        "Fingerprint suggests {} transactions (apply via company volumes)",
2231                        n
2232                    );
2233                }
2234                ("global.period_months", ConfigValue::Integer(n)) => {
2235                    config.global.period_months = (*n).clamp(1, 120) as u32;
2236                }
2237                ("global.start_date", ConfigValue::String(s)) => {
2238                    config.global.start_date = s.clone();
2239                }
2240                ("global.seed", ConfigValue::Integer(n)) => {
2241                    config.global.seed = Some(*n as u64);
2242                }
2243                ("fraud.enabled", ConfigValue::Bool(b)) => {
2244                    config.fraud.enabled = *b;
2245                }
2246                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2247                    config.fraud.fraud_rate = *f;
2248                }
2249                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2250                    config.data_quality.enabled = *b;
2251                }
2252                // Handle anomaly injection paths (mapped to fraud config)
2253                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2254                    config.fraud.enabled = *b;
2255                }
2256                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2257                    config.fraud.fraud_rate = *f;
2258                }
2259                _ => {
2260                    debug!("Ignoring unknown config patch key: {}", key);
2261                }
2262            }
2263        }
2264
2265        config
2266    }
2267
2268    /// Build a resource guard from the configuration.
2269    fn build_resource_guard(
2270        config: &GeneratorConfig,
2271        output_path: Option<PathBuf>,
2272    ) -> ResourceGuard {
2273        let mut builder = ResourceGuardBuilder::new();
2274
2275        // Configure memory limit if set
2276        if config.global.memory_limit_mb > 0 {
2277            builder = builder.memory_limit(config.global.memory_limit_mb);
2278        }
2279
2280        // Configure disk monitoring for output path
2281        if let Some(path) = output_path {
2282            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2283        }
2284
2285        // Use conservative degradation settings for production safety
2286        builder = builder.conservative();
2287
2288        builder.build()
2289    }
2290
2291    /// Check resources (memory, disk, CPU) and return degradation level.
2292    ///
2293    /// Returns an error if hard limits are exceeded.
2294    /// Returns Ok(DegradationLevel) indicating current resource state.
2295    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2296        self.resource_guard.check()
2297    }
2298
2299    /// Check resources with logging.
2300    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2301        let level = self.resource_guard.check()?;
2302
2303        if level != DegradationLevel::Normal {
2304            warn!(
2305                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2306                phase,
2307                level,
2308                self.resource_guard.current_memory_mb(),
2309                self.resource_guard.available_disk_mb()
2310            );
2311        }
2312
2313        Ok(level)
2314    }
2315
2316    /// Get current degradation actions based on resource state.
2317    fn get_degradation_actions(&self) -> DegradationActions {
2318        self.resource_guard.get_actions()
2319    }
2320
2321    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2322    fn check_memory_limit(&self) -> SynthResult<()> {
2323        self.check_resources()?;
2324        Ok(())
2325    }
2326
2327    /// Run the complete generation workflow.
2328    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2329        info!("Starting enhanced generation workflow");
2330        info!(
2331            "Config: industry={:?}, period_months={}, companies={}",
2332            self.config.global.industry,
2333            self.config.global.period_months,
2334            self.config.companies.len()
2335        );
2336
2337        // Set decimal serialization mode (thread-local, affects JSON output).
2338        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2339        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2340        datasynth_core::serde_decimal::set_numeric_native(is_native);
2341        struct NumericModeGuard;
2342        impl Drop for NumericModeGuard {
2343            fn drop(&mut self) {
2344                datasynth_core::serde_decimal::set_numeric_native(false);
2345            }
2346        }
2347        let _numeric_guard = if is_native {
2348            Some(NumericModeGuard)
2349        } else {
2350            None
2351        };
2352
2353        // Initial resource check before starting
2354        let initial_level = self.check_resources_with_log("initial")?;
2355        if initial_level == DegradationLevel::Emergency {
2356            return Err(SynthError::resource(
2357                "Insufficient resources to start generation",
2358            ));
2359        }
2360
2361        let mut stats = EnhancedGenerationStatistics {
2362            companies_count: self.config.companies.len(),
2363            period_months: self.config.global.period_months,
2364            ..Default::default()
2365        };
2366
2367        // Phase 1: Chart of Accounts
2368        let coa = self.phase_chart_of_accounts(&mut stats)?;
2369
2370        // Phase 2: Master Data
2371        self.phase_master_data(&mut stats)?;
2372
2373        // Emit master data to stream sink
2374        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2375        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2376        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2377
2378        // Phase 3: Document Flows + Subledger Linking
2379        let (mut document_flows, mut subledger, fa_journal_entries) =
2380            self.phase_document_flows(&mut stats)?;
2381
2382        // Emit document flows to stream sink
2383        self.emit_phase_items(
2384            "document_flows",
2385            "PurchaseOrder",
2386            &document_flows.purchase_orders,
2387        );
2388        self.emit_phase_items(
2389            "document_flows",
2390            "GoodsReceipt",
2391            &document_flows.goods_receipts,
2392        );
2393        self.emit_phase_items(
2394            "document_flows",
2395            "VendorInvoice",
2396            &document_flows.vendor_invoices,
2397        );
2398        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2399        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2400
2401        // Phase 3b: Opening Balances (before JE generation)
2402        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2403
2404        // Phase 3c: Convert opening balances to journal entries and prepend them.
2405        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2406        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2407        // balance map type.
2408        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2409            .iter()
2410            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2411            .collect();
2412        if !opening_balance_jes.is_empty() {
2413            debug!(
2414                "Prepending {} opening balance JEs to entries",
2415                opening_balance_jes.len()
2416            );
2417        }
2418
2419        // Phase 4: Journal Entries
2420        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2421
2422        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2423        // starts from the correct initial state.
2424        if !opening_balance_jes.is_empty() {
2425            let mut combined = opening_balance_jes;
2426            combined.extend(entries);
2427            entries = combined;
2428        }
2429
2430        // Phase 4c: Append FA acquisition journal entries to main entries
2431        if !fa_journal_entries.is_empty() {
2432            debug!(
2433                "Appending {} FA acquisition JEs to main entries",
2434                fa_journal_entries.len()
2435            );
2436            entries.extend(fa_journal_entries);
2437        }
2438
2439        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2440        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2441
2442        // Get current degradation actions for optional phases
2443        let actions = self.get_degradation_actions();
2444
2445        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2446        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2447
2448        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2449        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2450        if !sourcing.contracts.is_empty() {
2451            let mut linked_count = 0usize;
2452            // Collect (vendor_id, po_id) pairs from P2P chains
2453            let po_vendor_pairs: Vec<(String, String)> = document_flows
2454                .p2p_chains
2455                .iter()
2456                .map(|chain| {
2457                    (
2458                        chain.purchase_order.vendor_id.clone(),
2459                        chain.purchase_order.header.document_id.clone(),
2460                    )
2461                })
2462                .collect();
2463
2464            for chain in &mut document_flows.p2p_chains {
2465                if chain.purchase_order.contract_id.is_none() {
2466                    if let Some(contract) = sourcing
2467                        .contracts
2468                        .iter()
2469                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2470                    {
2471                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2472                        linked_count += 1;
2473                    }
2474                }
2475            }
2476
2477            // Populate reverse FK: purchase_order_ids on each contract
2478            for contract in &mut sourcing.contracts {
2479                let po_ids: Vec<String> = po_vendor_pairs
2480                    .iter()
2481                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2482                    .map(|(_, po_id)| po_id.clone())
2483                    .collect();
2484                if !po_ids.is_empty() {
2485                    contract.purchase_order_ids = po_ids;
2486                }
2487            }
2488
2489            if linked_count > 0 {
2490                debug!(
2491                    "Linked {} purchase orders to S2C contracts by vendor match",
2492                    linked_count
2493                );
2494            }
2495        }
2496
2497        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2498        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2499
2500        // Phase 5c: Append IC journal entries to main entries
2501        if !intercompany.seller_journal_entries.is_empty()
2502            || !intercompany.buyer_journal_entries.is_empty()
2503        {
2504            let ic_je_count = intercompany.seller_journal_entries.len()
2505                + intercompany.buyer_journal_entries.len();
2506            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2507            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2508            debug!(
2509                "Appended {} IC journal entries to main entries",
2510                ic_je_count
2511            );
2512        }
2513
2514        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2515        if !intercompany.elimination_entries.is_empty() {
2516            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2517                &intercompany.elimination_entries,
2518            );
2519            if !elim_jes.is_empty() {
2520                debug!(
2521                    "Appended {} elimination journal entries to main entries",
2522                    elim_jes.len()
2523                );
2524                // IC elimination net-zero assertion (v2.5 hardening)
2525                let elim_debit: rust_decimal::Decimal =
2526                    elim_jes.iter().map(|je| je.total_debit()).sum();
2527                let elim_credit: rust_decimal::Decimal =
2528                    elim_jes.iter().map(|je| je.total_credit()).sum();
2529                let elim_diff = (elim_debit - elim_credit).abs();
2530                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2531                if elim_diff > tolerance {
2532                    return Err(datasynth_core::error::SynthError::generation(format!(
2533                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2534                        elim_debit, elim_credit, elim_diff, tolerance
2535                    )));
2536                }
2537                debug!(
2538                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2539                    elim_debit, elim_credit, elim_diff
2540                );
2541                entries.extend(elim_jes);
2542            }
2543        }
2544
2545        // Phase 5e: Wire IC source documents into document flow snapshot
2546        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2547            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2548                document_flows
2549                    .customer_invoices
2550                    .extend(ic_docs.seller_invoices.iter().cloned());
2551                document_flows
2552                    .purchase_orders
2553                    .extend(ic_docs.buyer_orders.iter().cloned());
2554                document_flows
2555                    .goods_receipts
2556                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2557                document_flows
2558                    .vendor_invoices
2559                    .extend(ic_docs.buyer_invoices.iter().cloned());
2560                debug!(
2561                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2562                    ic_docs.seller_invoices.len(),
2563                    ic_docs.buyer_orders.len(),
2564                    ic_docs.buyer_goods_receipts.len(),
2565                    ic_docs.buyer_invoices.len(),
2566                );
2567            }
2568        }
2569
2570        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2571        let hr = self.phase_hr_data(&mut stats)?;
2572
2573        // Phase 6b: Generate JEs from payroll runs
2574        if !hr.payroll_runs.is_empty() {
2575            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2576            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2577            entries.extend(payroll_jes);
2578        }
2579
2580        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2581        if !hr.pension_journal_entries.is_empty() {
2582            debug!(
2583                "Generated {} JEs from pension plans",
2584                hr.pension_journal_entries.len()
2585            );
2586            entries.extend(hr.pension_journal_entries.iter().cloned());
2587        }
2588
2589        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2590        if !hr.stock_comp_journal_entries.is_empty() {
2591            debug!(
2592                "Generated {} JEs from stock-based compensation",
2593                hr.stock_comp_journal_entries.len()
2594            );
2595            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2596        }
2597
2598        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2599        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2600
2601        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2602        if !manufacturing_snap.production_orders.is_empty() {
2603            let currency = self
2604                .config
2605                .companies
2606                .first()
2607                .map(|c| c.currency.as_str())
2608                .unwrap_or("USD");
2609            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2610                &manufacturing_snap.production_orders,
2611                &manufacturing_snap.quality_inspections,
2612                currency,
2613            );
2614            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2615            entries.extend(mfg_jes);
2616        }
2617
2618        // Phase 7a-warranty: Generate warranty provisions per company
2619        if !manufacturing_snap.quality_inspections.is_empty() {
2620            let framework = match self.config.accounting_standards.framework {
2621                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2622                _ => "US_GAAP",
2623            };
2624            for company in &self.config.companies {
2625                let company_orders: Vec<_> = manufacturing_snap
2626                    .production_orders
2627                    .iter()
2628                    .filter(|o| o.company_code == company.code)
2629                    .cloned()
2630                    .collect();
2631                let company_inspections: Vec<_> = manufacturing_snap
2632                    .quality_inspections
2633                    .iter()
2634                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2635                    .cloned()
2636                    .collect();
2637                if company_inspections.is_empty() {
2638                    continue;
2639                }
2640                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2641                let warranty_result = warranty_gen.generate(
2642                    &company.code,
2643                    &company_orders,
2644                    &company_inspections,
2645                    &company.currency,
2646                    framework,
2647                );
2648                if !warranty_result.journal_entries.is_empty() {
2649                    debug!(
2650                        "Generated {} warranty provision JEs for {}",
2651                        warranty_result.journal_entries.len(),
2652                        company.code
2653                    );
2654                    entries.extend(warranty_result.journal_entries);
2655                }
2656            }
2657        }
2658
2659        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2660        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2661        {
2662            let cogs_currency = self
2663                .config
2664                .companies
2665                .first()
2666                .map(|c| c.currency.as_str())
2667                .unwrap_or("USD");
2668            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2669                &document_flows.deliveries,
2670                &manufacturing_snap.production_orders,
2671                cogs_currency,
2672            );
2673            if !cogs_jes.is_empty() {
2674                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2675                entries.extend(cogs_jes);
2676            }
2677        }
2678
2679        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2680        //
2681        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2682        // subledger inventory positions.  Here we reconcile them so that position balances
2683        // reflect the actual stock movements within the generation period.
2684        if !manufacturing_snap.inventory_movements.is_empty()
2685            && !subledger.inventory_positions.is_empty()
2686        {
2687            use datasynth_core::models::MovementType as MfgMovementType;
2688            let mut receipt_count = 0usize;
2689            let mut issue_count = 0usize;
2690            for movement in &manufacturing_snap.inventory_movements {
2691                // Find a matching position by material code and company
2692                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2693                    p.material_id == movement.material_code
2694                        && p.company_code == movement.entity_code
2695                }) {
2696                    match movement.movement_type {
2697                        MfgMovementType::GoodsReceipt => {
2698                            // Increase stock and update weighted-average cost
2699                            pos.add_quantity(
2700                                movement.quantity,
2701                                movement.value,
2702                                movement.movement_date,
2703                            );
2704                            receipt_count += 1;
2705                        }
2706                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2707                            // Decrease stock (best-effort; silently skip if insufficient)
2708                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2709                            issue_count += 1;
2710                        }
2711                        _ => {}
2712                    }
2713                }
2714            }
2715            debug!(
2716                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2717                manufacturing_snap.inventory_movements.len(),
2718                receipt_count,
2719                issue_count,
2720            );
2721        }
2722
2723        // Update final entry/line-item stats after all JE-generating phases
2724        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2725        if !entries.is_empty() {
2726            stats.total_entries = entries.len() as u64;
2727            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2728            debug!(
2729                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2730                stats.total_entries, stats.total_line_items
2731            );
2732        }
2733
2734        // Phase 7b: Apply internal controls to journal entries
2735        if self.config.internal_controls.enabled && !entries.is_empty() {
2736            info!("Phase 7b: Applying internal controls to journal entries");
2737            let control_config = ControlGeneratorConfig {
2738                exception_rate: self.config.internal_controls.exception_rate,
2739                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2740                enable_sox_marking: true,
2741                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2742                    self.config.internal_controls.sox_materiality_threshold,
2743                )
2744                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2745                ..Default::default()
2746            };
2747            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2748            for entry in &mut entries {
2749                control_gen.apply_controls(entry, &coa);
2750            }
2751            let with_controls = entries
2752                .iter()
2753                .filter(|e| !e.header.control_ids.is_empty())
2754                .count();
2755            info!(
2756                "Applied controls to {} entries ({} with control IDs assigned)",
2757                entries.len(),
2758                with_controls
2759            );
2760        }
2761
2762        // Phase 7c: Extract SoD violations from annotated journal entries.
2763        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2764        // Here we materialise those flags into standalone SodViolation records.
2765        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2766            .iter()
2767            .filter(|e| e.header.sod_violation)
2768            .filter_map(|e| {
2769                e.header.sod_conflict_type.map(|ct| {
2770                    use datasynth_core::models::{RiskLevel, SodViolation};
2771                    let severity = match ct {
2772                        datasynth_core::models::SodConflictType::PaymentReleaser
2773                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2774                            RiskLevel::Critical
2775                        }
2776                        datasynth_core::models::SodConflictType::PreparerApprover
2777                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2778                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2779                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2780                            RiskLevel::High
2781                        }
2782                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2783                            RiskLevel::Medium
2784                        }
2785                    };
2786                    let action = format!(
2787                        "SoD conflict {:?} on entry {} ({})",
2788                        ct, e.header.document_id, e.header.company_code
2789                    );
2790                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2791                })
2792            })
2793            .collect();
2794        if !sod_violations.is_empty() {
2795            info!(
2796                "Phase 7c: Extracted {} SoD violations from {} entries",
2797                sod_violations.len(),
2798                entries.len()
2799            );
2800        }
2801
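2802        // Emit journal entries to stream sink (after the JE-generating phases above; JEs appended to `entries` later, e.g. the Phase 18a ECL/provision merges, are not part of this emission)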
2803        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2804
2805        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2806        //
2807        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2808        // document-level fraud are exempt from subsequent line-level flag
2809        // overwrites, and so downstream consumers see a coherent picture.
2810        //
2811        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2812        {
2813            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2814            if self.config.fraud.enabled && doc_rate > 0.0 {
2815                use datasynth_core::fraud_propagation::{
2816                    inject_document_fraud, propagate_documents_to_entries,
2817                };
2818                use datasynth_core::utils::weighted_select;
2819                use datasynth_core::FraudType;
2820                use rand_chacha::rand_core::SeedableRng;
2821
2822                let dist = &self.config.fraud.fraud_type_distribution;
2823                let fraud_type_weights: [(FraudType, f64); 8] = [
2824                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2825                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2826                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2827                    (
2828                        FraudType::ImproperCapitalization,
2829                        dist.expense_capitalization,
2830                    ),
2831                    (FraudType::SplitTransaction, dist.split_transaction),
2832                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2833                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2834                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2835                ];
2836                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2837                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2838                    if weights_sum <= 0.0 {
2839                        FraudType::FictitiousEntry
2840                    } else {
2841                        *weighted_select(rng, &fraud_type_weights)
2842                    }
2843                };
2844
2845                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2846                let mut doc_tagged = 0usize;
2847                macro_rules! inject_into {
2848                    ($collection:expr) => {{
2849                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2850                            $collection.iter_mut().map(|d| &mut d.header).collect();
2851                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2852                    }};
2853                }
2854                inject_into!(document_flows.purchase_orders);
2855                inject_into!(document_flows.goods_receipts);
2856                inject_into!(document_flows.vendor_invoices);
2857                inject_into!(document_flows.payments);
2858                inject_into!(document_flows.sales_orders);
2859                inject_into!(document_flows.deliveries);
2860                inject_into!(document_flows.customer_invoices);
2861                if doc_tagged > 0 {
2862                    info!(
2863                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2864                    );
2865                }
2866
2867                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2868                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2869                        Vec::new();
2870                    headers.extend(
2871                        document_flows
2872                            .purchase_orders
2873                            .iter()
2874                            .map(|d| d.header.clone()),
2875                    );
2876                    headers.extend(
2877                        document_flows
2878                            .goods_receipts
2879                            .iter()
2880                            .map(|d| d.header.clone()),
2881                    );
2882                    headers.extend(
2883                        document_flows
2884                            .vendor_invoices
2885                            .iter()
2886                            .map(|d| d.header.clone()),
2887                    );
2888                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2889                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2890                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2891                    headers.extend(
2892                        document_flows
2893                            .customer_invoices
2894                            .iter()
2895                            .map(|d| d.header.clone()),
2896                    );
2897                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2898                    if propagated > 0 {
2899                        info!(
2900                            "Propagated document-level fraud to {propagated} derived journal entries"
2901                        );
2902                    }
2903                }
2904            }
2905        }
2906
2907        // Phase 8: Anomaly Injection (after all JE-generating phases)
2908        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2909
2910        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2911        // through the anomaly injector.
2912        //
2913        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2914        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2915        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2916        //   - Any external mutation that sets is_fraud after the fact
2917        //
2918        // The anomaly injector already applies the same bias inline when it
2919        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2920        // so gating this sweep on `!is_anomaly` avoids double-application.
2921        //
2922        // Without this sweep, fraud entries from these paths show 0 lift on
2923        // the canonical forensic signals (is_round_1000, is_off_hours,
2924        // is_weekend, is_post_close), which is exactly what the SDK-side
2925        // evaluator caught in v3.1 — fraud features had worse lift than
2926        // baseline. See DS-3.1 post-deploy feedback.
2927        {
2928            use datasynth_core::fraud_bias::apply_fraud_behavioral_bias;
2929            use rand_chacha::rand_core::SeedableRng;
2930            let cfg = self.config.fraud.effective_bias().to_core();
2931            if cfg.enabled {
2932                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2933                let mut swept = 0usize;
2934                for entry in entries.iter_mut() {
2935                    if entry.header.is_fraud && !entry.header.is_anomaly {
2936                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2937                        swept += 1;
2938                    }
2939                }
2940                if swept > 0 {
2941                    info!(
2942                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2943                         (doc-propagated + je_generator intrinsic fraud)"
2944                    );
2945                }
2946            }
2947        }
2948
2949        // Emit anomaly labels to stream sink
2950        self.emit_phase_items(
2951            "anomaly_injection",
2952            "LabeledAnomaly",
2953            &anomaly_labels.labels,
2954        );
2955
2956        // Propagate fraud labels from journal entries to source documents.
2957        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2958        // instead of tracing through document_references.json.
2959        //
2960        // Gated by `fraud.propagate_to_document` (default true) — disable when
2961        // downstream consumers want document fraud flags to reflect only
2962        // document-level injection, not line-level.
2963        if self.config.fraud.propagate_to_document {
2964            use std::collections::HashMap;
2965            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2966            //
2967            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2968            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2969            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2970            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2971            // we register BOTH the prefixed form (raw reference) AND the bare form
2972            // (post-colon portion) in the map. Also register the JE's document_id
2973            // UUID so documents that set `journal_entry_id` match via that path.
2974            //
2975            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2976            // looked up "foo", silently producing 0 propagations.
2977            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2978            for je in &entries {
2979                if je.header.is_fraud {
2980                    if let Some(ref fraud_type) = je.header.fraud_type {
2981                        if let Some(ref reference) = je.header.reference {
2982                            // Register the full reference ("GR:PO-2024-000001")
2983                            fraud_map.insert(reference.clone(), *fraud_type);
2984                            // Also register the bare document ID ("PO-2024-000001")
2985                            // by stripping the "PREFIX:" if present.
2986                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2987                                if !bare.is_empty() {
2988                                    fraud_map.insert(bare.to_string(), *fraud_type);
2989                                }
2990                            }
2991                        }
2992                        // Also tag via journal_entry_id on document headers
2993                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2994                    }
2995                }
2996            }
2997            if !fraud_map.is_empty() {
2998                let mut propagated = 0usize;
2999                // Use DocumentHeader::propagate_fraud method for each doc type
3000                macro_rules! propagate_to {
3001                    ($collection:expr) => {
3002                        for doc in &mut $collection {
3003                            if doc.header.propagate_fraud(&fraud_map) {
3004                                propagated += 1;
3005                            }
3006                        }
3007                    };
3008                }
3009                propagate_to!(document_flows.purchase_orders);
3010                propagate_to!(document_flows.goods_receipts);
3011                propagate_to!(document_flows.vendor_invoices);
3012                propagate_to!(document_flows.payments);
3013                propagate_to!(document_flows.sales_orders);
3014                propagate_to!(document_flows.deliveries);
3015                propagate_to!(document_flows.customer_invoices);
3016                if propagated > 0 {
3017                    info!(
3018                        "Propagated fraud labels to {} document flow records",
3019                        propagated
3020                    );
3021                }
3022            }
3023        }
3024
3025        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
3026        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
3027
3028        // Emit red flags to stream sink
3029        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
3030
3031        // Phase 26b: Collusion Ring Generation (after red flags)
3032        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
3033
3034        // Emit collusion rings to stream sink
3035        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
3036
3037        // Phase 8d: W8.1 — TB drift-correction pass.  When a TB anchor prior is
3038        // loaded (industry bundle with real per-account targets), emit balanced
3039        // "SA" adjustment JEs to nudge the synthetic balance sheet toward the
3040        // corpus-median shape before final balance validation runs.
3041        self.phase_tb_drift_correction(&mut entries)?;
3042
3043        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
3044        let balance_validation = self.phase_balance_validation(&entries)?;
3045
3046        // Phase 9a: COA coverage — every gl_account in JEs must exist in the
3047        // chart of accounts. Soft warning by default; hard fail when the
3048        // user passes --validate-coa-coverage / sets the strict flag.
3049        self.validate_coa_coverage(&entries, coa.as_ref())?;
3050
3051        // Phase 9b: GL-to-Subledger Reconciliation
3052        let subledger_reconciliation =
3053            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
3054
3055        // Phase 10: Data Quality Injection
3056        let (data_quality_stats, quality_issues) =
3057            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
3058
3059        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
3060        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
3061
3062        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
3063        {
3064            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
3065
3066            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
3067            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
3068            let mut unbalanced_clean = 0usize;
3069            for je in &entries {
3070                if je.header.is_fraud || je.header.is_anomaly {
3071                    continue;
3072                }
3073                let diff = (je.total_debit() - je.total_credit()).abs();
3074                if diff > tolerance {
3075                    unbalanced_clean += 1;
3076                    if unbalanced_clean <= 3 {
3077                        warn!(
3078                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
3079                            je.header.document_id,
3080                            je.total_debit(),
3081                            je.total_credit(),
3082                            diff
3083                        );
3084                    }
3085                }
3086            }
3087            if unbalanced_clean > 0 {
3088                return Err(datasynth_core::error::SynthError::generation(format!(
3089                    "{} non-anomaly JEs are unbalanced (debits != credits). \
3090                     First few logged above. Tolerance={}",
3091                    unbalanced_clean, tolerance
3092                )));
3093            }
3094            debug!(
3095                "Phase 10c: All {} non-anomaly JEs individually balanced",
3096                entries
3097                    .iter()
3098                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3099                    .count()
3100            );
3101
3102            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
3103            let company_codes: Vec<String> = self
3104                .config
3105                .companies
3106                .iter()
3107                .map(|c| c.code.clone())
3108                .collect();
3109            for company_code in &company_codes {
3110                let mut assets = rust_decimal::Decimal::ZERO;
3111                let mut liab_equity = rust_decimal::Decimal::ZERO;
3112
3113                for entry in &entries {
3114                    if entry.header.company_code != *company_code {
3115                        continue;
3116                    }
3117                    for line in &entry.lines {
3118                        let acct = &line.gl_account;
3119                        let net = line.debit_amount - line.credit_amount;
3120                        // Asset accounts (1xxx): normal debit balance
3121                        if acct.starts_with('1') {
3122                            assets += net;
3123                        }
3124                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3125                        else if acct.starts_with('2') || acct.starts_with('3') {
3126                            liab_equity -= net; // credit-normal, so negate debit-net
3127                        }
3128                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3129                        // so they net to zero after closing entries
3130                    }
3131                }
3132
3133                let bs_diff = (assets - liab_equity).abs();
3134                if bs_diff > tolerance {
3135                    warn!(
3136                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3137                         revenue/expense closing entries may not fully offset",
3138                        company_code, assets, liab_equity, bs_diff
3139                    );
3140                    // Warn rather than error: multi-period datasets may have timing
3141                    // differences from accruals/deferrals that resolve in later periods.
3142                    // The per-JE debits = credits check (Assert 1) is the hard gate.
3143                } else {
3144                    debug!(
3145                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3146                        company_code, assets, liab_equity, bs_diff
3147                    );
3148                }
3149            }
3150
3151            info!("Phase 10c: All generation-time accounting assertions passed");
3152        }
3153
3154        // Phase 11: Audit Data
3155        let audit = self.phase_audit_data(&entries, &mut stats)?;
3156
3157        // Phase 12: Banking KYC/AML Data
3158        let mut banking = self.phase_banking_data(&mut stats)?;
3159
3160        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3161        // Creates coherence between the accounting layer (payments, JEs) and the
3162        // banking layer (bank transactions). A vendor invoice payment now appears
3163        // on both sides with cross-references and fraud labels propagated.
3164        if self.phase_config.generate_banking
3165            && !document_flows.payments.is_empty()
3166            && !banking.accounts.is_empty()
3167        {
3168            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3169            if bridge_rate > 0.0 {
3170                let mut bridge =
3171                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3172                        self.seed,
3173                    );
3174                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3175                    &document_flows.payments,
3176                    &banking.customers,
3177                    &banking.accounts,
3178                    bridge_rate,
3179                );
3180                info!(
3181                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3182                    bridge_stats.bridged_count,
3183                    bridge_stats.transactions_emitted,
3184                    bridge_stats.fraud_propagated,
3185                );
3186                let bridged_count = bridged_txns.len();
3187                banking.transactions.extend(bridged_txns);
3188
3189                // Re-run velocity computation so bridged txns also get features
3190                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3191                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3192                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3193                        &mut banking.transactions,
3194                    );
3195                }
3196
3197                // Recompute suspicious count after bridging
3198                banking.suspicious_count = banking
3199                    .transactions
3200                    .iter()
3201                    .filter(|t| t.is_suspicious)
3202                    .count();
3203                stats.banking_transaction_count = banking.transactions.len();
3204                stats.banking_suspicious_count = banking.suspicious_count;
3205            }
3206        }
3207
3208        // Phase 13: Graph Export
3209        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3210
3211        // Phase 14: LLM Enrichment
3212        self.phase_llm_enrichment(&mut stats);
3213
3214        // Phase 15: Diffusion Enhancement
3215        self.phase_diffusion_enhancement(&entries, &mut stats);
3216
3217        // Phase 16: Causal Overlay
3218        self.phase_causal_overlay(&mut stats);
3219
3220        // Phase 17: Bank Reconciliation + Financial Statements
3221        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3222        // provision data (from accounting_standards / tax snapshots) can be wired in.
3223        let mut financial_reporting = self.phase_financial_reporting(
3224            &document_flows,
3225            &entries,
3226            &coa,
3227            &hr,
3228            &audit,
3229            &mut stats,
3230        )?;
3231
3232        // BS coherence check: assets = liabilities + equity
3233        {
3234            use datasynth_core::models::StatementType;
3235            for stmt in &financial_reporting.consolidated_statements {
3236                if stmt.statement_type == StatementType::BalanceSheet {
3237                    let total_assets: rust_decimal::Decimal = stmt
3238                        .line_items
3239                        .iter()
3240                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3241                        .map(|li| li.amount)
3242                        .sum();
3243                    let total_le: rust_decimal::Decimal = stmt
3244                        .line_items
3245                        .iter()
3246                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3247                        .map(|li| li.amount)
3248                        .sum();
3249                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3250                        warn!(
3251                            "BS equation imbalance: assets={}, L+E={}",
3252                            total_assets, total_le
3253                        );
3254                    }
3255                }
3256            }
3257        }
3258
3259        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3260        let accounting_standards =
3261            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3262
3263        // Phase 18a: Merge ECL journal entries into main GL
3264        if !accounting_standards.ecl_journal_entries.is_empty() {
3265            debug!(
3266                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3267                accounting_standards.ecl_journal_entries.len()
3268            );
3269            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3270        }
3271
3272        // Phase 18a: Merge provision journal entries into main GL
3273        if !accounting_standards.provision_journal_entries.is_empty() {
3274            debug!(
3275                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3276                accounting_standards.provision_journal_entries.len()
3277            );
3278            entries.extend(
3279                accounting_standards
3280                    .provision_journal_entries
3281                    .iter()
3282                    .cloned(),
3283            );
3284        }
3285
3286        // Phase 18b: OCPM Events (after all process data is available)
3287        let mut ocpm = self.phase_ocpm_events(
3288            &document_flows,
3289            &sourcing,
3290            &hr,
3291            &manufacturing_snap,
3292            &banking,
3293            &audit,
3294            &financial_reporting,
3295            &mut stats,
3296        )?;
3297
3298        // Emit OCPM events to stream sink
3299        if let Some(ref event_log) = ocpm.event_log {
3300            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3301        }
3302
3303        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3304        if let Some(ref event_log) = ocpm.event_log {
3305            // Build reverse index: document_ref → indices of the matching events in `event_log.events`
3306            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3307                std::collections::HashMap::new();
3308            for (idx, event) in event_log.events.iter().enumerate() {
3309                if let Some(ref doc_ref) = event.document_ref {
3310                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3311                }
3312            }
3313
3314            if !doc_index.is_empty() {
3315                let mut annotated = 0usize;
3316                for entry in &mut entries {
3317                    let doc_id_str = entry.header.document_id.to_string();
3318                    // Collect matching event indices from document_id and reference
3319                    let mut matched_indices: Vec<usize> = Vec::new();
3320                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3321                        matched_indices.extend(indices);
3322                    }
3323                    if let Some(ref reference) = entry.header.reference {
3324                        let bare_ref = reference
3325                            .find(':')
3326                            .map(|i| &reference[i + 1..])
3327                            .unwrap_or(reference.as_str());
3328                        if let Some(indices) = doc_index.get(bare_ref) {
3329                            for &idx in indices {
3330                                if !matched_indices.contains(&idx) {
3331                                    matched_indices.push(idx);
3332                                }
3333                            }
3334                        }
3335                    }
3336                    // Apply matches to JE header
3337                    if !matched_indices.is_empty() {
3338                        for &idx in &matched_indices {
3339                            let event = &event_log.events[idx];
3340                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3341                                entry.header.ocpm_event_ids.push(event.event_id);
3342                            }
3343                            for obj_ref in &event.object_refs {
3344                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3345                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3346                                }
3347                            }
3348                            if entry.header.ocpm_case_id.is_none() {
3349                                entry.header.ocpm_case_id = event.case_id;
3350                            }
3351                        }
3352                        annotated += 1;
3353                    }
3354                }
3355                debug!(
3356                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3357                    annotated
3358                );
3359            }
3360        }
3361
3362        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3363        // IC eliminations, opening balances, standards-driven entries) so
3364        // every JournalEntry carries at least one `ocpm_event_ids` link.
3365        if let Some(ref mut event_log) = ocpm.event_log {
3366            let synthesized =
3367                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3368            if synthesized > 0 {
3369                info!(
3370                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3371                );
3372            }
3373
3374            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3375            // events and their owning CaseTrace. Without this, every exported
3376            // OCEL event has `is_anomaly = false` even when the underlying JE
3377            // was flagged.
3378            let anomaly_events =
3379                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3380            if anomaly_events > 0 {
3381                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3382            }
3383
3384            // Phase 18f: Inject process-variant imperfections (rework, skipped
3385            // steps, out-of-order events) so conformance checkers see
3386            // realistic variant counts and fitness < 1.0. Uses the P2P
3387            // process rates as the single source of truth.
3388            let p2p_cfg = &self.config.ocpm.p2p_process;
3389            let any_imperfection = p2p_cfg.rework_probability > 0.0
3390                || p2p_cfg.skip_step_probability > 0.0
3391                || p2p_cfg.out_of_order_probability > 0.0;
3392            if any_imperfection {
3393                use rand_chacha::rand_core::SeedableRng;
3394                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3395                    rework_rate: p2p_cfg.rework_probability,
3396                    skip_rate: p2p_cfg.skip_step_probability,
3397                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3398                };
3399                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3400                let stats =
3401                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3402                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3403                    info!(
3404                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3405                        stats.rework, stats.skipped, stats.out_of_order
3406                    );
3407                }
3408            }
3409        }
3410
3411        // Phase 19: Sales Quotes, Management KPIs, Budgets
3412        let sales_kpi_budgets =
3413            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &entries, &mut stats)?;
3414
3415        // Phase 22: Treasury Data Generation
3416        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3417        // are included in the pre-tax income used by phase_tax_generation.
3418        let treasury =
3419            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3420
3421        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3422        if !treasury.journal_entries.is_empty() {
3423            debug!(
3424                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3425                treasury.journal_entries.len()
3426            );
3427            entries.extend(treasury.journal_entries.iter().cloned());
3428        }
3429
3430        // Phase 20: Tax Generation
3431        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3432
3433        // Phase 20 JEs: Merge tax posting journal entries into main GL
3434        if !tax.tax_posting_journal_entries.is_empty() {
3435            debug!(
3436                "Merging {} tax posting JEs into GL",
3437                tax.tax_posting_journal_entries.len()
3438            );
3439            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3440        }
3441
3442        // Phase 20b: FINAL fraud behavioral bias sweep.
3443        //
3444        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3445        // period close) extend `entries` with new journal entries that may
3446        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3447        // already-fraudulent transactions). Those late additions miss the
3448        // Phase 8b sweep and ship without bias applied — which is exactly
3449        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3450        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3451        //
3452        // Running the sweep one more time here guarantees every is_fraud
3453        // entry — regardless of which phase added it — has bias applied.
3454        // `!is_anomaly` gates out anomaly-injector entries (which already
3455        // got biased inline); the sweep is otherwise idempotent-ish:
3456        // weekend / off_hours re-fire to another valid weekend / off-hour,
3457        // post_close is guarded by `!is_post_close`, and round-dollar
3458        // rescaling on an already-round amount is a no-op (ratio = 1).
3459        {
3460            use datasynth_core::fraud_bias::apply_fraud_behavioral_bias;
3461            use rand_chacha::rand_core::SeedableRng;
3462            let cfg = self.config.fraud.effective_bias().to_core();
3463            if cfg.enabled {
3464                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3465                let mut swept = 0usize;
3466                for entry in entries.iter_mut() {
3467                    if entry.header.is_fraud && !entry.header.is_anomaly {
3468                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3469                        swept += 1;
3470                    }
3471                }
3472                if swept > 0 {
3473                    info!(
3474                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3475                         non-anomaly fraud entries (covers late-added JEs from \
3476                         ECL / provisions / treasury / tax / period-close)"
3477                    );
3478                }
3479            }
3480        }
3481
3482        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3483        // Build supplementary cash flow items from upstream JE data (depreciation,
3484        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3485        {
3486            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3487
3488            let framework_str = {
3489                use datasynth_config::schema::AccountingFrameworkConfig;
3490                match self
3491                    .config
3492                    .accounting_standards
3493                    .framework
3494                    .unwrap_or_default()
3495                {
3496                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3497                        "IFRS"
3498                    }
3499                    _ => "US_GAAP",
3500                }
3501            };
3502
3503            // Sum depreciation debits (account 6000) from close JEs
3504            let depreciation_total: rust_decimal::Decimal = entries
3505                .iter()
3506                .filter(|je| je.header.document_type == "CL")
3507                .flat_map(|je| je.lines.iter())
3508                .filter(|l| l.gl_account.starts_with("6000"))
3509                .map(|l| l.debit_amount)
3510                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3511
3512            // Sum interest expense debits (account 7100)
3513            let interest_paid: rust_decimal::Decimal = entries
3514                .iter()
3515                .flat_map(|je| je.lines.iter())
3516                .filter(|l| l.gl_account.starts_with("7100"))
3517                .map(|l| l.debit_amount)
3518                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3519
3520            // Sum tax expense debits (account 8000)
3521            let tax_paid: rust_decimal::Decimal = entries
3522                .iter()
3523                .flat_map(|je| je.lines.iter())
3524                .filter(|l| l.gl_account.starts_with("8000"))
3525                .map(|l| l.debit_amount)
3526                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3527
3528            // Sum capex debits on fixed assets (account 1500)
3529            let capex: rust_decimal::Decimal = entries
3530                .iter()
3531                .flat_map(|je| je.lines.iter())
3532                .filter(|l| l.gl_account.starts_with("1500"))
3533                .map(|l| l.debit_amount)
3534                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3535
3536            // Dividends paid: sum debits on dividends payable (account 2170), across all JEs
3537            let dividends_paid: rust_decimal::Decimal = entries
3538                .iter()
3539                .flat_map(|je| je.lines.iter())
3540                .filter(|l| l.gl_account == "2170")
3541                .map(|l| l.debit_amount)
3542                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3543
3544            let cf_data = CashFlowSourceData {
3545                depreciation_total,
3546                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3547                delta_ar: rust_decimal::Decimal::ZERO,
3548                delta_ap: rust_decimal::Decimal::ZERO,
3549                delta_inventory: rust_decimal::Decimal::ZERO,
3550                capex,
3551                debt_issuance: rust_decimal::Decimal::ZERO,
3552                debt_repayment: rust_decimal::Decimal::ZERO,
3553                interest_paid,
3554                tax_paid,
3555                dividends_paid,
3556                framework: framework_str.to_string(),
3557            };
3558
3559            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3560            if !enhanced_cf_items.is_empty() {
3561                // Merge into ALL cash flow statements (standalone + consolidated)
3562                use datasynth_core::models::StatementType;
3563                let merge_count = enhanced_cf_items.len();
3564                for stmt in financial_reporting
3565                    .financial_statements
3566                    .iter_mut()
3567                    .chain(financial_reporting.consolidated_statements.iter_mut())
3568                    .chain(
3569                        financial_reporting
3570                            .standalone_statements
3571                            .values_mut()
3572                            .flat_map(|v| v.iter_mut()),
3573                    )
3574                {
3575                    if stmt.statement_type == StatementType::CashFlowStatement {
3576                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3577                    }
3578                }
3579                info!(
3580                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3581                    merge_count
3582                );
3583            }
3584        }
3585
3586        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3587        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3588        self.generate_notes_to_financial_statements(
3589            &mut financial_reporting,
3590            &accounting_standards,
3591            &tax,
3592            &hr,
3593            &audit,
3594            &treasury,
3595        );
3596
3597        // Phase 20b-seg: Supplement segment reports from real JEs (v2.4)
3598        // When we have 2+ companies, derive segment data from actual journal entries
3599        // to complement or replace the FS-generator-based segments.
3600        if self.config.companies.len() >= 2 && !entries.is_empty() {
3601            let companies: Vec<(String, String)> = self
3602                .config
3603                .companies
3604                .iter()
3605                .map(|c| (c.code.clone(), c.name.clone()))
3606                .collect();
3607            let ic_elim: rust_decimal::Decimal =
3608                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3609            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3610                .unwrap_or(NaiveDate::MIN);
3611            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3612            let period_label = format!(
3613                "{}-{:02}",
3614                end_date.year(),
3615                (end_date - chrono::Days::new(1)).month()
3616            );
3617
3618            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3619            let (je_segments, je_recon) =
3620                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3621            if !je_segments.is_empty() {
3622                info!(
3623                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3624                    je_segments.len(),
3625                    ic_elim,
3626                );
3627                // Replace if existing segment_reports were empty; otherwise supplement
3628                if financial_reporting.segment_reports.is_empty() {
3629                    financial_reporting.segment_reports = je_segments;
3630                    financial_reporting.segment_reconciliations = vec![je_recon];
3631                } else {
3632                    financial_reporting.segment_reports.extend(je_segments);
3633                    financial_reporting.segment_reconciliations.push(je_recon);
3634                }
3635            }
3636        }
3637
3638        // Phase 21: ESG Data Generation
3639        let esg_snap =
3640            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3641
3642        // Phase 23: Project Accounting Data Generation
3643        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3644
3645        // Phase 24: Process Evolution + Organizational Events
3646        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3647
3648        // Phase 24b: Disruption Events
3649        let disruption_events = self.phase_disruption_events(&mut stats)?;
3650
3651        // Phase 27: Bi-Temporal Vendor Version Chains
3652        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3653
3654        // Phase 28: Entity Relationship Graph + Cross-Process Links
3655        let (entity_relationship_graph, cross_process_links) =
3656            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3657
3658        // Phase 29: Industry-specific GL accounts
3659        let industry_output = self.phase_industry_data(&mut stats);
3660
3661        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3662        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3663
3664        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3665        //
3666        // The neural / hybrid diffusion path was a documented L2 stub
3667        // in v3.x; actual neural-network training requires ML
3668        // infrastructure (PyTorch / candle bindings, GPU access,
3669        // training loops) that was never wired through the
3670        // orchestrator. Rather than keep a silently-no-op block that
3671        // misleads users into thinking neural training happens, v4.0
3672        // acknowledges the config — exposing stats so downstream
3673        // tooling can see the request — but emits a clear warning
3674        // when a non-statistical backend is requested. The statistical
3675        // diffusion backend continues to run via
3676        // `phase_diffusion_enhancement`.
3677        //
3678        // Users who need real neural diffusion: track the roadmap item
3679        // in the v4.x backlog and consider contributing the backend
3680        // (the `DiffusionBackend` trait is the integration point).
3681        if self.config.diffusion.enabled
3682            && (self.config.diffusion.backend == "neural"
3683                || self.config.diffusion.backend == "hybrid")
3684        {
3685            let neural = &self.config.diffusion.neural;
3686            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3687            stats.neural_hybrid_weight = Some(weight);
3688            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3689            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3690            warn!(
3691                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3692                 the neural/hybrid training path is not yet shipped. Config \
3693                 is captured in stats (weight={weight:.2}, strategy={}, \
3694                 columns={}) but no neural training runs. Statistical \
3695                 diffusion (backend='statistical') continues to work.",
3696                self.config.diffusion.backend,
3697                neural.hybrid_strategy,
3698                neural.neural_columns.len(),
3699            );
3700        }
3701
3702        // Phase 19b: Hypergraph Export (after all data is available)
3703        self.phase_hypergraph_export(
3704            &coa,
3705            &entries,
3706            &document_flows,
3707            &sourcing,
3708            &hr,
3709            &manufacturing_snap,
3710            &banking,
3711            &audit,
3712            &financial_reporting,
3713            &ocpm,
3714            &compliance_regulations,
3715            &mut stats,
3716        )?;
3717
3718        // Phase 10c: Additional graph builders (approval, entity, banking)
3719        // These run after all data is available since they need banking/IC data.
3720        if self.phase_config.generate_graph_export {
3721            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3722        }
3723
3724        // Log informational messages for config sections not yet fully wired
3725        if self.config.streaming.enabled {
3726            info!("Note: streaming config is enabled but batch mode does not use it");
3727        }
3728        if self.config.vendor_network.enabled {
3729            debug!("Vendor network config available; relationship graph generation is partial");
3730        }
3731        if self.config.customer_segmentation.enabled {
3732            debug!("Customer segmentation config available; segment-aware generation is partial");
3733        }
3734
3735        // Log final resource statistics
3736        let resource_stats = self.resource_guard.stats();
3737        info!(
3738            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3739            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3740            resource_stats.disk.estimated_bytes_written,
3741            resource_stats.degradation_level
3742        );
3743
3744        // Flush any remaining stream sink data
3745        if let Some(ref sink) = self.phase_sink {
3746            if let Err(e) = sink.flush() {
3747                warn!("Stream sink flush failed: {e}");
3748            }
3749        }
3750
3751        // Build data lineage graph
3752        let lineage = self.build_lineage_graph();
3753
3754        // Evaluate quality gates if enabled in config
3755        let gate_result = if self.config.quality_gates.enabled {
3756            let profile_name = &self.config.quality_gates.profile;
3757            match datasynth_eval::gates::get_profile(profile_name) {
3758                Some(profile) => {
3759                    // Build an evaluation populated with actual generation metrics.
3760                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3761
3762                    // Populate balance sheet evaluation from balance validation results
3763                    if balance_validation.validated {
3764                        eval.coherence.balance =
3765                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3766                                equation_balanced: balance_validation.is_balanced,
3767                                max_imbalance: (balance_validation.total_debits
3768                                    - balance_validation.total_credits)
3769                                    .abs(),
3770                                periods_evaluated: 1,
3771                                periods_imbalanced: if balance_validation.is_balanced {
3772                                    0
3773                                } else {
3774                                    1
3775                                },
3776                                period_results: Vec::new(),
3777                                companies_evaluated: self.config.companies.len(),
3778                            });
3779                    }
3780
3781                    // Set coherence passes based on balance validation
3782                    eval.coherence.passes = balance_validation.is_balanced;
3783                    if !balance_validation.is_balanced {
3784                        eval.coherence
3785                            .failures
3786                            .push("Balance sheet equation not satisfied".to_string());
3787                    }
3788
3789                    // Set statistical score based on entry count (basic sanity)
3790                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3791                    eval.statistical.passes = !entries.is_empty();
3792
3793                    // Quality score is a fixed default, not derived from data quality stats
3794                    eval.quality.overall_score = 0.9; // Default high for generated data
3795                    eval.quality.passes = true;
3796
3797                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3798                    info!(
3799                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3800                        profile_name, result.gates_passed, result.gates_total, result.summary
3801                    );
3802                    Some(result)
3803                }
3804                None => {
3805                    warn!(
3806                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3807                        profile_name
3808                    );
3809                    None
3810                }
3811            }
3812        } else {
3813            None
3814        };
3815
3816        // Generate internal controls if enabled
3817        let internal_controls = if self.config.internal_controls.enabled {
3818            InternalControl::standard_controls()
3819        } else {
3820            Vec::new()
3821        };
3822
3823        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3824        // phases (including fraud-bias sweep at Phase 20b) so derived
3825        // outputs reflect final data.
3826        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3827
3828        // v3.5.1: statistical validation over the final amount
3829        // distribution. Runs *after* all JE-adding phases so the report
3830        // reflects everything the user will see in the output. Returns
3831        // `None` unless `distributions.validation.enabled = true`.
3832        let statistical_validation = self.phase_statistical_validation(&entries)?;
3833
3834        // v4.1.3+: interconnectivity snapshot — tier assignments,
3835        // value-segment labels, industry-specific metadata. Runs after
3836        // master data is settled so it can index stable IDs.
3837        let interconnectivity = self.phase_interconnectivity();
3838
3839        // SP5.2 — snapshot the CoA semantic prior (if any) into the result so
3840        // output_writer can use it as a fallback index for account_description
3841        // resolution when the synthetic CoA index misses.
3842        let coa_semantic_prior = self
3843            .cached_priors
3844            .as_ref()
3845            .and_then(|p| p.coa_semantic.clone());
3846
3847        Ok(EnhancedGenerationResult {
3848            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3849            master_data: std::mem::take(&mut self.master_data),
3850            document_flows,
3851            subledger,
3852            ocpm,
3853            audit,
3854            banking,
3855            graph_export,
3856            sourcing,
3857            financial_reporting,
3858            hr,
3859            accounting_standards,
3860            manufacturing: manufacturing_snap,
3861            sales_kpi_budgets,
3862            tax,
3863            esg: esg_snap,
3864            treasury,
3865            project_accounting,
3866            process_evolution,
3867            organizational_events,
3868            disruption_events,
3869            intercompany,
3870            journal_entries: entries,
3871            anomaly_labels,
3872            balance_validation,
3873            data_quality_stats,
3874            quality_issues,
3875            statistics: stats,
3876            lineage: Some(lineage),
3877            gate_result,
3878            internal_controls,
3879            sod_violations,
3880            opening_balances,
3881            subledger_reconciliation,
3882            counterfactual_pairs,
3883            red_flags,
3884            collusion_rings,
3885            temporal_vendor_chains,
3886            entity_relationship_graph,
3887            cross_process_links,
3888            industry_output,
3889            coa_semantic_prior,
3890            compliance_regulations,
3891            analytics_metadata,
3892            statistical_validation,
3893            interconnectivity,
3894        })
3895    }
3896
3897    /// v4.1.3+: populate the interconnectivity snapshot from
3898    /// previously-inert schema sections. Empty when all sections are
3899    /// disabled.
3900    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3901        use rand::{RngExt, SeedableRng};
3902        use rand_chacha::ChaCha8Rng;
3903
3904        let mut snap = InterconnectivitySnapshot::default();
3905        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3906
3907        // --- Vendor network ---
3908        let vn = &self.config.vendor_network;
3909        if vn.enabled {
3910            let total = self.master_data.vendors.len();
3911            if total > 0 {
3912                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3913                let remaining_after_t1 = total.saturating_sub(tier1_count);
3914                let depth = vn.depth.clamp(1, 3);
3915                let tier2_count = if depth >= 2 {
3916                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3917                    (tier1_count * avg).min(remaining_after_t1)
3918                } else {
3919                    0
3920                };
3921                let tier3_count = total
3922                    .saturating_sub(tier1_count)
3923                    .saturating_sub(tier2_count);
3924
3925                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3926                    let tier = if idx < tier1_count {
3927                        1
3928                    } else if idx < tier1_count + tier2_count {
3929                        2
3930                    } else {
3931                        3
3932                    };
3933                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3934
3935                    // Cluster assignment via configured ratios.
3936                    let cl = &vn.clusters;
3937                    let roll: f64 = rng.random();
3938                    let cluster = if roll < cl.reliable_strategic {
3939                        "reliable_strategic"
3940                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3941                        "standard_operational"
3942                    } else if roll
3943                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3944                    {
3945                        "transactional"
3946                    } else {
3947                        "problematic"
3948                    };
3949                    snap.vendor_clusters
3950                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3951                }
3952                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3953            }
3954        }
3955
3956        // --- Customer segmentation ---
3957        let cs = &self.config.customer_segmentation;
3958        if cs.enabled {
3959            let seg = &cs.value_segments;
3960            for customer in &self.master_data.customers {
3961                let roll: f64 = rng.random();
3962                let value_segment = if roll < seg.enterprise.customer_share {
3963                    "enterprise"
3964                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3965                    "mid_market"
3966                } else if roll
3967                    < seg.enterprise.customer_share
3968                        + seg.mid_market.customer_share
3969                        + seg.smb.customer_share
3970                {
3971                    "smb"
3972                } else {
3973                    "consumer"
3974                };
3975                snap.customer_value_segments
3976                    .push((customer.customer_id.clone(), value_segment.to_string()));
3977
3978                let roll2: f64 = rng.random();
3979                let life = &cs.lifecycle;
3980                let lifecycle = if roll2 < life.prospect_rate {
3981                    "prospect"
3982                } else if roll2 < life.prospect_rate + life.new_rate {
3983                    "new"
3984                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3985                    "growth"
3986                } else if roll2
3987                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3988                {
3989                    "mature"
3990                } else if roll2
3991                    < life.prospect_rate
3992                        + life.new_rate
3993                        + life.growth_rate
3994                        + life.mature_rate
3995                        + life.at_risk_rate
3996                {
3997                    "at_risk"
3998                } else if roll2
3999                    < life.prospect_rate
4000                        + life.new_rate
4001                        + life.growth_rate
4002                        + life.mature_rate
4003                        + life.at_risk_rate
4004                        + life.churned_rate
4005                {
4006                    "churned"
4007                } else {
4008                    "won_back"
4009                };
4010                snap.customer_lifecycle_stages
4011                    .push((customer.customer_id.clone(), lifecycle.to_string()));
4012            }
4013        }
4014
4015        // --- Industry-specific metadata (minimal) ---
4016        let is = &self.config.industry_specific;
4017        if is.enabled {
4018            snap.industry_metadata.push(format!(
4019                "industry_specific.enabled=true (industry={:?})",
4020                self.config.global.industry
4021            ));
4022        }
4023
4024        snap
4025    }
4026
4027    // ========================================================================
4028    // Generation Phase Methods
4029    // ========================================================================
4030
4031    /// Phase 1: Generate Chart of Accounts and update statistics.
4032    fn phase_chart_of_accounts(
4033        &mut self,
4034        stats: &mut EnhancedGenerationStatistics,
4035    ) -> SynthResult<Arc<ChartOfAccounts>> {
4036        info!("Phase 1: Generating Chart of Accounts");
4037        let coa = self.generate_coa()?;
4038        stats.accounts_count = coa.account_count();
4039        info!(
4040            "Chart of Accounts generated: {} accounts",
4041            stats.accounts_count
4042        );
4043        self.check_resources_with_log("post-coa")?;
4044        Ok(coa)
4045    }
4046
4047    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
4048    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
4049        if self.phase_config.generate_master_data {
4050            info!("Phase 2: Generating Master Data");
4051            self.generate_master_data()?;
4052            stats.vendor_count = self.master_data.vendors.len();
4053            stats.customer_count = self.master_data.customers.len();
4054            stats.material_count = self.master_data.materials.len();
4055            stats.asset_count = self.master_data.assets.len();
4056            stats.employee_count = self.master_data.employees.len();
4057            info!(
4058                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
4059                stats.vendor_count, stats.customer_count, stats.material_count,
4060                stats.asset_count, stats.employee_count
4061            );
4062            self.check_resources_with_log("post-master-data")?;
4063        } else {
4064            debug!("Phase 2: Skipped (master data generation disabled)");
4065        }
4066        Ok(())
4067    }
4068
4069    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
4070    fn phase_document_flows(
4071        &mut self,
4072        stats: &mut EnhancedGenerationStatistics,
4073    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
4074        let mut document_flows = DocumentFlowSnapshot::default();
4075        let mut subledger = SubledgerSnapshot::default();
4076        // Dunning JEs (interest + charges) accumulated here and merged into the
4077        // main FA-JE list below so they appear in the GL.
4078        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
4079
4080        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
4081            info!("Phase 3: Generating Document Flows");
4082            self.generate_document_flows(&mut document_flows)?;
4083            stats.p2p_chain_count = document_flows.p2p_chains.len();
4084            stats.o2c_chain_count = document_flows.o2c_chains.len();
4085            info!(
4086                "Document flows generated: {} P2P chains, {} O2C chains",
4087                stats.p2p_chain_count, stats.o2c_chain_count
4088            );
4089
4090            // Phase 3b: Link document flows to subledgers (for data coherence)
4091            debug!("Phase 3b: Linking document flows to subledgers");
4092            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4093            stats.ap_invoice_count = subledger.ap_invoices.len();
4094            stats.ar_invoice_count = subledger.ar_invoices.len();
4095            debug!(
4096                "Subledgers linked: {} AP invoices, {} AR invoices",
4097                stats.ap_invoice_count, stats.ar_invoice_count
4098            );
4099
4100            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
4101            // Without this step the subledger is systematically overstated because
4102            // amount_remaining is set at invoice creation and never reduced by
4103            // the payments that were generated in the document-flow phase.
4104            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4105            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4106            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4107            debug!("Payment settlements applied to AP and AR subledgers");
4108
4109            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
4110            // The as-of date is the last day of the configured period.
4111            if let Ok(start_date) =
4112                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4113            {
4114                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4115                    - chrono::Days::new(1);
4116                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4117                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
4118                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
4119                // derived from JE-level aggregation and will typically differ. This is a known
4120                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
4121                // generated independently. A future reconciliation phase should align them by
4122                // using subledger totals as the authoritative source for BS Receivables.
4123                for company in &self.config.companies {
4124                    let ar_report = ARAgingReport::from_invoices(
4125                        company.code.clone(),
4126                        &subledger.ar_invoices,
4127                        as_of_date,
4128                    );
4129                    subledger.ar_aging_reports.push(ar_report);
4130
4131                    let ap_report = APAgingReport::from_invoices(
4132                        company.code.clone(),
4133                        &subledger.ap_invoices,
4134                        as_of_date,
4135                    );
4136                    subledger.ap_aging_reports.push(ap_report);
4137                }
4138                debug!(
4139                    "AR/AP aging reports built: {} AR, {} AP",
4140                    subledger.ar_aging_reports.len(),
4141                    subledger.ap_aging_reports.len()
4142                );
4143
4144                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4145                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4146                {
4147                    use datasynth_generators::DunningGenerator;
4148                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4149                    for company in &self.config.companies {
4150                        let currency = company.currency.as_str();
4151                        // Collect mutable references to AR invoices for this company
4152                        // (dunning generator updates dunning_info on invoices in-place).
4153                        let mut company_invoices: Vec<
4154                            datasynth_core::models::subledger::ar::ARInvoice,
4155                        > = subledger
4156                            .ar_invoices
4157                            .iter()
4158                            .filter(|inv| inv.company_code == company.code)
4159                            .cloned()
4160                            .collect();
4161
4162                        if company_invoices.is_empty() {
4163                            continue;
4164                        }
4165
4166                        let result = dunning_gen.execute_dunning_run(
4167                            &company.code,
4168                            as_of_date,
4169                            &mut company_invoices,
4170                            currency,
4171                        );
4172
4173                        // Write back updated dunning info to the main AR invoice list
4174                        for updated in &company_invoices {
4175                            if let Some(orig) = subledger
4176                                .ar_invoices
4177                                .iter_mut()
4178                                .find(|i| i.invoice_number == updated.invoice_number)
4179                            {
4180                                orig.dunning_info = updated.dunning_info.clone();
4181                            }
4182                        }
4183
4184                        subledger.dunning_runs.push(result.dunning_run);
4185                        subledger.dunning_letters.extend(result.letters);
4186                        // Dunning JEs (interest + charges) collected into local buffer.
4187                        dunning_journal_entries.extend(result.journal_entries);
4188                    }
4189                    debug!(
4190                        "Dunning runs complete: {} runs, {} letters",
4191                        subledger.dunning_runs.len(),
4192                        subledger.dunning_letters.len()
4193                    );
4194                }
4195            }
4196
4197            self.check_resources_with_log("post-document-flows")?;
4198        } else {
4199            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4200        }
4201
4202        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4203        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4204        if !self.master_data.assets.is_empty() {
4205            debug!("Generating FA subledger records");
4206            let company_code = self
4207                .config
4208                .companies
4209                .first()
4210                .map(|c| c.code.as_str())
4211                .unwrap_or("1000");
4212            let currency = self
4213                .config
4214                .companies
4215                .first()
4216                .map(|c| c.currency.as_str())
4217                .unwrap_or("USD");
4218
4219            let mut fa_gen = datasynth_generators::FAGenerator::new(
4220                datasynth_generators::FAGeneratorConfig::default(),
4221                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4222            );
4223
4224            for asset in &self.master_data.assets {
4225                let (record, je) = fa_gen.generate_asset_acquisition(
4226                    company_code,
4227                    &format!("{:?}", asset.asset_class),
4228                    &asset.description,
4229                    asset.acquisition_date,
4230                    currency,
4231                    asset.cost_center.as_deref(),
4232                );
4233                subledger.fa_records.push(record);
4234                fa_journal_entries.push(je);
4235            }
4236
4237            stats.fa_subledger_count = subledger.fa_records.len();
4238            debug!(
4239                "FA subledger records generated: {} (with {} acquisition JEs)",
4240                stats.fa_subledger_count,
4241                fa_journal_entries.len()
4242            );
4243        }
4244
4245        // Generate Inventory subledger records from master data materials
4246        if !self.master_data.materials.is_empty() {
4247            debug!("Generating Inventory subledger records");
4248            let first_company = self.config.companies.first();
4249            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4250            let inv_currency = first_company
4251                .map(|c| c.currency.clone())
4252                .unwrap_or_else(|| "USD".to_string());
4253
4254            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4255                datasynth_generators::InventoryGeneratorConfig::default(),
4256                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4257                inv_currency.clone(),
4258            );
4259
4260            for (i, material) in self.master_data.materials.iter().enumerate() {
4261                let plant = format!("PLANT{:02}", (i % 3) + 1);
4262                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4263                let initial_qty = rust_decimal::Decimal::from(
4264                    material
4265                        .safety_stock
4266                        .to_string()
4267                        .parse::<i64>()
4268                        .unwrap_or(100),
4269                );
4270
4271                let position = inv_gen.generate_position(
4272                    company_code,
4273                    &plant,
4274                    &storage_loc,
4275                    &material.material_id,
4276                    &material.description,
4277                    initial_qty,
4278                    Some(material.standard_cost),
4279                    &inv_currency,
4280                );
4281                subledger.inventory_positions.push(position);
4282            }
4283
4284            stats.inventory_subledger_count = subledger.inventory_positions.len();
4285            debug!(
4286                "Inventory subledger records generated: {}",
4287                stats.inventory_subledger_count
4288            );
4289        }
4290
4291        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4292        if !subledger.fa_records.is_empty() {
4293            if let Ok(start_date) =
4294                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4295            {
4296                let company_code = self
4297                    .config
4298                    .companies
4299                    .first()
4300                    .map(|c| c.code.as_str())
4301                    .unwrap_or("1000");
4302                let fiscal_year = start_date.year();
4303                let start_period = start_date.month();
4304                let end_period =
4305                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4306
4307                let depr_cfg = FaDepreciationScheduleConfig {
4308                    fiscal_year,
4309                    start_period,
4310                    end_period,
4311                    seed_offset: 800,
4312                };
4313                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4314                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4315                let run_count = runs.len();
4316                subledger.depreciation_runs = runs;
4317                debug!(
4318                    "Depreciation runs generated: {} runs for {} periods",
4319                    run_count, self.config.global.period_months
4320                );
4321            }
4322        }
4323
4324        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4325        if !subledger.inventory_positions.is_empty() {
4326            if let Ok(start_date) =
4327                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4328            {
4329                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4330                    - chrono::Days::new(1);
4331
4332                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4333                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4334
4335                for company in &self.config.companies {
4336                    let result = inv_val_gen.generate(
4337                        &company.code,
4338                        &subledger.inventory_positions,
4339                        as_of_date,
4340                    );
4341                    subledger.inventory_valuations.push(result);
4342                }
4343                debug!(
4344                    "Inventory valuations generated: {} company reports",
4345                    subledger.inventory_valuations.len()
4346                );
4347            }
4348        }
4349
4350        Ok((document_flows, subledger, fa_journal_entries))
4351    }
4352
4353    /// Phase 3c: Generate OCPM events from document flows.
4354    #[allow(clippy::too_many_arguments)]
4355    fn phase_ocpm_events(
4356        &mut self,
4357        document_flows: &DocumentFlowSnapshot,
4358        sourcing: &SourcingSnapshot,
4359        hr: &HrSnapshot,
4360        manufacturing: &ManufacturingSnapshot,
4361        banking: &BankingSnapshot,
4362        audit: &AuditSnapshot,
4363        financial_reporting: &FinancialReportingSnapshot,
4364        stats: &mut EnhancedGenerationStatistics,
4365    ) -> SynthResult<OcpmSnapshot> {
4366        let degradation = self.check_resources()?;
4367        if degradation >= DegradationLevel::Reduced {
4368            debug!(
4369                "Phase skipped due to resource pressure (degradation: {:?})",
4370                degradation
4371            );
4372            return Ok(OcpmSnapshot::default());
4373        }
4374        if self.phase_config.generate_ocpm_events {
4375            info!("Phase 3c: Generating OCPM Events");
4376            let ocpm_snapshot = self.generate_ocpm_events(
4377                document_flows,
4378                sourcing,
4379                hr,
4380                manufacturing,
4381                banking,
4382                audit,
4383                financial_reporting,
4384            )?;
4385            stats.ocpm_event_count = ocpm_snapshot.event_count;
4386            stats.ocpm_object_count = ocpm_snapshot.object_count;
4387            stats.ocpm_case_count = ocpm_snapshot.case_count;
4388            info!(
4389                "OCPM events generated: {} events, {} objects, {} cases",
4390                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4391            );
4392            self.check_resources_with_log("post-ocpm")?;
4393            Ok(ocpm_snapshot)
4394        } else {
4395            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4396            Ok(OcpmSnapshot::default())
4397        }
4398    }
4399
4400    /// Phase 4: Generate journal entries from document flows and standalone generation.
4401    fn phase_journal_entries(
4402        &mut self,
4403        coa: &Arc<ChartOfAccounts>,
4404        document_flows: &DocumentFlowSnapshot,
4405        _stats: &mut EnhancedGenerationStatistics,
4406    ) -> SynthResult<Vec<JournalEntry>> {
4407        let mut entries = Vec::new();
4408
4409        // Phase 4a: Generate JEs from document flows (for data coherence)
4410        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4411            debug!("Phase 4a: Generating JEs from document flows");
4412            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4413            debug!("Generated {} JEs from document flows", flow_entries.len());
4414            entries.extend(flow_entries);
4415        }
4416
4417        // Phase 4b: Generate standalone journal entries
4418        if self.phase_config.generate_journal_entries {
4419            info!("Phase 4: Generating Journal Entries");
4420            let je_entries = self.generate_journal_entries(coa)?;
4421            info!("Generated {} standalone journal entries", je_entries.len());
4422            entries.extend(je_entries);
4423        } else {
4424            debug!("Phase 4: Skipped (journal entry generation disabled)");
4425        }
4426
4427        // Phase 4c (shard mode): inject pre-built IC journal entries from
4428        // `ShardContext`. When running standalone (no group engine), this
4429        // is a no-op. See crate::shard_context::ShardContext for rationale.
4430        if let Some(ctx) = &self.shard_context {
4431            if !ctx.extra_journal_entries.is_empty() {
4432                debug!(
4433                    "Phase 4c: appending {} shard-mode IC journal entries",
4434                    ctx.extra_journal_entries.len()
4435                );
4436                entries.extend(ctx.extra_journal_entries.iter().cloned());
4437            }
4438        }
4439
4440        if !entries.is_empty() {
4441            // Note: stats.total_entries/total_line_items are set in generate()
4442            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4443            self.check_resources_with_log("post-journal-entries")?;
4444        }
4445
4446        Ok(entries)
4447    }
4448
4449    /// Phase 5: Inject anomalies into journal entries.
4450    fn phase_anomaly_injection(
4451        &mut self,
4452        entries: &mut [JournalEntry],
4453        actions: &DegradationActions,
4454        stats: &mut EnhancedGenerationStatistics,
4455    ) -> SynthResult<AnomalyLabels> {
4456        if self.phase_config.inject_anomalies
4457            && !entries.is_empty()
4458            && !actions.skip_anomaly_injection
4459        {
4460            info!("Phase 5: Injecting Anomalies");
4461            let result = self.inject_anomalies(entries)?;
4462            stats.anomalies_injected = result.labels.len();
4463            info!("Injected {} anomalies", stats.anomalies_injected);
4464            self.check_resources_with_log("post-anomaly-injection")?;
4465            Ok(result)
4466        } else if actions.skip_anomaly_injection {
4467            warn!("Phase 5: Skipped due to resource degradation");
4468            Ok(AnomalyLabels::default())
4469        } else {
4470            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4471            Ok(AnomalyLabels::default())
4472        }
4473    }
4474
4475    /// Phase 8d (W8.1): TB drift-correction pass.
4476    ///
4477    /// Builds a `RunningBalanceTracker` over all JEs assembled so far, attaches
4478    /// the TB anchor prior (when available), and — if `drift_correction_needed()`
4479    /// fires for any company — emits one balanced "SA" adjustment JE per company
4480    /// to pull the synthetic balances toward the corpus-median targets.
4481    ///
4482    /// No-op when no TB anchor is loaded (backwards-compatible).
4483    fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4484        // Only proceed when priors with a TB anchor are loaded.
4485        let tb_anchor = match &self.cached_priors {
4486            Some(priors) => match &priors.tb_anchor {
4487                Some(anchor) => anchor.clone(),
4488                None => return Ok(()),
4489            },
4490            None => return Ok(()),
4491        };
4492
4493        if !tb_anchor.has_data() {
4494            return Ok(());
4495        }
4496
4497        tracing::info!(
4498            target: "datasynth_runtime::tb_anchor",
4499            accounts = tb_anchor.per_account.len(),
4500            total_assets = tb_anchor.total_assets,
4501            "W8.1 — TB anchor loaded; running drift-correction pass"
4502        );
4503
4504        // Build a tracker over all current JEs.
4505        let tracker_config = BalanceTrackerConfig {
4506            validate_on_each_entry: false,
4507            track_history: false,
4508            fail_on_validation_error: false,
4509            ..Default::default()
4510        };
4511        let currency = self
4512            .config
4513            .companies
4514            .first()
4515            .map(|c| c.currency.clone())
4516            .unwrap_or_else(|| "USD".to_string());
4517
4518        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4519        tracker.set_tb_anchor(tb_anchor.clone());
4520        let _ = tracker.apply_entries(entries);
4521
4522        // SP5.1 — Diagnostic: log the number of accounts being tracked vs in the
4523        // anchor, plus the top-5 most-drifted accounts for each company so we
4524        // can distinguish "no drift" from "drift below threshold" at a glance.
4525        for company in &self.config.companies {
4526            let code = &company.code;
4527            let drifts = tracker.account_drift(code);
4528            let mut sorted_drifts = drifts.clone();
4529            sorted_drifts.sort_by(|a, b| {
4530                b.1.abs()
4531                    .partial_cmp(&a.1.abs())
4532                    .unwrap_or(std::cmp::Ordering::Equal)
4533            });
4534            let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4535            let correction_needed = tracker.drift_correction_needed(code);
4536            tracing::info!(
4537                target: "datasynth_runtime::tb_anchor",
4538                company = %code,
4539                anchor_accounts = tb_anchor.per_account.len(),
4540                tracked_accounts = drifts.len(),
4541                aggregate_drift = aggregate_drift,
4542                correction_needed = correction_needed,
4543                "W8.1 SP5.1 — per-company drift summary before correction"
4544            );
4545            for (acc, drift) in sorted_drifts.iter().take(5) {
4546                tracing::info!(
4547                    target: "datasynth_runtime::tb_anchor",
4548                    company = %code,
4549                    account = %acc,
4550                    drift = drift,
4551                    "W8.1 SP5.1 — top-5 drifted accounts"
4552                );
4553            }
4554        }
4555
4556        // Derive the posting date: use the last day of the simulation period.
4557        let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4558            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4559            .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4560
4561        // Distinct seed offset so drift-correction draws are independent of other phases.
4562        use rand_chacha::rand_core::SeedableRng as _;
4563        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4564
4565        let mut correction_count = 0usize;
4566        for company in &self.config.companies {
4567            let code = &company.code;
4568            if !tracker.drift_correction_needed(code) {
4569                tracing::debug!(
4570                    target: "datasynth_runtime::tb_anchor",
4571                    company = %code,
4572                    "W8.1 — drift_correction_needed returned false; skipping company"
4573                );
4574                continue;
4575            }
4576            if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4577                tracing::debug!(
4578                    target: "datasynth_runtime::tb_anchor",
4579                    company = %code,
4580                    lines = je.lines.len(),
4581                    debit = %je.total_debit(),
4582                    credit = %je.total_credit(),
4583                    "W8.1 — emitting drift-correction JE"
4584                );
4585                // Apply the correction to the tracker so the running state is current.
4586                let _ = tracker.apply_entry(&je);
4587                entries.push(je);
4588                correction_count += 1;
4589            }
4590        }
4591
4592        if correction_count > 0 {
4593            tracing::info!(
4594                target: "datasynth_runtime::tb_anchor",
4595                correction_count,
4596                "W8.1 — drift-correction pass emitted {} JE(s)",
4597                correction_count
4598            );
4599        } else {
4600            tracing::debug!(
4601                target: "datasynth_runtime::tb_anchor",
4602                "W8.1 — drift-correction pass: no corrections needed"
4603            );
4604        }
4605
4606        Ok(())
4607    }
4608
4609    /// Phase 6: Validate balance sheet equation on journal entries.
4610    fn phase_balance_validation(
4611        &mut self,
4612        entries: &[JournalEntry],
4613    ) -> SynthResult<BalanceValidationResult> {
4614        if self.phase_config.validate_balances && !entries.is_empty() {
4615            debug!("Phase 6: Validating Balances");
4616            let balance_validation = self.validate_journal_entries(entries)?;
4617            if balance_validation.is_balanced {
4618                debug!("Balance validation passed");
4619            } else {
4620                warn!(
4621                    "Balance validation found {} errors",
4622                    balance_validation.validation_errors.len()
4623                );
4624            }
4625            Ok(balance_validation)
4626        } else {
4627            Ok(BalanceValidationResult::default())
4628        }
4629    }
4630
4631    /// Validate that every `gl_account` referenced in `entries` exists in the
4632    /// chart of accounts.
4633    ///
4634    /// Always emits a warn-level log when the COA is missing accounts; in
4635    /// strict mode (`phase_config.validate_coa_coverage_strict`) returns
4636    /// `SynthError::generation` so the caller can fail fast.
4637    fn validate_coa_coverage(
4638        &self,
4639        entries: &[JournalEntry],
4640        coa: &ChartOfAccounts,
4641    ) -> SynthResult<()> {
4642        if entries.is_empty() {
4643            return Ok(());
4644        }
4645        let coa_set: std::collections::HashSet<&str> = coa
4646            .accounts
4647            .iter()
4648            .map(|a| a.account_number.as_str())
4649            .collect();
4650        let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4651        for je in entries {
4652            for line in je.lines.iter() {
4653                if !coa_set.contains(line.gl_account.as_str()) {
4654                    missing.insert(line.gl_account.clone());
4655                }
4656            }
4657        }
4658        if missing.is_empty() {
4659            debug!("COA coverage validation passed");
4660            return Ok(());
4661        }
4662        let msg = format!(
4663            "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4664            missing.len(),
4665            missing.iter().take(10).collect::<Vec<_>>()
4666        );
4667        if self.phase_config.validate_coa_coverage_strict {
4668            Err(SynthError::generation(msg))
4669        } else {
4670            warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4671            Ok(())
4672        }
4673    }
4674
4675    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4676    fn phase_data_quality_injection(
4677        &mut self,
4678        entries: &mut [JournalEntry],
4679        actions: &DegradationActions,
4680        stats: &mut EnhancedGenerationStatistics,
4681    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4682        if self.phase_config.inject_data_quality
4683            && !entries.is_empty()
4684            && !actions.skip_data_quality
4685        {
4686            info!("Phase 7: Injecting Data Quality Variations");
4687            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4688            stats.data_quality_issues = dq_stats.records_with_issues;
4689            info!("Injected {} data quality issues", stats.data_quality_issues);
4690            self.check_resources_with_log("post-data-quality")?;
4691            Ok((dq_stats, quality_issues))
4692        } else if actions.skip_data_quality {
4693            warn!("Phase 7: Skipped due to resource degradation");
4694            // v4.4.1: report the denominator (entries seen) even when
4695            // injection is skipped, so downstream consumers can tell
4696            // "skipped, 0/N" apart from "ran but found nothing".
4697            Ok((stats_with_denominator(entries.len()), Vec::new()))
4698        } else {
4699            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4700            Ok((stats_with_denominator(entries.len()), Vec::new()))
4701        }
4702    }
4703
4704    /// Phase 10b: Generate period-close journal entries.
4705    ///
4706    /// Generates:
4707    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4708    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4709    ///    for the configured period.
4710    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4711    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4712    ///    earnings via the Income Summary (3600) clearing account.
4713    fn phase_period_close(
4714        &mut self,
4715        entries: &mut Vec<JournalEntry>,
4716        subledger: &SubledgerSnapshot,
4717        stats: &mut EnhancedGenerationStatistics,
4718    ) -> SynthResult<()> {
4719        if !self.phase_config.generate_period_close || entries.is_empty() {
4720            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4721            return Ok(());
4722        }
4723
4724        info!("Phase 10b: Generating period-close journal entries");
4725
4726        use datasynth_core::accounts::{
4727            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4728        };
4729        use rust_decimal::Decimal;
4730
4731        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4732            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4733        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4734        // Posting date for close entries is the last day of the period
4735        let close_date = end_date - chrono::Days::new(1);
4736
4737        // Statutory tax rate (21% — configurable rates come in later tiers)
4738        let tax_rate = Decimal::new(21, 2); // 0.21
4739
4740        // Collect company codes from config
4741        let company_codes: Vec<String> = self
4742            .config
4743            .companies
4744            .iter()
4745            .map(|c| c.code.clone())
4746            .collect();
4747
4748        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4749        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4750        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4751
4752        // --- Depreciation JEs (per asset) ---
4753        // Compute period depreciation for each active fixed asset using straight-line method.
4754        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4755        let period_months = self.config.global.period_months;
4756        for asset in &subledger.fa_records {
4757            // Skip assets that are inactive / fully depreciated / non-depreciable
4758            use datasynth_core::models::subledger::fa::AssetStatus;
4759            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4760                continue;
4761            }
4762            let useful_life_months = asset.useful_life_months();
4763            if useful_life_months == 0 {
4764                // Land or CIP — not depreciated
4765                continue;
4766            }
4767            let salvage_value = asset.salvage_value();
4768            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4769            if depreciable_base == Decimal::ZERO {
4770                continue;
4771            }
4772            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4773                * Decimal::from(period_months))
4774            .round_dp(2);
4775            if period_depr <= Decimal::ZERO {
4776                continue;
4777            }
4778
4779            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4780            depr_header.document_type = "CL".to_string();
4781            depr_header.header_text = Some(format!(
4782                "Depreciation - {} {}",
4783                asset.asset_number, asset.description
4784            ));
4785            depr_header.created_by = "CLOSE_ENGINE".to_string();
4786            depr_header.source = TransactionSource::Automated;
4787            depr_header.business_process = Some(BusinessProcess::R2R);
4788
4789            let doc_id = depr_header.document_id;
4790            let mut depr_je = JournalEntry::new(depr_header);
4791
4792            // DR Depreciation Expense (6000)
4793            depr_je.add_line(JournalEntryLine::debit(
4794                doc_id,
4795                1,
4796                expense_accounts::DEPRECIATION.to_string(),
4797                period_depr,
4798            ));
4799            // CR Accumulated Depreciation (1510)
4800            depr_je.add_line(JournalEntryLine::credit(
4801                doc_id,
4802                2,
4803                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4804                period_depr,
4805            ));
4806
4807            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4808            close_jes.push(depr_je);
4809        }
4810
4811        if !subledger.fa_records.is_empty() {
4812            debug!(
4813                "Generated {} depreciation JEs from {} FA records",
4814                close_jes.len(),
4815                subledger.fa_records.len()
4816            );
4817        }
4818
4819        // --- Accrual entries (standard period-end accruals per company) ---
4820        // Generate standard accrued expense entries (utilities, rent, interest) using
4821        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4822        {
4823            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4824            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4825            // v3.4.3: snap reversal dates to business days. No-op when
4826            // temporal_patterns.business_days is disabled.
4827            if let Some(ctx) = &self.temporal_context {
4828                accrual_gen.set_temporal_context(Arc::clone(ctx));
4829            }
4830
4831            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4832            let accrual_items: &[(&str, &str, &str)] = &[
4833                ("Accrued Utilities", "6200", "2100"),
4834                ("Accrued Rent", "6300", "2100"),
4835                ("Accrued Interest", "6100", "2150"),
4836            ];
4837
4838            for company_code in &company_codes {
4839                // Estimate company revenue from existing JEs
4840                let company_revenue: Decimal = entries
4841                    .iter()
4842                    .filter(|e| e.header.company_code == *company_code)
4843                    .flat_map(|e| e.lines.iter())
4844                    .filter(|l| l.gl_account.starts_with('4'))
4845                    .map(|l| l.credit_amount - l.debit_amount)
4846                    .fold(Decimal::ZERO, |acc, v| acc + v);
4847
4848                if company_revenue <= Decimal::ZERO {
4849                    continue;
4850                }
4851
4852                // Use 0.5% of period revenue per accrual item as a proxy
4853                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4854                if accrual_base <= Decimal::ZERO {
4855                    continue;
4856                }
4857
4858                for (description, expense_acct, liability_acct) in accrual_items {
4859                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4860                        company_code,
4861                        description,
4862                        accrual_base,
4863                        expense_acct,
4864                        liability_acct,
4865                        close_date,
4866                        None,
4867                    );
4868                    close_jes.push(accrual_je);
4869                    if let Some(rev_je) = reversal_je {
4870                        close_jes.push(rev_je);
4871                    }
4872                }
4873            }
4874
4875            debug!(
4876                "Generated accrual entries for {} companies",
4877                company_codes.len()
4878            );
4879        }
4880
4881        for company_code in &company_codes {
4882            // Calculate net income for this company from existing JEs:
4883            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4884            // Revenue (4xxx): credit-normal, so net = credits - debits
4885            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4886            let mut total_revenue = Decimal::ZERO;
4887            let mut total_expenses = Decimal::ZERO;
4888
4889            for entry in entries.iter() {
4890                if entry.header.company_code != *company_code {
4891                    continue;
4892                }
4893                for line in &entry.lines {
4894                    let category = AccountCategory::from_account(&line.gl_account);
4895                    match category {
4896                        AccountCategory::Revenue => {
4897                            // Revenue is credit-normal: net revenue = credits - debits
4898                            total_revenue += line.credit_amount - line.debit_amount;
4899                        }
4900                        AccountCategory::Cogs
4901                        | AccountCategory::OperatingExpense
4902                        | AccountCategory::OtherIncomeExpense
4903                        | AccountCategory::Tax => {
4904                            // Expenses are debit-normal: net expense = debits - credits
4905                            total_expenses += line.debit_amount - line.credit_amount;
4906                        }
4907                        _ => {}
4908                    }
4909                }
4910            }
4911
4912            let pre_tax_income = total_revenue - total_expenses;
4913
4914            // Skip if no income statement activity
4915            if pre_tax_income == Decimal::ZERO {
4916                debug!(
4917                    "Company {}: no pre-tax income, skipping period close",
4918                    company_code
4919                );
4920                continue;
4921            }
4922
4923            // --- Tax provision / DTA JE ---
4924            if pre_tax_income > Decimal::ZERO {
4925                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4926                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4927
4928                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4929                tax_header.document_type = "CL".to_string();
4930                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4931                tax_header.created_by = "CLOSE_ENGINE".to_string();
4932                tax_header.source = TransactionSource::Automated;
4933                tax_header.business_process = Some(BusinessProcess::R2R);
4934
4935                let doc_id = tax_header.document_id;
4936                let mut tax_je = JournalEntry::new(tax_header);
4937
4938                // DR Tax Expense (8000)
4939                tax_je.add_line(JournalEntryLine::debit(
4940                    doc_id,
4941                    1,
4942                    tax_accounts::TAX_EXPENSE.to_string(),
4943                    tax_amount,
4944                ));
4945                // CR Income Tax Payable (2130)
4946                tax_je.add_line(JournalEntryLine::credit(
4947                    doc_id,
4948                    2,
4949                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4950                    tax_amount,
4951                ));
4952
4953                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4954                close_jes.push(tax_je);
4955            } else {
4956                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4957                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4958                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4959                if dta_amount > Decimal::ZERO {
4960                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4961                    dta_header.document_type = "CL".to_string();
4962                    dta_header.header_text =
4963                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4964                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4965                    dta_header.source = TransactionSource::Automated;
4966                    dta_header.business_process = Some(BusinessProcess::R2R);
4967
4968                    let doc_id = dta_header.document_id;
4969                    let mut dta_je = JournalEntry::new(dta_header);
4970
4971                    // DR Deferred Tax Asset (1600)
4972                    dta_je.add_line(JournalEntryLine::debit(
4973                        doc_id,
4974                        1,
4975                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4976                        dta_amount,
4977                    ));
4978                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4979                    // reflecting the benefit of the future deductible temporary difference.
4980                    dta_je.add_line(JournalEntryLine::credit(
4981                        doc_id,
4982                        2,
4983                        tax_accounts::TAX_EXPENSE.to_string(),
4984                        dta_amount,
4985                    ));
4986
4987                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4988                    close_jes.push(dta_je);
4989                    debug!(
4990                        "Company {}: loss year — recognised DTA of {}",
4991                        company_code, dta_amount
4992                    );
4993                }
4994            }
4995
4996            // --- Dividend JEs (v2.4) ---
4997            // If the entity is profitable after tax, declare a 10% dividend payout.
4998            // This runs AFTER tax provision so the dividend is based on post-tax income
4999            // but BEFORE the retained earnings close so the RE transfer reflects the
5000            // reduced balance.
5001            let tax_provision = if pre_tax_income > Decimal::ZERO {
5002                (pre_tax_income * tax_rate).round_dp(2)
5003            } else {
5004                Decimal::ZERO
5005            };
5006            let net_income = pre_tax_income - tax_provision;
5007
5008            if net_income > Decimal::ZERO {
5009                use datasynth_generators::DividendGenerator;
5010                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
5011                let mut div_gen = DividendGenerator::new(self.seed + 460);
5012                let currency_str = self
5013                    .config
5014                    .companies
5015                    .iter()
5016                    .find(|c| c.code == *company_code)
5017                    .map(|c| c.currency.as_str())
5018                    .unwrap_or("USD");
5019                let div_result = div_gen.generate(
5020                    company_code,
5021                    close_date,
5022                    Decimal::new(1, 0), // $1 per share placeholder
5023                    dividend_amount,
5024                    currency_str,
5025                );
5026                let div_je_count = div_result.journal_entries.len();
5027                close_jes.extend(div_result.journal_entries);
5028                debug!(
5029                    "Company {}: declared dividend of {} ({} JEs)",
5030                    company_code, dividend_amount, div_je_count
5031                );
5032            }
5033
5034            // --- Income statement closing JE ---
5035            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
5036            // For a loss year the DTA JE above already recognises the deferred benefit; here we
5037            // close the pre-tax loss into Retained Earnings as-is.
5038            if net_income != Decimal::ZERO {
5039                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
5040                close_header.document_type = "CL".to_string();
5041                close_header.header_text =
5042                    Some(format!("Income statement close - {}", company_code));
5043                close_header.created_by = "CLOSE_ENGINE".to_string();
5044                close_header.source = TransactionSource::Automated;
5045                close_header.business_process = Some(BusinessProcess::R2R);
5046
5047                let doc_id = close_header.document_id;
5048                let mut close_je = JournalEntry::new(close_header);
5049
5050                let abs_net_income = net_income.abs();
5051
5052                if net_income > Decimal::ZERO {
5053                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
5054                    close_je.add_line(JournalEntryLine::debit(
5055                        doc_id,
5056                        1,
5057                        equity_accounts::INCOME_SUMMARY.to_string(),
5058                        abs_net_income,
5059                    ));
5060                    close_je.add_line(JournalEntryLine::credit(
5061                        doc_id,
5062                        2,
5063                        equity_accounts::RETAINED_EARNINGS.to_string(),
5064                        abs_net_income,
5065                    ));
5066                } else {
5067                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
5068                    close_je.add_line(JournalEntryLine::debit(
5069                        doc_id,
5070                        1,
5071                        equity_accounts::RETAINED_EARNINGS.to_string(),
5072                        abs_net_income,
5073                    ));
5074                    close_je.add_line(JournalEntryLine::credit(
5075                        doc_id,
5076                        2,
5077                        equity_accounts::INCOME_SUMMARY.to_string(),
5078                        abs_net_income,
5079                    ));
5080                }
5081
5082                debug_assert!(
5083                    close_je.is_balanced(),
5084                    "Income statement closing JE must be balanced"
5085                );
5086                close_jes.push(close_je);
5087            }
5088        }
5089
5090        let close_count = close_jes.len();
5091        if close_count > 0 {
5092            info!("Generated {} period-close journal entries", close_count);
5093            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5094            entries.extend(close_jes);
5095            stats.period_close_je_count = close_count;
5096
5097            // Update total entry/line-item stats
5098            stats.total_entries = entries.len() as u64;
5099            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5100        } else {
5101            debug!("No period-close entries generated (no income statement activity)");
5102        }
5103
5104        Ok(())
5105    }
5106
5107    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
5108    fn phase_audit_data(
5109        &mut self,
5110        entries: &[JournalEntry],
5111        stats: &mut EnhancedGenerationStatistics,
5112    ) -> SynthResult<AuditSnapshot> {
5113        if self.phase_config.generate_audit {
5114            info!("Phase 8: Generating Audit Data");
5115            let audit_snapshot = self.generate_audit_data(entries)?;
5116            stats.audit_engagement_count = audit_snapshot.engagements.len();
5117            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5118            stats.audit_evidence_count = audit_snapshot.evidence.len();
5119            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5120            stats.audit_finding_count = audit_snapshot.findings.len();
5121            stats.audit_judgment_count = audit_snapshot.judgments.len();
5122            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5123            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5124            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5125            stats.audit_sample_count = audit_snapshot.samples.len();
5126            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5127            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5128            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5129            stats.audit_related_party_count = audit_snapshot.related_parties.len();
5130            stats.audit_related_party_transaction_count =
5131                audit_snapshot.related_party_transactions.len();
5132            info!(
5133                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5134                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5135                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5136                 {} RP transactions",
5137                stats.audit_engagement_count,
5138                stats.audit_workpaper_count,
5139                stats.audit_evidence_count,
5140                stats.audit_risk_count,
5141                stats.audit_finding_count,
5142                stats.audit_judgment_count,
5143                stats.audit_confirmation_count,
5144                stats.audit_procedure_step_count,
5145                stats.audit_sample_count,
5146                stats.audit_analytical_result_count,
5147                stats.audit_ia_function_count,
5148                stats.audit_ia_report_count,
5149                stats.audit_related_party_count,
5150                stats.audit_related_party_transaction_count,
5151            );
5152            self.check_resources_with_log("post-audit")?;
5153            Ok(audit_snapshot)
5154        } else {
5155            debug!("Phase 8: Skipped (audit generation disabled)");
5156            Ok(AuditSnapshot::default())
5157        }
5158    }
5159
5160    /// Phase 9: Generate banking KYC/AML data.
5161    fn phase_banking_data(
5162        &mut self,
5163        stats: &mut EnhancedGenerationStatistics,
5164    ) -> SynthResult<BankingSnapshot> {
5165        if self.phase_config.generate_banking {
5166            info!("Phase 9: Generating Banking KYC/AML Data");
5167            let banking_snapshot = self.generate_banking_data()?;
5168            stats.banking_customer_count = banking_snapshot.customers.len();
5169            stats.banking_account_count = banking_snapshot.accounts.len();
5170            stats.banking_transaction_count = banking_snapshot.transactions.len();
5171            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5172            info!(
5173                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5174                stats.banking_customer_count, stats.banking_account_count,
5175                stats.banking_transaction_count, stats.banking_suspicious_count
5176            );
5177            self.check_resources_with_log("post-banking")?;
5178            Ok(banking_snapshot)
5179        } else {
5180            debug!("Phase 9: Skipped (banking generation disabled)");
5181            Ok(BankingSnapshot::default())
5182        }
5183    }
5184
5185    /// Phase 10: Export accounting network graphs for ML training.
5186    fn phase_graph_export(
5187        &mut self,
5188        entries: &[JournalEntry],
5189        coa: &Arc<ChartOfAccounts>,
5190        stats: &mut EnhancedGenerationStatistics,
5191    ) -> SynthResult<GraphExportSnapshot> {
5192        if self.phase_config.generate_graph_export && !entries.is_empty() {
5193            info!("Phase 10: Exporting Accounting Network Graphs");
5194            match self.export_graphs(entries, coa, stats) {
5195                Ok(snapshot) => {
5196                    info!(
5197                        "Graph export complete: {} graphs ({} nodes, {} edges)",
5198                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5199                    );
5200                    Ok(snapshot)
5201                }
5202                Err(e) => {
5203                    warn!("Phase 10: Graph export failed: {}", e);
5204                    Ok(GraphExportSnapshot::default())
5205                }
5206            }
5207        } else {
5208            debug!("Phase 10: Skipped (graph export disabled or no entries)");
5209            Ok(GraphExportSnapshot::default())
5210        }
5211    }
5212
5213    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
5214    #[allow(clippy::too_many_arguments)]
5215    fn phase_hypergraph_export(
5216        &self,
5217        coa: &Arc<ChartOfAccounts>,
5218        entries: &[JournalEntry],
5219        document_flows: &DocumentFlowSnapshot,
5220        sourcing: &SourcingSnapshot,
5221        hr: &HrSnapshot,
5222        manufacturing: &ManufacturingSnapshot,
5223        banking: &BankingSnapshot,
5224        audit: &AuditSnapshot,
5225        financial_reporting: &FinancialReportingSnapshot,
5226        ocpm: &OcpmSnapshot,
5227        compliance: &ComplianceRegulationsSnapshot,
5228        stats: &mut EnhancedGenerationStatistics,
5229    ) -> SynthResult<()> {
5230        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5231            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5232            match self.export_hypergraph(
5233                coa,
5234                entries,
5235                document_flows,
5236                sourcing,
5237                hr,
5238                manufacturing,
5239                banking,
5240                audit,
5241                financial_reporting,
5242                ocpm,
5243                compliance,
5244                stats,
5245            ) {
5246                Ok(info) => {
5247                    info!(
5248                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5249                        info.node_count, info.edge_count, info.hyperedge_count
5250                    );
5251                }
5252                Err(e) => {
5253                    warn!("Phase 10b: Hypergraph export failed: {}", e);
5254                }
5255            }
5256        } else {
5257            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5258        }
5259        Ok(())
5260    }
5261
5262    /// Phase 11: LLM Enrichment.
5263    ///
5264    /// Uses an LLM provider (mock by default) to enrich vendor names with
5265    /// realistic, context-aware names. This phase is non-blocking: failures
5266    /// log a warning but do not stop the generation pipeline.
5267    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5268        if !self.config.llm.enabled {
5269            debug!("Phase 11: Skipped (LLM enrichment disabled)");
5270            return;
5271        }
5272
5273        info!("Phase 11: Starting LLM Enrichment");
5274        let start = std::time::Instant::now();
5275
5276        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5277            // Select provider: use HttpLlmProvider when a non-mock provider is configured
5278            // and the corresponding API key environment variable is present.
5279            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5280                let schema_provider = &self.config.llm.provider;
5281                let api_key_env = match schema_provider.as_str() {
5282                    "openai" => Some("OPENAI_API_KEY"),
5283                    "anthropic" => Some("ANTHROPIC_API_KEY"),
5284                    "custom" => Some("LLM_API_KEY"),
5285                    _ => None,
5286                };
5287                if let Some(key_env) = api_key_env {
5288                    if std::env::var(key_env).is_ok() {
5289                        let llm_config = datasynth_core::llm::LlmConfig {
5290                            model: self.config.llm.model.clone(),
5291                            api_key_env: key_env.to_string(),
5292                            ..datasynth_core::llm::LlmConfig::default()
5293                        };
5294                        match HttpLlmProvider::new(llm_config) {
5295                            Ok(p) => Arc::new(p),
5296                            Err(e) => {
5297                                warn!(
5298                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
5299                                    e
5300                                );
5301                                Arc::new(MockLlmProvider::new(self.seed))
5302                            }
5303                        }
5304                    } else {
5305                        Arc::new(MockLlmProvider::new(self.seed))
5306                    }
5307                } else {
5308                    Arc::new(MockLlmProvider::new(self.seed))
5309                }
5310            };
5311            // v4.1.1+: multi-category enrichment. Vendors remain the
5312            // default path; customers and materials opt in via
5313            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5314            let industry = format!("{:?}", self.config.global.industry);
5315
5316            let vendor_enricher =
5317                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5318            let max_vendors = self
5319                .config
5320                .llm
5321                .max_vendor_enrichments
5322                .min(self.master_data.vendors.len());
5323            let mut vendors_enriched = 0usize;
5324            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5325                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5326                    Ok(name) => {
5327                        vendor.name = name;
5328                        vendors_enriched += 1;
5329                    }
5330                    Err(e) => warn!(
5331                        "LLM vendor enrichment failed for {}: {}",
5332                        vendor.vendor_id, e
5333                    ),
5334                }
5335            }
5336
5337            let mut customers_enriched = 0usize;
5338            if self.config.llm.enrich_customers {
5339                let customer_enricher =
5340                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5341                        &provider,
5342                    ));
5343                let max_customers = self
5344                    .config
5345                    .llm
5346                    .max_customer_enrichments
5347                    .min(self.master_data.customers.len());
5348                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5349                    match customer_enricher.enrich_customer_name(
5350                        &industry,
5351                        "general",
5352                        &customer.country,
5353                    ) {
5354                        Ok(name) => {
5355                            customer.name = name;
5356                            customers_enriched += 1;
5357                        }
5358                        Err(e) => warn!(
5359                            "LLM customer enrichment failed for {}: {}",
5360                            customer.customer_id, e
5361                        ),
5362                    }
5363                }
5364            }
5365
5366            let mut materials_enriched = 0usize;
5367            if self.config.llm.enrich_materials {
5368                let material_enricher =
5369                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5370                        &provider,
5371                    ));
5372                let max_materials = self
5373                    .config
5374                    .llm
5375                    .max_material_enrichments
5376                    .min(self.master_data.materials.len());
5377                for material in self.master_data.materials.iter_mut().take(max_materials) {
5378                    let material_type = format!("{:?}", material.material_type);
5379                    match material_enricher.enrich_material_description(&material_type, &industry) {
5380                        Ok(desc) => {
5381                            material.description = desc;
5382                            materials_enriched += 1;
5383                        }
5384                        Err(e) => warn!(
5385                            "LLM material enrichment failed for {}: {}",
5386                            material.material_id, e
5387                        ),
5388                    }
5389                }
5390            }
5391
5392            (vendors_enriched, customers_enriched, materials_enriched)
5393        }));
5394
5395        match result {
5396            Ok((v, c, m)) => {
5397                stats.llm_vendors_enriched = v;
5398                stats.llm_customers_enriched = c;
5399                stats.llm_materials_enriched = m;
5400                let elapsed = start.elapsed();
5401                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5402                info!(
5403                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5404                    v, c, m, stats.llm_enrichment_ms
5405                );
5406            }
5407            Err(_) => {
5408                let elapsed = start.elapsed();
5409                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5410                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5411            }
5412        }
5413    }
5414
5415    /// Phase 12: Diffusion Enhancement.
5416    ///
5417    /// Generates a sample set matching distribution properties from the
5418    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5419    /// - `"statistical"` (default) — moment-matching backend, always fast.
5420    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5421    ///   the `neural` Cargo feature; falls back to statistical when the
5422    ///   feature isn't compiled in, with a loud warning.
5423    ///
5424    /// This phase is non-blocking: failures log a warning but do not
5425    /// stop the pipeline.
5426    fn phase_diffusion_enhancement(
5427        &self,
5428        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5429        stats: &mut EnhancedGenerationStatistics,
5430    ) {
5431        if !self.config.diffusion.enabled {
5432            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5433            return;
5434        }
5435
5436        info!("Phase 12: Starting Diffusion Enhancement");
5437        let start = std::time::Instant::now();
5438
5439        let backend_choice = self.config.diffusion.backend.as_str();
5440        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5441
5442        if use_neural {
5443            #[cfg(feature = "neural")]
5444            {
5445                match self.run_neural_diffusion_phase(entries) {
5446                    Ok(sample_count) => {
5447                        stats.diffusion_samples_generated = sample_count;
5448                        let elapsed = start.elapsed();
5449                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5450                        info!(
5451                            "Phase 12 complete ({}): {} samples in {}ms",
5452                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5453                        );
5454                        return;
5455                    }
5456                    Err(e) => {
5457                        warn!(
5458                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5459                        );
5460                        // Fall through to statistical path below.
5461                    }
5462                }
5463            }
5464            #[cfg(not(feature = "neural"))]
5465            {
5466                warn!(
5467                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5468                     not compiled in — falling back to statistical. Rebuild with \
5469                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5470                    backend_choice
5471                );
5472            }
5473        } else if !matches!(backend_choice, "statistical" | "") {
5474            warn!(
5475                "Phase 12: unknown backend '{}', falling back to statistical",
5476                backend_choice
5477            );
5478        }
5479
5480        // Statistical path (default + fallback).
5481        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5482            let means = vec![5000.0, 3.0, 2.0];
5483            let stds = vec![2000.0, 1.5, 1.0];
5484
5485            let diffusion_config = DiffusionConfig {
5486                n_steps: self.config.diffusion.n_steps,
5487                seed: self.seed,
5488                ..Default::default()
5489            };
5490
5491            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5492            let n_samples = self.config.diffusion.sample_size;
5493            let n_features = 3;
5494            backend.generate(n_samples, n_features, self.seed).len()
5495        }));
5496
5497        match result {
5498            Ok(sample_count) => {
5499                stats.diffusion_samples_generated = sample_count;
5500                let elapsed = start.elapsed();
5501                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5502                info!(
5503                    "Phase 12 complete (statistical): {} samples in {}ms",
5504                    sample_count, stats.diffusion_enhancement_ms
5505                );
5506            }
5507            Err(_) => {
5508                let elapsed = start.elapsed();
5509                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5510                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5511            }
5512        }
5513    }
5514
5515    /// Neural-backend execution — either load a pre-trained checkpoint
5516    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5517    /// from the first batch of JE amounts. Returns the sample count
5518    /// produced; any error bubbles up to the statistical fallback.
5519    #[cfg(feature = "neural")]
5520    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5521        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5522
5523        if entries.is_empty() {
5524            return Err(SynthError::generation(
5525                "neural diffusion: no journal entries available as training data",
5526            ));
5527        }
5528
5529        let training_data: Vec<Vec<f64>> = entries
5530            .iter()
5531            .take(5000)
5532            .map(|je| {
5533                let total_amount: f64 = je
5534                    .lines
5535                    .iter()
5536                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5537                    .map(|l| {
5538                        use rust_decimal::prelude::ToPrimitive;
5539                        l.debit_amount.to_f64().unwrap_or(0.0)
5540                    })
5541                    .sum();
5542                let line_count = je.lines.len() as f64;
5543                // Use the approval-workflow depth as the third feature
5544                // (proxy for complexity / risk). `None` → 1.
5545                let approval_level = je
5546                    .header
5547                    .approval_workflow
5548                    .as_ref()
5549                    .map(|w| w.required_levels as f64)
5550                    .unwrap_or(1.0);
5551                vec![total_amount, line_count, approval_level]
5552            })
5553            .collect();
5554
5555        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5556
5557        let cfg = &self.config.diffusion;
5558        let neural_cfg = &cfg.neural;
5559
5560        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5561            neural_cfg.checkpoint_path.as_ref()
5562        {
5563            let path = std::path::Path::new(ckpt_path);
5564            info!(
5565                "  Neural diffusion: loading checkpoint from {}",
5566                path.display()
5567            );
5568            NeuralDiffusionBackend::load(path)
5569                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5570        } else {
5571            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5572            info!(
5573                "  Neural diffusion: training score network on {} rows × {} features, \
5574                     {} epochs, hidden_dims={:?}",
5575                training_data.len(),
5576                n_features,
5577                neural_cfg.training_epochs,
5578                neural_cfg.hidden_dims
5579            );
5580            let training_config = NeuralTrainingConfig {
5581                n_steps: cfg.n_steps,
5582                schedule: cfg.schedule.clone(),
5583                hidden_dims: neural_cfg.hidden_dims.clone(),
5584                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5585                learning_rate: neural_cfg.learning_rate,
5586                epochs: neural_cfg.training_epochs,
5587                batch_size: neural_cfg.batch_size,
5588            };
5589            let (backend, report) =
5590                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5591                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5592            info!(
5593                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5594                report.epochs_completed, report.final_loss
5595            );
5596            backend
5597        };
5598
5599        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5600        Ok(samples.len())
5601    }
5602
5603    /// Phase 13: Causal Overlay.
5604    ///
5605    /// Builds a structural causal model from a built-in template (e.g.,
5606    /// fraud_detection) and generates causal samples. Optionally validates
5607    /// that the output respects the causal structure. This phase is
5608    /// non-blocking: failures log a warning but do not stop the pipeline.
5609    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5610        if !self.config.causal.enabled {
5611            debug!("Phase 13: Skipped (causal generation disabled)");
5612            return;
5613        }
5614
5615        info!("Phase 13: Starting Causal Overlay");
5616        let start = std::time::Instant::now();
5617
5618        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5619            // Select template based on config
5620            let graph = match self.config.causal.template.as_str() {
5621                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5622                _ => CausalGraph::fraud_detection_template(),
5623            };
5624
5625            let scm = StructuralCausalModel::new(graph.clone())
5626                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5627
5628            let n_samples = self.config.causal.sample_size;
5629            let samples = scm
5630                .generate(n_samples, self.seed)
5631                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5632
5633            // Optionally validate causal structure
5634            let validation_passed = if self.config.causal.validate {
5635                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5636                if report.valid {
5637                    info!(
5638                        "Causal validation passed: all {} checks OK",
5639                        report.checks.len()
5640                    );
5641                } else {
5642                    warn!(
5643                        "Causal validation: {} violations detected: {:?}",
5644                        report.violations.len(),
5645                        report.violations
5646                    );
5647                }
5648                Some(report.valid)
5649            } else {
5650                None
5651            };
5652
5653            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5654        }));
5655
5656        match result {
5657            Ok(Ok((sample_count, validation_passed))) => {
5658                stats.causal_samples_generated = sample_count;
5659                stats.causal_validation_passed = validation_passed;
5660                let elapsed = start.elapsed();
5661                stats.causal_generation_ms = elapsed.as_millis() as u64;
5662                info!(
5663                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5664                    sample_count, stats.causal_generation_ms, validation_passed,
5665                );
5666            }
5667            Ok(Err(e)) => {
5668                let elapsed = start.elapsed();
5669                stats.causal_generation_ms = elapsed.as_millis() as u64;
5670                warn!("Phase 13: Causal generation failed: {}", e);
5671            }
5672            Err(_) => {
5673                let elapsed = start.elapsed();
5674                stats.causal_generation_ms = elapsed.as_millis() as u64;
5675                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5676            }
5677        }
5678    }
5679
5680    /// Phase 14: Generate S2C sourcing data.
5681    fn phase_sourcing_data(
5682        &mut self,
5683        stats: &mut EnhancedGenerationStatistics,
5684    ) -> SynthResult<SourcingSnapshot> {
5685        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5686            debug!("Phase 14: Skipped (sourcing generation disabled)");
5687            return Ok(SourcingSnapshot::default());
5688        }
5689        let degradation = self.check_resources()?;
5690        if degradation >= DegradationLevel::Reduced {
5691            debug!(
5692                "Phase skipped due to resource pressure (degradation: {:?})",
5693                degradation
5694            );
5695            return Ok(SourcingSnapshot::default());
5696        }
5697
5698        info!("Phase 14: Generating S2C Sourcing Data");
5699        let seed = self.seed;
5700
5701        // Gather vendor data from master data
5702        let vendor_ids: Vec<String> = self
5703            .master_data
5704            .vendors
5705            .iter()
5706            .map(|v| v.vendor_id.clone())
5707            .collect();
5708        if vendor_ids.is_empty() {
5709            debug!("Phase 14: Skipped (no vendors available)");
5710            return Ok(SourcingSnapshot::default());
5711        }
5712
5713        let categories: Vec<(String, String)> = vec![
5714            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5715            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5716            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5717            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5718            ("CAT-LOG".to_string(), "Logistics".to_string()),
5719        ];
5720        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5721            .iter()
5722            .map(|(id, name)| {
5723                (
5724                    id.clone(),
5725                    name.clone(),
5726                    rust_decimal::Decimal::from(100_000),
5727                )
5728            })
5729            .collect();
5730
5731        let company_code = self
5732            .config
5733            .companies
5734            .first()
5735            .map(|c| c.code.as_str())
5736            .unwrap_or("1000");
5737        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5738            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5739        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5740        let fiscal_year = start_date.year() as u16;
5741        let owner_ids: Vec<String> = self
5742            .master_data
5743            .employees
5744            .iter()
5745            .take(5)
5746            .map(|e| e.employee_id.clone())
5747            .collect();
5748        let owner_id = owner_ids
5749            .first()
5750            .map(std::string::String::as_str)
5751            .unwrap_or("BUYER-001");
5752
5753        // Step 1: Spend Analysis
5754        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5755        let spend_analyses =
5756            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5757
5758        // Step 2: Sourcing Projects
5759        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5760        let sourcing_projects = if owner_ids.is_empty() {
5761            Vec::new()
5762        } else {
5763            project_gen.generate(
5764                company_code,
5765                &categories_with_spend,
5766                &owner_ids,
5767                start_date,
5768                self.config.global.period_months,
5769            )
5770        };
5771        stats.sourcing_project_count = sourcing_projects.len();
5772
5773        // Step 3: Qualifications
5774        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5775        let mut qual_gen = QualificationGenerator::new(seed + 2);
5776        let qualifications = qual_gen.generate(
5777            company_code,
5778            &qual_vendor_ids,
5779            sourcing_projects.first().map(|p| p.project_id.as_str()),
5780            owner_id,
5781            start_date,
5782        );
5783
5784        // Step 4: RFx Events
5785        let mut rfx_gen = RfxGenerator::new(seed + 3);
5786        let rfx_events: Vec<RfxEvent> = sourcing_projects
5787            .iter()
5788            .map(|proj| {
5789                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5790                rfx_gen.generate(
5791                    company_code,
5792                    &proj.project_id,
5793                    &proj.category_id,
5794                    &qualified_vids,
5795                    owner_id,
5796                    start_date,
5797                    50000.0,
5798                )
5799            })
5800            .collect();
5801        stats.rfx_event_count = rfx_events.len();
5802
5803        // Step 5: Bids
5804        let mut bid_gen = BidGenerator::new(seed + 4);
5805        let mut all_bids = Vec::new();
5806        for rfx in &rfx_events {
5807            let bidder_count = vendor_ids.len().clamp(2, 5);
5808            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5809            let bids = bid_gen.generate(rfx, &responding, start_date);
5810            all_bids.extend(bids);
5811        }
5812        stats.bid_count = all_bids.len();
5813
5814        // Step 6: Bid Evaluations
5815        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5816        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5817            .iter()
5818            .map(|rfx| {
5819                let rfx_bids: Vec<SupplierBid> = all_bids
5820                    .iter()
5821                    .filter(|b| b.rfx_id == rfx.rfx_id)
5822                    .cloned()
5823                    .collect();
5824                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5825            })
5826            .collect();
5827
5828        // Step 7: Contracts from winning bids
5829        let mut contract_gen = ContractGenerator::new(seed + 6);
5830        let contracts: Vec<ProcurementContract> = bid_evaluations
5831            .iter()
5832            .zip(rfx_events.iter())
5833            .filter_map(|(eval, rfx)| {
5834                eval.ranked_bids.first().and_then(|winner| {
5835                    all_bids
5836                        .iter()
5837                        .find(|b| b.bid_id == winner.bid_id)
5838                        .map(|winning_bid| {
5839                            contract_gen.generate_from_bid(
5840                                winning_bid,
5841                                Some(&rfx.sourcing_project_id),
5842                                &rfx.category_id,
5843                                owner_id,
5844                                start_date,
5845                            )
5846                        })
5847                })
5848            })
5849            .collect();
5850        stats.contract_count = contracts.len();
5851
5852        // Step 8: Catalog Items
5853        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5854        let catalog_items = catalog_gen.generate(&contracts);
5855        stats.catalog_item_count = catalog_items.len();
5856
5857        // Step 9: Scorecards
5858        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5859        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5860            .iter()
5861            .fold(
5862                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5863                |mut acc, c| {
5864                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5865                    acc
5866                },
5867            )
5868            .into_iter()
5869            .collect();
5870        let scorecards = scorecard_gen.generate(
5871            company_code,
5872            &vendor_contracts,
5873            start_date,
5874            end_date,
5875            owner_id,
5876        );
5877        stats.scorecard_count = scorecards.len();
5878
5879        // Back-populate cross-references on sourcing projects (Task 35)
5880        // Link each project to its RFx events, contracts, and spend analyses
5881        let mut sourcing_projects = sourcing_projects;
5882        for project in &mut sourcing_projects {
5883            // Link RFx events generated for this project
5884            project.rfx_ids = rfx_events
5885                .iter()
5886                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5887                .map(|rfx| rfx.rfx_id.clone())
5888                .collect();
5889
5890            // Link contract awarded from this project's RFx
5891            project.contract_id = contracts
5892                .iter()
5893                .find(|c| {
5894                    c.sourcing_project_id
5895                        .as_deref()
5896                        .is_some_and(|sp| sp == project.project_id)
5897                })
5898                .map(|c| c.contract_id.clone());
5899
5900            // Link spend analysis for matching category (use category_id as the reference)
5901            project.spend_analysis_id = spend_analyses
5902                .iter()
5903                .find(|sa| sa.category_id == project.category_id)
5904                .map(|sa| sa.category_id.clone());
5905        }
5906
5907        info!(
5908            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5909            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5910            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5911        );
5912        self.check_resources_with_log("post-sourcing")?;
5913
5914        Ok(SourcingSnapshot {
5915            spend_analyses,
5916            sourcing_projects,
5917            qualifications,
5918            rfx_events,
5919            bids: all_bids,
5920            bid_evaluations,
5921            contracts,
5922            catalog_items,
5923            scorecards,
5924        })
5925    }
5926
5927    /// Build a [`GroupStructure`] from the current company configuration.
5928    ///
5929    /// The first company in the configuration is treated as the ultimate parent.
5930    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5931    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5932    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5933        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5934
5935        let parent_code = self
5936            .config
5937            .companies
5938            .first()
5939            .map(|c| c.code.clone())
5940            .unwrap_or_else(|| "PARENT".to_string());
5941
5942        let mut group = GroupStructure::new(parent_code);
5943
5944        for company in self.config.companies.iter().skip(1) {
5945            let sub =
5946                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5947            group.add_subsidiary(sub);
5948        }
5949
5950        group
5951    }
5952
5953    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5954    fn phase_intercompany(
5955        &mut self,
5956        journal_entries: &[JournalEntry],
5957        stats: &mut EnhancedGenerationStatistics,
5958    ) -> SynthResult<IntercompanySnapshot> {
5959        // Skip if intercompany is disabled in config
5960        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5961            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5962            return Ok(IntercompanySnapshot::default());
5963        }
5964
5965        // Intercompany requires at least 2 companies
5966        if self.config.companies.len() < 2 {
5967            debug!(
5968                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5969                self.config.companies.len()
5970            );
5971            return Ok(IntercompanySnapshot::default());
5972        }
5973
5974        info!("Phase 14b: Generating Intercompany Transactions");
5975
5976        // Build the group structure early — used by ISA 600 component auditor scope
5977        // and consolidated financial statement generators downstream.
5978        let group_structure = self.build_group_structure();
5979        debug!(
5980            "Group structure built: parent={}, subsidiaries={}",
5981            group_structure.parent_entity,
5982            group_structure.subsidiaries.len()
5983        );
5984
5985        let seed = self.seed;
5986        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5987            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5988        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5989
5990        // Build ownership structure from company configs
5991        // First company is treated as the parent, remaining are subsidiaries
5992        let parent_code = self.config.companies[0].code.clone();
5993        let mut ownership_structure =
5994            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5995
5996        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5997            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5998                format!("REL{:03}", i + 1),
5999                parent_code.clone(),
6000                company.code.clone(),
6001                rust_decimal::Decimal::from(100), // Default 100% ownership
6002                start_date,
6003            );
6004            ownership_structure.add_relationship(relationship);
6005        }
6006
6007        // Convert config transfer pricing method to core model enum
6008        let tp_method = match self.config.intercompany.transfer_pricing_method {
6009            datasynth_config::schema::TransferPricingMethod::CostPlus => {
6010                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
6011            }
6012            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
6013                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
6014            }
6015            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
6016                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
6017            }
6018            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
6019                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
6020            }
6021            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
6022                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
6023            }
6024        };
6025
6026        // Build IC generator config from schema config
6027        let ic_currency = self
6028            .config
6029            .companies
6030            .first()
6031            .map(|c| c.currency.clone())
6032            .unwrap_or_else(|| "USD".to_string());
6033        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
6034            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
6035            transfer_pricing_method: tp_method,
6036            markup_percent: rust_decimal::Decimal::from_f64_retain(
6037                self.config.intercompany.markup_percent,
6038            )
6039            .unwrap_or(rust_decimal::Decimal::from(5)),
6040            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
6041            default_currency: ic_currency,
6042            ..Default::default()
6043        };
6044
6045        // Create IC generator
6046        let mut ic_generator = datasynth_generators::ICGenerator::new(
6047            ic_gen_config,
6048            ownership_structure.clone(),
6049            seed + 50,
6050        );
6051
6052        // Generate IC transactions for the period
6053        // Use ~3 transactions per day as a reasonable default
6054        let transactions_per_day = 3;
6055        let matched_pairs = ic_generator.generate_transactions_for_period(
6056            start_date,
6057            end_date,
6058            transactions_per_day,
6059        );
6060
6061        // Generate IC source P2P/O2C documents
6062        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
6063        debug!(
6064            "Generated {} IC seller invoices, {} IC buyer POs",
6065            ic_doc_chains.seller_invoices.len(),
6066            ic_doc_chains.buyer_orders.len()
6067        );
6068
6069        // Generate journal entries from matched pairs
6070        let mut seller_entries = Vec::new();
6071        let mut buyer_entries = Vec::new();
6072        let fiscal_year = start_date.year();
6073
6074        for pair in &matched_pairs {
6075            let fiscal_period = pair.posting_date.month();
6076            let (seller_je, buyer_je) =
6077                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
6078            seller_entries.push(seller_je);
6079            buyer_entries.push(buyer_je);
6080        }
6081
6082        // Run matching engine
6083        let matching_config = datasynth_generators::ICMatchingConfig {
6084            base_currency: self
6085                .config
6086                .companies
6087                .first()
6088                .map(|c| c.currency.clone())
6089                .unwrap_or_else(|| "USD".to_string()),
6090            ..Default::default()
6091        };
6092        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6093        matching_engine.load_matched_pairs(&matched_pairs);
6094        let matching_result = matching_engine.run_matching(end_date);
6095
6096        // Generate elimination entries if configured
6097        let mut elimination_entries = Vec::new();
6098        if self.config.intercompany.generate_eliminations {
6099            let elim_config = datasynth_generators::EliminationConfig {
6100                consolidation_entity: "GROUP".to_string(),
6101                base_currency: self
6102                    .config
6103                    .companies
6104                    .first()
6105                    .map(|c| c.currency.clone())
6106                    .unwrap_or_else(|| "USD".to_string()),
6107                ..Default::default()
6108            };
6109
6110            let mut elim_generator =
6111                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6112
6113            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6114            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6115                matching_result
6116                    .matched_balances
6117                    .iter()
6118                    .chain(matching_result.unmatched_balances.iter())
6119                    .cloned()
6120                    .collect();
6121
6122            // Build investment and equity maps from the group structure so that the
6123            // elimination generator can produce equity-investment elimination entries
6124            // (parent's investment in subsidiary vs. subsidiary's equity capital).
6125            //
6126            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
6127            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
6128            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
6129            //
6130            // Net assets are derived from the journal entries using account-range heuristics:
6131            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
6132            // no JE data is available (IC phase runs early in the generation pipeline).
6133            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6134                std::collections::HashMap::new();
6135            let mut equity_amounts: std::collections::HashMap<
6136                String,
6137                std::collections::HashMap<String, rust_decimal::Decimal>,
6138            > = std::collections::HashMap::new();
6139            {
6140                use rust_decimal::Decimal;
6141                let hundred = Decimal::from(100u32);
6142                let ten_pct = Decimal::new(10, 2); // 0.10
6143                let thirty_pct = Decimal::new(30, 2); // 0.30
6144                let sixty_pct = Decimal::new(60, 2); // 0.60
6145                let parent_code = &group_structure.parent_entity;
6146                for sub in &group_structure.subsidiaries {
6147                    let net_assets = {
6148                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6149                        if na > Decimal::ZERO {
6150                            na
6151                        } else {
6152                            Decimal::from(1_000_000u64)
6153                        }
6154                    };
6155                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
6156                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6157                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6158
6159                    // Split subsidiary equity into conventional components:
6160                    // 10 % share capital / 30 % APIC / 60 % retained earnings
6161                    let mut eq_map = std::collections::HashMap::new();
6162                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6163                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6164                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6165                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
6166                }
6167            }
6168
6169            let journal = elim_generator.generate_eliminations(
6170                &fiscal_period,
6171                end_date,
6172                &all_balances,
6173                &matched_pairs,
6174                &investment_amounts,
6175                &equity_amounts,
6176            );
6177
6178            elimination_entries = journal.entries.clone();
6179        }
6180
6181        let matched_pair_count = matched_pairs.len();
6182        let elimination_entry_count = elimination_entries.len();
6183        let match_rate = matching_result.match_rate;
6184
6185        stats.ic_matched_pair_count = matched_pair_count;
6186        stats.ic_elimination_count = elimination_entry_count;
6187        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6188
6189        info!(
6190            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6191            matched_pair_count,
6192            stats.ic_transaction_count,
6193            seller_entries.len(),
6194            buyer_entries.len(),
6195            elimination_entry_count,
6196            match_rate * 100.0
6197        );
6198        self.check_resources_with_log("post-intercompany")?;
6199
6200        // ----------------------------------------------------------------
6201        // NCI measurements: derive from group structure ownership percentages
6202        // ----------------------------------------------------------------
6203        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6204            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6205            use rust_decimal::Decimal;
6206
6207            let eight_pct = Decimal::new(8, 2); // 0.08
6208
6209            group_structure
6210                .subsidiaries
6211                .iter()
6212                .filter(|sub| {
6213                    sub.nci_percentage > Decimal::ZERO
6214                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6215                })
6216                .map(|sub| {
6217                    // Compute net assets from actual journal entries for this subsidiary.
6218                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
6219                    // IC phase runs before the main JE batch has been populated).
6220                    let net_assets_from_jes =
6221                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6222
6223                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
6224                        net_assets_from_jes.round_dp(2)
6225                    } else {
6226                        // Fallback: use a plausible base amount
6227                        Decimal::from(1_000_000u64)
6228                    };
6229
6230                    // Net income approximated as 8% of net assets
6231                    let net_income = (net_assets * eight_pct).round_dp(2);
6232
6233                    NciMeasurement::compute(
6234                        sub.entity_code.clone(),
6235                        sub.nci_percentage,
6236                        net_assets,
6237                        net_income,
6238                    )
6239                })
6240                .collect()
6241        };
6242
6243        if !nci_measurements.is_empty() {
6244            info!(
6245                "NCI measurements: {} subsidiaries with non-controlling interests",
6246                nci_measurements.len()
6247            );
6248        }
6249
6250        Ok(IntercompanySnapshot {
6251            group_structure: Some(group_structure),
6252            matched_pairs,
6253            seller_journal_entries: seller_entries,
6254            buyer_journal_entries: buyer_entries,
6255            elimination_entries,
6256            nci_measurements,
6257            ic_document_chains: Some(ic_doc_chains),
6258            matched_pair_count,
6259            elimination_entry_count,
6260            match_rate,
6261        })
6262    }
6263
6264    /// Phase 15: Generate bank reconciliations and financial statements.
6265    fn phase_financial_reporting(
6266        &mut self,
6267        document_flows: &DocumentFlowSnapshot,
6268        journal_entries: &[JournalEntry],
6269        coa: &Arc<ChartOfAccounts>,
6270        _hr: &HrSnapshot,
6271        _audit: &AuditSnapshot,
6272        stats: &mut EnhancedGenerationStatistics,
6273    ) -> SynthResult<FinancialReportingSnapshot> {
6274        let fs_enabled = self.phase_config.generate_financial_statements
6275            || self.config.financial_reporting.enabled;
6276        let br_enabled = self.phase_config.generate_bank_reconciliation;
6277
6278        if !fs_enabled && !br_enabled {
6279            debug!("Phase 15: Skipped (financial reporting disabled)");
6280            return Ok(FinancialReportingSnapshot::default());
6281        }
6282
6283        info!("Phase 15: Generating Financial Reporting Data");
6284
6285        let seed = self.seed;
6286        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6287            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6288
6289        let mut financial_statements = Vec::new();
6290        let mut bank_reconciliations = Vec::new();
6291        let mut trial_balances = Vec::new();
6292        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6293        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6294            Vec::new();
6295        // Standalone statements keyed by entity code
6296        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6297            std::collections::HashMap::new();
6298        // Consolidated statements (one per period)
6299        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6300        // Consolidation schedules (one per period)
6301        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6302
6303        // Generate financial statements from JE-derived trial balances.
6304        //
6305        // When journal entries are available, we use cumulative trial balances for
6306        // balance sheet accounts and current-period trial balances for income
6307        // statement accounts. We also track prior-period trial balances so the
6308        // generator can produce comparative amounts, and we build a proper
6309        // cash flow statement from working capital changes rather than random data.
6310        if fs_enabled {
6311            let has_journal_entries = !journal_entries.is_empty();
6312
6313            // Use FinancialStatementGenerator for balance sheet and income statement,
6314            // but build cash flow ourselves from TB data when JEs are available.
6315            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6316            // Separate generator for consolidated statements (different seed offset)
6317            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6318
6319            // Collect elimination JEs once (reused across periods)
6320            let elimination_entries: Vec<&JournalEntry> = journal_entries
6321                .iter()
6322                .filter(|je| je.header.is_elimination)
6323                .collect();
6324
6325            // Generate one set of statements per period, per entity
6326            for period in 0..self.config.global.period_months {
6327                let period_start = start_date + chrono::Months::new(period);
6328                let period_end =
6329                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6330                let fiscal_year = period_end.year() as u16;
6331                let fiscal_period = period_end.month() as u8;
6332                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6333
6334                // Build per-entity trial balances for this period (non-elimination JEs)
6335                // We accumulate them for the consolidation step.
6336                let mut entity_tb_map: std::collections::HashMap<
6337                    String,
6338                    std::collections::HashMap<String, rust_decimal::Decimal>,
6339                > = std::collections::HashMap::new();
6340
6341                // --- Standalone: one set of statements per company ---
6342                // v5.33: resolve once per phase. In single-shard / standalone
6343                // mode this is the primary country's framework; in group
6344                // mode each shard runs against its own entity (one company)
6345                // so the primary-country lookup is the entity's. Either way
6346                // the string drives framework-aware TB classification (Defect
6347                // A fix — German SKR / French PCG accounts no longer routed
6348                // through a US-only prefix table).
6349                let framework_str = self.resolve_framework_str();
6350                for (company_idx, company) in self.config.companies.iter().enumerate() {
6351                    let company_code = company.code.as_str();
6352                    let currency = company.currency.as_str();
6353                    // Use a unique seed offset per company to keep statements deterministic
6354                    // and distinct across companies
6355                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6356                    let mut company_fs_gen =
6357                        FinancialStatementGenerator::new(seed + company_seed_offset);
6358
6359                    if has_journal_entries {
6360                        let tb_entries = Self::build_cumulative_trial_balance(
6361                            journal_entries,
6362                            coa,
6363                            company_code,
6364                            start_date,
6365                            period_end,
6366                            fiscal_year,
6367                            fiscal_period,
6368                            framework_str,
6369                        );
6370
6371                        // Accumulate per-entity category balances for consolidation
6372                        let entity_cat_map =
6373                            entity_tb_map.entry(company_code.to_string()).or_default();
6374                        for tb_entry in &tb_entries {
6375                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6376                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6377                        }
6378
6379                        let stmts = company_fs_gen.generate(
6380                            company_code,
6381                            currency,
6382                            &tb_entries,
6383                            period_start,
6384                            period_end,
6385                            fiscal_year,
6386                            fiscal_period,
6387                            None,
6388                            "SYS-AUTOCLOSE",
6389                        );
6390
6391                        let mut entity_stmts = Vec::new();
6392                        for stmt in stmts {
6393                            if stmt.statement_type == StatementType::CashFlowStatement {
6394                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6395                                let cf_items = Self::build_cash_flow_from_trial_balances(
6396                                    &tb_entries,
6397                                    None,
6398                                    net_income,
6399                                );
6400                                entity_stmts.push(FinancialStatement {
6401                                    cash_flow_items: cf_items,
6402                                    ..stmt
6403                                });
6404                            } else {
6405                                entity_stmts.push(stmt);
6406                            }
6407                        }
6408
6409                        // Add to the flat financial_statements list (used by KPI/budget)
6410                        financial_statements.extend(entity_stmts.clone());
6411
6412                        // Store standalone per-entity
6413                        standalone_statements
6414                            .entry(company_code.to_string())
6415                            .or_default()
6416                            .extend(entity_stmts);
6417
6418                        // Only store trial balance for the first company in the period
6419                        // to avoid duplicates in the trial_balances list
6420                        if company_idx == 0 {
6421                            trial_balances.push(PeriodTrialBalance {
6422                                fiscal_year,
6423                                fiscal_period,
6424                                period_start,
6425                                period_end,
6426                                entries: tb_entries,
6427                                framework: framework_str.to_string(),
6428                            });
6429                        }
6430                    } else {
6431                        // Fallback: no JEs available
6432                        let tb_entries = Self::build_trial_balance_from_entries(
6433                            journal_entries,
6434                            coa,
6435                            company_code,
6436                            fiscal_year,
6437                            fiscal_period,
6438                            framework_str,
6439                        );
6440
6441                        let stmts = company_fs_gen.generate(
6442                            company_code,
6443                            currency,
6444                            &tb_entries,
6445                            period_start,
6446                            period_end,
6447                            fiscal_year,
6448                            fiscal_period,
6449                            None,
6450                            "SYS-AUTOCLOSE",
6451                        );
6452                        financial_statements.extend(stmts.clone());
6453                        standalone_statements
6454                            .entry(company_code.to_string())
6455                            .or_default()
6456                            .extend(stmts);
6457
6458                        if company_idx == 0 && !tb_entries.is_empty() {
6459                            trial_balances.push(PeriodTrialBalance {
6460                                fiscal_year,
6461                                fiscal_period,
6462                                period_start,
6463                                period_end,
6464                                entries: tb_entries,
6465                                framework: framework_str.to_string(),
6466                            });
6467                        }
6468                    }
6469                }
6470
6471                // --- Consolidated: aggregate all entities + apply eliminations ---
6472                // Use the primary (first) company's currency for the consolidated statement
6473                let group_currency = self
6474                    .config
6475                    .companies
6476                    .first()
6477                    .map(|c| c.currency.as_str())
6478                    .unwrap_or("USD");
6479
6480                // Build owned elimination entries for this period
6481                let period_eliminations: Vec<JournalEntry> = elimination_entries
6482                    .iter()
6483                    .filter(|je| {
6484                        je.header.fiscal_year == fiscal_year
6485                            && je.header.fiscal_period == fiscal_period
6486                    })
6487                    .map(|je| (*je).clone())
6488                    .collect();
6489
6490                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6491                    &entity_tb_map,
6492                    &period_eliminations,
6493                    &period_label,
6494                );
6495
6496                // Build a pseudo trial balance from consolidated line items for the
6497                // FinancialStatementGenerator to use (only for cash flow direction).
6498                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6499                    .line_items
6500                    .iter()
6501                    .map(|li| {
6502                        let net = li.post_elimination_total;
6503                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6504                            (net, rust_decimal::Decimal::ZERO)
6505                        } else {
6506                            (rust_decimal::Decimal::ZERO, -net)
6507                        };
6508                        datasynth_generators::TrialBalanceEntry {
6509                            account_code: li.account_category.clone(),
6510                            account_name: li.account_category.clone(),
6511                            category: li.account_category.clone(),
6512                            debit_balance: debit,
6513                            credit_balance: credit,
6514                        }
6515                    })
6516                    .collect();
6517
6518                let mut cons_stmts = cons_gen.generate(
6519                    "GROUP",
6520                    group_currency,
6521                    &cons_tb,
6522                    period_start,
6523                    period_end,
6524                    fiscal_year,
6525                    fiscal_period,
6526                    None,
6527                    "SYS-AUTOCLOSE",
6528                );
6529
6530                // Split consolidated line items by statement type.
6531                // The consolidation generator returns BS items first, then IS items,
6532                // identified by their CONS- prefix and category.
6533                let bs_categories: &[&str] = &[
6534                    "CASH",
6535                    "RECEIVABLES",
6536                    "INVENTORY",
6537                    "FIXEDASSETS",
6538                    "PAYABLES",
6539                    "ACCRUEDLIABILITIES",
6540                    "LONGTERMDEBT",
6541                    "EQUITY",
6542                ];
6543                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6544                    cons_line_items.into_iter().partition(|li| {
6545                        let upper = li.label.to_uppercase();
6546                        bs_categories.iter().any(|c| upper == *c)
6547                    });
6548
6549                for stmt in &mut cons_stmts {
6550                    stmt.is_consolidated = true;
6551                    match stmt.statement_type {
6552                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6553                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6554                        _ => {} // CF and equity change statements keep generator output
6555                    }
6556                }
6557
6558                consolidated_statements.extend(cons_stmts);
6559                consolidation_schedules.push(schedule);
6560            }
6561
6562            // Backward compat: if only 1 company, use existing code path logic
6563            // (prior_cumulative_tb for comparative amounts). Already handled above;
6564            // the prior_ref is omitted to keep this change minimal.
6565            let _ = &mut fs_gen; // suppress unused warning
6566
6567            stats.financial_statement_count = financial_statements.len();
6568            info!(
6569                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6570                stats.financial_statement_count,
6571                consolidated_statements.len(),
6572                has_journal_entries
6573            );
6574
6575            // ----------------------------------------------------------------
6576            // IFRS 8 / ASC 280: Operating Segment Reporting
6577            // ----------------------------------------------------------------
6578            // Build entity seeds from the company configuration.
6579            let entity_seeds: Vec<SegmentSeed> = self
6580                .config
6581                .companies
6582                .iter()
6583                .map(|c| SegmentSeed {
6584                    code: c.code.clone(),
6585                    name: c.name.clone(),
6586                    currency: c.currency.clone(),
6587                })
6588                .collect();
6589
6590            let mut seg_gen = SegmentGenerator::new(seed + 30);
6591
6592            // Generate one set of segment reports per period.
6593            // We extract consolidated revenue / profit / assets from the consolidated
6594            // financial statements produced above, falling back to simple sums when
6595            // no consolidated statements were generated (single-entity path).
6596            for period in 0..self.config.global.period_months {
6597                let period_end =
6598                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6599                let fiscal_year = period_end.year() as u16;
6600                let fiscal_period = period_end.month() as u8;
6601                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6602
6603                use datasynth_core::models::StatementType;
6604
6605                // Try to find consolidated income statement for this period
6606                let cons_is = consolidated_statements.iter().find(|s| {
6607                    s.fiscal_year == fiscal_year
6608                        && s.fiscal_period == fiscal_period
6609                        && s.statement_type == StatementType::IncomeStatement
6610                });
6611                let cons_bs = consolidated_statements.iter().find(|s| {
6612                    s.fiscal_year == fiscal_year
6613                        && s.fiscal_period == fiscal_period
6614                        && s.statement_type == StatementType::BalanceSheet
6615                });
6616
6617                // If consolidated statements not available fall back to the flat list
6618                let is_stmt = cons_is.or_else(|| {
6619                    financial_statements.iter().find(|s| {
6620                        s.fiscal_year == fiscal_year
6621                            && s.fiscal_period == fiscal_period
6622                            && s.statement_type == StatementType::IncomeStatement
6623                    })
6624                });
6625                let bs_stmt = cons_bs.or_else(|| {
6626                    financial_statements.iter().find(|s| {
6627                        s.fiscal_year == fiscal_year
6628                            && s.fiscal_period == fiscal_period
6629                            && s.statement_type == StatementType::BalanceSheet
6630                    })
6631                });
6632
6633                let consolidated_revenue = is_stmt
6634                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6635                    .map(|li| -li.amount) // revenue is stored as negative in IS
6636                    .unwrap_or(rust_decimal::Decimal::ZERO);
6637
6638                let consolidated_profit = is_stmt
6639                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6640                    .map(|li| li.amount)
6641                    .unwrap_or(rust_decimal::Decimal::ZERO);
6642
6643                let consolidated_assets = bs_stmt
6644                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6645                    .map(|li| li.amount)
6646                    .unwrap_or(rust_decimal::Decimal::ZERO);
6647
6648                // Skip periods where we have no financial data
6649                if consolidated_revenue == rust_decimal::Decimal::ZERO
6650                    && consolidated_assets == rust_decimal::Decimal::ZERO
6651                {
6652                    continue;
6653                }
6654
6655                let group_code = self
6656                    .config
6657                    .companies
6658                    .first()
6659                    .map(|c| c.code.as_str())
6660                    .unwrap_or("GROUP");
6661
6662                // Compute period depreciation from JEs with document type "CL" hitting account
6663                // 6000 (depreciation expense).  These are generated by phase_period_close.
6664                let total_depr: rust_decimal::Decimal = journal_entries
6665                    .iter()
6666                    .filter(|je| je.header.document_type == "CL")
6667                    .flat_map(|je| je.lines.iter())
6668                    .filter(|l| l.gl_account.starts_with("6000"))
6669                    .map(|l| l.debit_amount)
6670                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6671                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6672                    Some(total_depr)
6673                } else {
6674                    None
6675                };
6676
6677                let (segs, recon) = seg_gen.generate(
6678                    group_code,
6679                    &period_label,
6680                    consolidated_revenue,
6681                    consolidated_profit,
6682                    consolidated_assets,
6683                    &entity_seeds,
6684                    depr_param,
6685                );
6686                segment_reports.extend(segs);
6687                segment_reconciliations.push(recon);
6688            }
6689
6690            info!(
6691                "Segment reports generated: {} segments, {} reconciliations",
6692                segment_reports.len(),
6693                segment_reconciliations.len()
6694            );
6695        }
6696
6697        // Generate bank reconciliations from payment data
6698        if br_enabled && !document_flows.payments.is_empty() {
6699            let employee_ids: Vec<String> = self
6700                .master_data
6701                .employees
6702                .iter()
6703                .map(|e| e.employee_id.clone())
6704                .collect();
6705            let mut br_gen =
6706                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6707
6708            // Group payments by company code and period
6709            for company in &self.config.companies {
6710                let company_payments: Vec<PaymentReference> = document_flows
6711                    .payments
6712                    .iter()
6713                    .filter(|p| p.header.company_code == company.code)
6714                    .map(|p| PaymentReference {
6715                        id: p.header.document_id.clone(),
6716                        amount: if p.is_vendor { p.amount } else { -p.amount },
6717                        date: p.header.document_date,
6718                        reference: p
6719                            .check_number
6720                            .clone()
6721                            .or_else(|| p.wire_reference.clone())
6722                            .unwrap_or_else(|| p.header.document_id.clone()),
6723                    })
6724                    .collect();
6725
6726                if company_payments.is_empty() {
6727                    continue;
6728                }
6729
6730                let bank_account_id = format!("{}-MAIN", company.code);
6731
6732                // Generate one reconciliation per period
6733                for period in 0..self.config.global.period_months {
6734                    let period_start = start_date + chrono::Months::new(period);
6735                    let period_end =
6736                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6737
6738                    let period_payments: Vec<PaymentReference> = company_payments
6739                        .iter()
6740                        .filter(|p| p.date >= period_start && p.date <= period_end)
6741                        .cloned()
6742                        .collect();
6743
6744                    let recon = br_gen.generate(
6745                        &company.code,
6746                        &bank_account_id,
6747                        period_start,
6748                        period_end,
6749                        &company.currency,
6750                        &period_payments,
6751                    );
6752                    bank_reconciliations.push(recon);
6753                }
6754            }
6755            info!(
6756                "Bank reconciliations generated: {} reconciliations",
6757                bank_reconciliations.len()
6758            );
6759        }
6760
6761        stats.bank_reconciliation_count = bank_reconciliations.len();
6762        self.check_resources_with_log("post-financial-reporting")?;
6763
6764        if !trial_balances.is_empty() {
6765            info!(
6766                "Period-close trial balances captured: {} periods",
6767                trial_balances.len()
6768            );
6769        }
6770
6771        // Notes to financial statements are generated in a separate post-processing step
6772        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6773        // phases have completed, so that deferred tax and provision data can be wired in.
6774        let notes_to_financial_statements = Vec::new();
6775
6776        Ok(FinancialReportingSnapshot {
6777            financial_statements,
6778            standalone_statements,
6779            consolidated_statements,
6780            consolidation_schedules,
6781            bank_reconciliations,
6782            trial_balances,
6783            segment_reports,
6784            segment_reconciliations,
6785            notes_to_financial_statements,
6786        })
6787    }
6788
6789    /// Populate notes to financial statements using fully-resolved snapshots.
6790    ///
6791    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6792    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6793    /// can be wired into the notes context.  The method mutates
6794    /// `financial_reporting.notes_to_financial_statements` in-place.
6795    fn generate_notes_to_financial_statements(
6796        &self,
6797        financial_reporting: &mut FinancialReportingSnapshot,
6798        accounting_standards: &AccountingStandardsSnapshot,
6799        tax: &TaxSnapshot,
6800        hr: &HrSnapshot,
6801        audit: &AuditSnapshot,
6802        treasury: &TreasurySnapshot,
6803    ) {
6804        use datasynth_config::schema::AccountingFrameworkConfig;
6805        use datasynth_core::models::StatementType;
6806        use datasynth_generators::period_close::notes_generator::{
6807            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6808        };
6809
6810        let seed = self.seed;
6811        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6812        {
6813            Ok(d) => d,
6814            Err(_) => return,
6815        };
6816
6817        let mut notes_gen = NotesGenerator::new(seed + 4235);
6818
6819        for company in &self.config.companies {
6820            let last_period_end = start_date
6821                + chrono::Months::new(self.config.global.period_months)
6822                - chrono::Days::new(1);
6823            let fiscal_year = last_period_end.year() as u16;
6824
6825            // Extract relevant amounts from the already-generated financial statements
6826            let entity_is = financial_reporting
6827                .standalone_statements
6828                .get(&company.code)
6829                .and_then(|stmts| {
6830                    stmts.iter().find(|s| {
6831                        s.fiscal_year == fiscal_year
6832                            && s.statement_type == StatementType::IncomeStatement
6833                    })
6834                });
6835            let entity_bs = financial_reporting
6836                .standalone_statements
6837                .get(&company.code)
6838                .and_then(|stmts| {
6839                    stmts.iter().find(|s| {
6840                        s.fiscal_year == fiscal_year
6841                            && s.statement_type == StatementType::BalanceSheet
6842                    })
6843                });
6844
6845            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6846            let revenue_amount = entity_is
6847                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6848                .map(|li| li.amount);
6849            let ppe_gross = entity_bs
6850                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6851                .map(|li| li.amount);
6852
6853            let framework = match self
6854                .config
6855                .accounting_standards
6856                .framework
6857                .unwrap_or_default()
6858            {
6859                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6860                    "IFRS".to_string()
6861                }
6862                _ => "US GAAP".to_string(),
6863            };
6864
6865            // ---- Deferred tax (IAS 12 / ASC 740) ----
6866            // Sum closing DTA and DTL from rollforward entries for this entity.
6867            let (entity_dta, entity_dtl) = {
6868                let mut dta = rust_decimal::Decimal::ZERO;
6869                let mut dtl = rust_decimal::Decimal::ZERO;
6870                for rf in &tax.deferred_tax.rollforwards {
6871                    if rf.entity_code == company.code {
6872                        dta += rf.closing_dta;
6873                        dtl += rf.closing_dtl;
6874                    }
6875                }
6876                (
6877                    if dta > rust_decimal::Decimal::ZERO {
6878                        Some(dta)
6879                    } else {
6880                        None
6881                    },
6882                    if dtl > rust_decimal::Decimal::ZERO {
6883                        Some(dtl)
6884                    } else {
6885                        None
6886                    },
6887                )
6888            };
6889
6890            // ---- Provisions (IAS 37 / ASC 450) ----
6891            // Filter provisions to this entity; sum best_estimate amounts.
6892            let entity_provisions: Vec<_> = accounting_standards
6893                .provisions
6894                .iter()
6895                .filter(|p| p.entity_code == company.code)
6896                .collect();
6897            let provision_count = entity_provisions.len();
6898            let total_provisions = if provision_count > 0 {
6899                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6900            } else {
6901                None
6902            };
6903
6904            // ---- Pension data from HR snapshot ----
6905            let entity_pension_plan_count = hr
6906                .pension_plans
6907                .iter()
6908                .filter(|p| p.entity_code == company.code)
6909                .count();
6910            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6911                let sum: rust_decimal::Decimal = hr
6912                    .pension_disclosures
6913                    .iter()
6914                    .filter(|d| {
6915                        hr.pension_plans
6916                            .iter()
6917                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6918                    })
6919                    .map(|d| d.net_pension_liability)
6920                    .sum();
6921                let plan_assets_sum: rust_decimal::Decimal = hr
6922                    .pension_plan_assets
6923                    .iter()
6924                    .filter(|a| {
6925                        hr.pension_plans
6926                            .iter()
6927                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6928                    })
6929                    .map(|a| a.fair_value_closing)
6930                    .sum();
6931                if entity_pension_plan_count > 0 {
6932                    Some(sum + plan_assets_sum)
6933                } else {
6934                    None
6935                }
6936            };
6937            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6938                let sum: rust_decimal::Decimal = hr
6939                    .pension_plan_assets
6940                    .iter()
6941                    .filter(|a| {
6942                        hr.pension_plans
6943                            .iter()
6944                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6945                    })
6946                    .map(|a| a.fair_value_closing)
6947                    .sum();
6948                if entity_pension_plan_count > 0 {
6949                    Some(sum)
6950                } else {
6951                    None
6952                }
6953            };
6954
6955            // ---- Audit data: related parties + subsequent events ----
6956            // Audit snapshot covers all entities; use total counts (common case = single entity).
6957            let rp_count = audit.related_party_transactions.len();
6958            let se_count = audit.subsequent_events.len();
6959            let adjusting_count = audit
6960                .subsequent_events
6961                .iter()
6962                .filter(|e| {
6963                    matches!(
6964                        e.classification,
6965                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6966                    )
6967                })
6968                .count();
6969
6970            let ctx = NotesGeneratorContext {
6971                entity_code: company.code.clone(),
6972                framework,
6973                period: format!("FY{}", fiscal_year),
6974                period_end: last_period_end,
6975                currency: company.currency.clone(),
6976                revenue_amount,
6977                total_ppe_gross: ppe_gross,
6978                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6979                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6980                deferred_tax_asset: entity_dta,
6981                deferred_tax_liability: entity_dtl,
6982                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6983                provision_count,
6984                total_provisions,
6985                // Pension data from HR snapshot
6986                pension_plan_count: entity_pension_plan_count,
6987                total_dbo: entity_total_dbo,
6988                total_plan_assets: entity_total_plan_assets,
6989                // Audit data
6990                related_party_transaction_count: rp_count,
6991                subsequent_event_count: se_count,
6992                adjusting_event_count: adjusting_count,
6993                ..NotesGeneratorContext::default()
6994            };
6995
6996            let entity_notes = notes_gen.generate(&ctx);
6997            let standard_note_count = entity_notes.len() as u32;
6998            info!(
6999                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
7000                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
7001            );
7002            financial_reporting
7003                .notes_to_financial_statements
7004                .extend(entity_notes);
7005
7006            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
7007            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
7008                .debt_instruments
7009                .iter()
7010                .filter(|d| d.entity_id == company.code)
7011                .map(|d| {
7012                    (
7013                        format!("{:?}", d.instrument_type),
7014                        d.principal,
7015                        d.maturity_date.to_string(),
7016                    )
7017                })
7018                .collect();
7019
7020            let hedge_count = treasury.hedge_relationships.len();
7021            let effective_hedges = treasury
7022                .hedge_relationships
7023                .iter()
7024                .filter(|h| h.is_effective)
7025                .count();
7026            let total_notional: rust_decimal::Decimal = treasury
7027                .hedging_instruments
7028                .iter()
7029                .map(|h| h.notional_amount)
7030                .sum();
7031            let total_fair_value: rust_decimal::Decimal = treasury
7032                .hedging_instruments
7033                .iter()
7034                .map(|h| h.fair_value)
7035                .sum();
7036
7037            // Join provision_movements with provisions to get entity/type info
7038            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
7039                .provisions
7040                .iter()
7041                .filter(|p| p.entity_code == company.code)
7042                .map(|p| p.id.as_str())
7043                .collect();
7044            let provision_movements: Vec<(
7045                String,
7046                rust_decimal::Decimal,
7047                rust_decimal::Decimal,
7048                rust_decimal::Decimal,
7049            )> = accounting_standards
7050                .provision_movements
7051                .iter()
7052                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
7053                .map(|m| {
7054                    let prov_type = accounting_standards
7055                        .provisions
7056                        .iter()
7057                        .find(|p| p.id == m.provision_id)
7058                        .map(|p| format!("{:?}", p.provision_type))
7059                        .unwrap_or_else(|| "Unknown".to_string());
7060                    (prov_type, m.opening, m.additions, m.closing)
7061                })
7062                .collect();
7063
7064            let enhanced_ctx = EnhancedNotesContext {
7065                entity_code: company.code.clone(),
7066                period: format!("FY{}", fiscal_year),
7067                currency: company.currency.clone(),
7068                // Inventory breakdown: best-effort using zero (would need balance tracker)
7069                finished_goods_value: rust_decimal::Decimal::ZERO,
7070                wip_value: rust_decimal::Decimal::ZERO,
7071                raw_materials_value: rust_decimal::Decimal::ZERO,
7072                debt_instruments,
7073                hedge_count,
7074                effective_hedges,
7075                total_notional,
7076                total_fair_value,
7077                provision_movements,
7078            };
7079
7080            let enhanced_notes =
7081                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
7082            if !enhanced_notes.is_empty() {
7083                info!(
7084                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
7085                    company.code,
7086                    enhanced_notes.len(),
7087                    enhanced_ctx.debt_instruments.len(),
7088                    hedge_count,
7089                    enhanced_ctx.provision_movements.len(),
7090                );
7091                financial_reporting
7092                    .notes_to_financial_statements
7093                    .extend(enhanced_notes);
7094            }
7095        }
7096    }
7097
7098    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
7099    ///
7100    /// This ensures the trial balance is coherent with the JEs: every debit and credit
7101    /// posted in the journal entries flows through to the trial balance, using the real
7102    /// GL account numbers from the CoA.
7103    fn build_trial_balance_from_entries(
7104        journal_entries: &[JournalEntry],
7105        coa: &ChartOfAccounts,
7106        company_code: &str,
7107        fiscal_year: u16,
7108        fiscal_period: u8,
7109        framework: &str,
7110    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7111        use rust_decimal::Decimal;
7112
7113        // Accumulate total debits and credits per GL account
7114        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7115        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7116
7117        for je in journal_entries {
7118            // Filter to matching company, fiscal year, and period
7119            if je.header.company_code != company_code
7120                || je.header.fiscal_year != fiscal_year
7121                || je.header.fiscal_period != fiscal_period
7122            {
7123                continue;
7124            }
7125
7126            for line in &je.lines {
7127                let acct = &line.gl_account;
7128                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7129                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7130            }
7131        }
7132
7133        // Build a TrialBalanceEntry for each account that had activity
7134        let mut all_accounts: Vec<&String> = account_debits
7135            .keys()
7136            .chain(account_credits.keys())
7137            .collect::<std::collections::HashSet<_>>()
7138            .into_iter()
7139            .collect();
7140        all_accounts.sort();
7141
7142        let mut entries = Vec::new();
7143
7144        for acct_number in all_accounts {
7145            let debit = account_debits
7146                .get(acct_number)
7147                .copied()
7148                .unwrap_or(Decimal::ZERO);
7149            let credit = account_credits
7150                .get(acct_number)
7151                .copied()
7152                .unwrap_or(Decimal::ZERO);
7153
7154            if debit.is_zero() && credit.is_zero() {
7155                continue;
7156            }
7157
7158            // Look up account name from CoA, fall back to "Account {code}"
7159            let account_name = coa
7160                .get_account(acct_number)
7161                .map(|gl| gl.short_description.clone())
7162                .unwrap_or_else(|| format!("Account {acct_number}"));
7163
7164            // Map account code prefix to the category strings expected by
7165            // FinancialStatementGenerator (Cash, Receivables, Inventory,
7166            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
7167            // OperatingExpenses).
7168            let category = Self::category_from_account_code(acct_number, framework);
7169
7170            entries.push(datasynth_generators::TrialBalanceEntry {
7171                account_code: acct_number.clone(),
7172                account_name,
7173                category,
7174                debit_balance: debit,
7175                credit_balance: credit,
7176            });
7177        }
7178
7179        entries
7180    }
7181
7182    /// Build a cumulative trial balance by aggregating all JEs from the start up to
7183    /// (and including) the given period end date.
7184    ///
7185    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
7186    /// while income statement accounts (revenue, expenses) show only the current period.
7187    /// The two are merged into a single Vec for the FinancialStatementGenerator.
7188    #[allow(clippy::too_many_arguments)]
7189    fn build_cumulative_trial_balance(
7190        journal_entries: &[JournalEntry],
7191        coa: &ChartOfAccounts,
7192        company_code: &str,
7193        start_date: NaiveDate,
7194        period_end: NaiveDate,
7195        fiscal_year: u16,
7196        fiscal_period: u8,
7197        framework: &str,
7198    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7199        use rust_decimal::Decimal;
7200
7201        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
7202        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7203        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7204
7205        // Accumulate debits/credits for income statement accounts (current period only)
7206        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7207        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7208
7209        for je in journal_entries {
7210            if je.header.company_code != company_code {
7211                continue;
7212            }
7213
7214            for line in &je.lines {
7215                let acct = &line.gl_account;
7216                // Framework-aware BS bucketing — fixes the Defect A
7217                // mis-classification where US-style prefix tables routed
7218                // SKR/PCG balance-sheet accounts through the P&L bucket
7219                // (or vice versa), giving the resulting TB an asymmetric
7220                // time window with no integrity invariant left to test.
7221                let is_bs_account = Self::is_balance_sheet_account(acct, framework);
7222
7223                if is_bs_account {
7224                    // Balance sheet: accumulate from start through period_end
7225                    if je.header.document_date <= period_end
7226                        && je.header.document_date >= start_date
7227                    {
7228                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7229                            line.debit_amount;
7230                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7231                            line.credit_amount;
7232                    }
7233                } else {
7234                    // Income statement: current period only
7235                    if je.header.fiscal_year == fiscal_year
7236                        && je.header.fiscal_period == fiscal_period
7237                    {
7238                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7239                            line.debit_amount;
7240                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7241                            line.credit_amount;
7242                    }
7243                }
7244            }
7245        }
7246
7247        // Merge all accounts
7248        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7249        all_accounts.extend(bs_debits.keys().cloned());
7250        all_accounts.extend(bs_credits.keys().cloned());
7251        all_accounts.extend(is_debits.keys().cloned());
7252        all_accounts.extend(is_credits.keys().cloned());
7253
7254        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7255        sorted_accounts.sort();
7256
7257        let mut entries = Vec::new();
7258
7259        for acct_number in &sorted_accounts {
7260            let category = Self::category_from_account_code(acct_number, framework);
7261            let is_bs_account = Self::is_balance_sheet_account(acct_number, framework);
7262
7263            let (debit, credit) = if is_bs_account {
7264                (
7265                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7266                    bs_credits
7267                        .get(acct_number)
7268                        .copied()
7269                        .unwrap_or(Decimal::ZERO),
7270                )
7271            } else {
7272                (
7273                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7274                    is_credits
7275                        .get(acct_number)
7276                        .copied()
7277                        .unwrap_or(Decimal::ZERO),
7278                )
7279            };
7280
7281            if debit.is_zero() && credit.is_zero() {
7282                continue;
7283            }
7284
7285            let account_name = coa
7286                .get_account(acct_number)
7287                .map(|gl| gl.short_description.clone())
7288                .unwrap_or_else(|| format!("Account {acct_number}"));
7289
7290            entries.push(datasynth_generators::TrialBalanceEntry {
7291                account_code: acct_number.clone(),
7292                account_name,
7293                category,
7294                debit_balance: debit,
7295                credit_balance: credit,
7296            });
7297        }
7298
7299        entries
7300    }
7301
7302    /// Build a JE-derived cash flow statement using the indirect method.
7303    ///
7304    /// Compares current and prior cumulative trial balances to derive working capital
7305    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7306    fn build_cash_flow_from_trial_balances(
7307        current_tb: &[datasynth_generators::TrialBalanceEntry],
7308        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7309        net_income: rust_decimal::Decimal,
7310    ) -> Vec<CashFlowItem> {
7311        use rust_decimal::Decimal;
7312
7313        // Helper: aggregate a TB by category and return net (debit - credit)
7314        let aggregate =
7315            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7316                let mut map: HashMap<String, Decimal> = HashMap::new();
7317                for entry in tb {
7318                    let net = entry.debit_balance - entry.credit_balance;
7319                    *map.entry(entry.category.clone()).or_default() += net;
7320                }
7321                map
7322            };
7323
7324        let current = aggregate(current_tb);
7325        let prior = prior_tb.map(aggregate);
7326
7327        // Get balance for a category, defaulting to zero
7328        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7329            *map.get(key).unwrap_or(&Decimal::ZERO)
7330        };
7331
7332        // Compute change: current - prior (or current if no prior)
7333        let change = |key: &str| -> Decimal {
7334            let curr = get(&current, key);
7335            match &prior {
7336                Some(p) => curr - get(p, key),
7337                None => curr,
7338            }
7339        };
7340
7341        // Operating activities (indirect method)
7342        // Depreciation add-back: approximate from FixedAssets decrease
7343        let fixed_asset_change = change("FixedAssets");
7344        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7345            -fixed_asset_change
7346        } else {
7347            Decimal::ZERO
7348        };
7349
7350        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7351        let ar_change = change("Receivables");
7352        let inventory_change = change("Inventory");
7353        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7354        let ap_change = change("Payables");
7355        let accrued_change = change("AccruedLiabilities");
7356
7357        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7358            + (-ap_change)
7359            + (-accrued_change);
7360
7361        // Investing activities
7362        let capex = if fixed_asset_change > Decimal::ZERO {
7363            -fixed_asset_change
7364        } else {
7365            Decimal::ZERO
7366        };
7367        let investing_cf = capex;
7368
7369        // Financing activities
7370        let debt_change = -change("LongTermDebt");
7371        let equity_change = -change("Equity");
7372        let financing_cf = debt_change + equity_change;
7373
7374        let net_change = operating_cf + investing_cf + financing_cf;
7375
7376        vec![
7377            CashFlowItem {
7378                item_code: "CF-NI".to_string(),
7379                label: "Net Income".to_string(),
7380                category: CashFlowCategory::Operating,
7381                amount: net_income,
7382                amount_prior: None,
7383                sort_order: 1,
7384                is_total: false,
7385            },
7386            CashFlowItem {
7387                item_code: "CF-DEP".to_string(),
7388                label: "Depreciation & Amortization".to_string(),
7389                category: CashFlowCategory::Operating,
7390                amount: depreciation_addback,
7391                amount_prior: None,
7392                sort_order: 2,
7393                is_total: false,
7394            },
7395            CashFlowItem {
7396                item_code: "CF-AR".to_string(),
7397                label: "Change in Accounts Receivable".to_string(),
7398                category: CashFlowCategory::Operating,
7399                amount: -ar_change,
7400                amount_prior: None,
7401                sort_order: 3,
7402                is_total: false,
7403            },
7404            CashFlowItem {
7405                item_code: "CF-AP".to_string(),
7406                label: "Change in Accounts Payable".to_string(),
7407                category: CashFlowCategory::Operating,
7408                amount: -ap_change,
7409                amount_prior: None,
7410                sort_order: 4,
7411                is_total: false,
7412            },
7413            CashFlowItem {
7414                item_code: "CF-INV".to_string(),
7415                label: "Change in Inventory".to_string(),
7416                category: CashFlowCategory::Operating,
7417                amount: -inventory_change,
7418                amount_prior: None,
7419                sort_order: 5,
7420                is_total: false,
7421            },
7422            CashFlowItem {
7423                item_code: "CF-OP".to_string(),
7424                label: "Net Cash from Operating Activities".to_string(),
7425                category: CashFlowCategory::Operating,
7426                amount: operating_cf,
7427                amount_prior: None,
7428                sort_order: 6,
7429                is_total: true,
7430            },
7431            CashFlowItem {
7432                item_code: "CF-CAPEX".to_string(),
7433                label: "Capital Expenditures".to_string(),
7434                category: CashFlowCategory::Investing,
7435                amount: capex,
7436                amount_prior: None,
7437                sort_order: 7,
7438                is_total: false,
7439            },
7440            CashFlowItem {
7441                item_code: "CF-INV-T".to_string(),
7442                label: "Net Cash from Investing Activities".to_string(),
7443                category: CashFlowCategory::Investing,
7444                amount: investing_cf,
7445                amount_prior: None,
7446                sort_order: 8,
7447                is_total: true,
7448            },
7449            CashFlowItem {
7450                item_code: "CF-DEBT".to_string(),
7451                label: "Net Borrowings / (Repayments)".to_string(),
7452                category: CashFlowCategory::Financing,
7453                amount: debt_change,
7454                amount_prior: None,
7455                sort_order: 9,
7456                is_total: false,
7457            },
7458            CashFlowItem {
7459                item_code: "CF-EQ".to_string(),
7460                label: "Equity Changes".to_string(),
7461                category: CashFlowCategory::Financing,
7462                amount: equity_change,
7463                amount_prior: None,
7464                sort_order: 10,
7465                is_total: false,
7466            },
7467            CashFlowItem {
7468                item_code: "CF-FIN-T".to_string(),
7469                label: "Net Cash from Financing Activities".to_string(),
7470                category: CashFlowCategory::Financing,
7471                amount: financing_cf,
7472                amount_prior: None,
7473                sort_order: 11,
7474                is_total: true,
7475            },
7476            CashFlowItem {
7477                item_code: "CF-NET".to_string(),
7478                label: "Net Change in Cash".to_string(),
7479                category: CashFlowCategory::Operating,
7480                amount: net_change,
7481                amount_prior: None,
7482                sort_order: 12,
7483                is_total: true,
7484            },
7485        ]
7486    }
7487
7488    /// Calculate net income from a set of trial balance entries.
7489    ///
7490    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7491    fn calculate_net_income_from_tb(
7492        tb: &[datasynth_generators::TrialBalanceEntry],
7493    ) -> rust_decimal::Decimal {
7494        use rust_decimal::Decimal;
7495
7496        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7497        for entry in tb {
7498            let net = entry.debit_balance - entry.credit_balance;
7499            *aggregated.entry(entry.category.clone()).or_default() += net;
7500        }
7501
7502        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7503        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7504        let opex = *aggregated
7505            .get("OperatingExpenses")
7506            .unwrap_or(&Decimal::ZERO);
7507        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7508        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7509
7510        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7511        // other_income is typically negative (credit), other_expenses is typically positive
7512        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7513        let tax_rate = Decimal::new(25, 2); // 0.25
7514        let tax = operating_income * tax_rate;
7515        operating_income - tax
7516    }
7517
7518    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7519    ///
7520    /// Uses the first two digits of the account code to classify into the categories
7521    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7522    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7523    /// OperatingExpenses, OtherIncome, OtherExpenses.
7524    /// Map an account code to the orchestrator's 13-bucket category string
7525    /// (`"Cash"` / `"Receivables"` / `"Inventory"` / `"FixedAssets"` /
7526    /// `"Payables"` / `"AccruedLiabilities"` / `"LongTermDebt"` /
7527    /// `"Equity"` / `"Revenue"` / `"CostOfSales"` / `"OperatingExpenses"`
7528    /// / `"OtherIncome"` / `"OtherExpenses"`).
7529    ///
7530    /// `framework` controls which numbering convention is applied:
7531    ///
7532    /// - `"us_gaap"` / `"ifrs"` / `"dual_reporting"` — US-style 4-digit
7533    ///   chart (1xxx assets, 2xxx liabilities, 3xxx equity, 4xxx revenue,
7534    ///   5xxx COGS, 6xxx OpEx, 7xxx other income, 8xxx other expense).
7535    /// - `"french_gaap"` — French PCG (1 = capital/liabilities, 2 = fixed
7536    ///   assets, 3 = inventory, 4 = third parties, 5 = cash, 6 = expenses,
7537    ///   7 = revenue).
7538    /// - `"german_gaap"` / `"hgb"` — German SKR04 (0 = fixed assets,
7539    ///   1 = current assets, 2 = equity, 3 = liabilities, 4 = revenue,
7540    ///   5 = COGS, 6 = OpEx, 7 = financial, 8 = tax/extraordinary).
7541    ///
7542    /// Unknown frameworks fall back to US-style.
7543    fn category_from_account_code(code: &str, framework: &str) -> String {
7544        match framework {
7545            "german_gaap" | "GermanGaap" | "hgb" => Self::skr_category(code),
7546            "french_gaap" | "FrenchGaap" => Self::pcg_category(code),
7547            _ => Self::us_gaap_category(code),
7548        }
7549        .to_string()
7550    }
7551
7552    fn us_gaap_category(code: &str) -> &'static str {
7553        let prefix: String = code.chars().take(2).collect();
7554        match prefix.as_str() {
7555            "10" => "Cash",
7556            "11" => "Receivables",
7557            "12" | "13" | "14" => "Inventory",
7558            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7559            "20" => "Payables",
7560            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7561            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7562            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7563            "40" | "41" | "42" | "43" | "44" => "Revenue",
7564            "50" | "51" | "52" => "CostOfSales",
7565            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7566                "OperatingExpenses"
7567            }
7568            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7569            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7570            _ => "OperatingExpenses",
7571        }
7572    }
7573
7574    /// SKR04 (German GAAP) prefix → orchestrator category.
7575    ///
7576    /// 0 = fixed assets, 1 = current assets (10-12 cash, 13-14 receivables,
7577    /// 15-19 inventory), 2 = equity, 3 = liabilities (3-31 payables,
7578    /// 32-37 accrued, 38-39 long-term debt), 4 = revenue, 5 = COGS,
7579    /// 6 = OpEx, 7 = financial income, 8 = tax/extraordinary expense.
7580    fn skr_category(code: &str) -> &'static str {
7581        let first = code.chars().next().and_then(|c| c.to_digit(10));
7582        let prefix: String = code.chars().take(2).collect();
7583        match first {
7584            Some(0) => "FixedAssets",
7585            Some(1) => match prefix.as_str() {
7586                "10" | "11" | "12" => "Cash",
7587                "13" | "14" => "Receivables",
7588                _ => "Inventory",
7589            },
7590            Some(2) => "Equity",
7591            Some(3) => match prefix.as_str() {
7592                "30" | "31" => "Payables",
7593                "32" | "33" | "34" | "35" | "36" | "37" => "AccruedLiabilities",
7594                _ => "LongTermDebt",
7595            },
7596            Some(4) => "Revenue",
7597            Some(5) => "CostOfSales",
7598            Some(6) => "OperatingExpenses",
7599            Some(7) => "OtherIncome",
7600            Some(8) => "OtherExpenses",
7601            _ => "OperatingExpenses",
7602        }
7603    }
7604
7605    /// French PCG prefix → orchestrator category.
7606    ///
7607    /// 10-14 = equity, 15-19 = liabilities (provisions, debts),
7608    /// 2 = fixed assets, 3 = inventory, 40 = payables, 41 = receivables,
7609    /// 42-49 = liabilities (personnel, tax, group), 5 = cash, 6 = expenses,
7610    /// 7 = revenue.
7611    fn pcg_category(code: &str) -> &'static str {
7612        let first = code.chars().next().and_then(|c| c.to_digit(10));
7613        let second = code.chars().nth(1).and_then(|c| c.to_digit(10));
7614        match first {
7615            Some(1) => match second {
7616                Some(0..=4) => "Equity",
7617                Some(5) => "AccruedLiabilities",
7618                _ => "LongTermDebt",
7619            },
7620            Some(2) => "FixedAssets",
7621            Some(3) => "Inventory",
7622            Some(4) => match second {
7623                Some(0) => "Payables",
7624                Some(1) => "Receivables",
7625                _ => "AccruedLiabilities",
7626            },
7627            Some(5) => "Cash",
7628            Some(6) => "OperatingExpenses",
7629            Some(7) => "Revenue",
7630            Some(8) | Some(9) => "OperatingExpenses",
7631            _ => "OperatingExpenses",
7632        }
7633    }
7634
7635    /// Test whether an account code maps to a balance-sheet line under
7636    /// the given framework. Drives the cumulative-vs-period bucketing in
7637    /// [`Self::build_cumulative_trial_balance`].
7638    ///
7639    /// Delegates to the framework-aware classifier in
7640    /// `datasynth-core::framework_accounts` so SKR (German) and PCG
7641    /// (French) codes are recognised, not silently routed through a
7642    /// US-style prefix table.
7643    fn is_balance_sheet_account(code: &str, framework: &str) -> bool {
7644        // `AccountType` here is the `balance::AccountType` imported at
7645        // the top of the file; `FrameworkAccounts::classify_account_type`
7646        // returns the same enum, so no cross-namespace mapping is needed.
7647        let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
7648        matches!(
7649            fa.classify_account_type(code),
7650            AccountType::Asset
7651                | AccountType::ContraAsset
7652                | AccountType::Liability
7653                | AccountType::ContraLiability
7654                | AccountType::Equity
7655                | AccountType::ContraEquity
7656        )
7657    }
7658
7659    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7660    fn phase_hr_data(
7661        &mut self,
7662        stats: &mut EnhancedGenerationStatistics,
7663    ) -> SynthResult<HrSnapshot> {
7664        if !self.phase_config.generate_hr {
7665            debug!("Phase 16: Skipped (HR generation disabled)");
7666            return Ok(HrSnapshot::default());
7667        }
7668
7669        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7670
7671        let seed = self.seed;
7672        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7673            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7674        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7675        let company_code = self
7676            .config
7677            .companies
7678            .first()
7679            .map(|c| c.code.as_str())
7680            .unwrap_or("1000");
7681        let currency = self
7682            .config
7683            .companies
7684            .first()
7685            .map(|c| c.currency.as_str())
7686            .unwrap_or("USD");
7687
7688        let employee_ids: Vec<String> = self
7689            .master_data
7690            .employees
7691            .iter()
7692            .map(|e| e.employee_id.clone())
7693            .collect();
7694
7695        if employee_ids.is_empty() {
7696            debug!("Phase 16: Skipped (no employees available)");
7697            return Ok(HrSnapshot::default());
7698        }
7699
7700        // Extract cost-center pool from master data employees for cross-reference
7701        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7702        let cost_center_ids: Vec<String> = self
7703            .master_data
7704            .employees
7705            .iter()
7706            .filter_map(|e| e.cost_center.clone())
7707            .collect::<std::collections::HashSet<_>>()
7708            .into_iter()
7709            .collect();
7710
7711        let mut snapshot = HrSnapshot::default();
7712
7713        // Generate payroll runs (one per month)
7714        if self.config.hr.payroll.enabled {
7715            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7716                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7717
7718            // Look up country pack for payroll deductions and labels
7719            let payroll_pack = self.primary_pack();
7720
7721            // Store the pack on the generator so generate() resolves
7722            // localized deduction rates and labels from it.
7723            payroll_gen.set_country_pack(payroll_pack.clone());
7724
7725            let employees_with_salary: Vec<(
7726                String,
7727                rust_decimal::Decimal,
7728                Option<String>,
7729                Option<String>,
7730            )> = self
7731                .master_data
7732                .employees
7733                .iter()
7734                .map(|e| {
7735                    // Use the employee's actual annual base salary.
7736                    // Fall back to $60,000 / yr if somehow zero.
7737                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7738                        e.base_salary
7739                    } else {
7740                        rust_decimal::Decimal::from(60_000)
7741                    };
7742                    (
7743                        e.employee_id.clone(),
7744                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7745                        e.cost_center.clone(),
7746                        e.department_id.clone(),
7747                    )
7748                })
7749                .collect();
7750
7751            // Use generate_with_changes when employee change history is available
7752            // so that salary adjustments, transfers, etc. are reflected in payroll.
7753            let change_history = &self.master_data.employee_change_history;
7754            let has_changes = !change_history.is_empty();
7755            if has_changes {
7756                debug!(
7757                    "Payroll will incorporate {} employee change events",
7758                    change_history.len()
7759                );
7760            }
7761
7762            for month in 0..self.config.global.period_months {
7763                let period_start = start_date + chrono::Months::new(month);
7764                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7765                let (run, items) = if has_changes {
7766                    payroll_gen.generate_with_changes(
7767                        company_code,
7768                        &employees_with_salary,
7769                        period_start,
7770                        period_end,
7771                        currency,
7772                        change_history,
7773                    )
7774                } else {
7775                    payroll_gen.generate(
7776                        company_code,
7777                        &employees_with_salary,
7778                        period_start,
7779                        period_end,
7780                        currency,
7781                    )
7782                };
7783                snapshot.payroll_runs.push(run);
7784                snapshot.payroll_run_count += 1;
7785                snapshot.payroll_line_item_count += items.len();
7786                snapshot.payroll_line_items.extend(items);
7787            }
7788        }
7789
7790        // Generate time entries
7791        if self.config.hr.time_attendance.enabled {
7792            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7793                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7794            // v3.4.2: when a temporal context is configured, time entries
7795            // respect holidays (not just weekends) and submitted_at lag
7796            // snaps to business days.
7797            if let Some(ctx) = &self.temporal_context {
7798                time_gen.set_temporal_context(Arc::clone(ctx));
7799            }
7800            let entries = time_gen.generate(
7801                &employee_ids,
7802                start_date,
7803                end_date,
7804                &self.config.hr.time_attendance,
7805            );
7806            snapshot.time_entry_count = entries.len();
7807            snapshot.time_entries = entries;
7808        }
7809
7810        // Generate expense reports
7811        if self.config.hr.expenses.enabled {
7812            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7813                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7814            expense_gen.set_country_pack(self.primary_pack().clone());
7815            // v3.4.2: snap submission / approval / paid / line-item dates
7816            // to business days when temporal_context is present.
7817            if let Some(ctx) = &self.temporal_context {
7818                expense_gen.set_temporal_context(Arc::clone(ctx));
7819            }
7820            let company_currency = self
7821                .config
7822                .companies
7823                .first()
7824                .map(|c| c.currency.as_str())
7825                .unwrap_or("USD");
7826            let reports = expense_gen.generate_with_currency(
7827                &employee_ids,
7828                start_date,
7829                end_date,
7830                &self.config.hr.expenses,
7831                company_currency,
7832            );
7833            snapshot.expense_report_count = reports.len();
7834            snapshot.expense_reports = reports;
7835        }
7836
7837        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7838        if self.config.hr.payroll.enabled {
7839            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7840            let employee_pairs: Vec<(String, String)> = self
7841                .master_data
7842                .employees
7843                .iter()
7844                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7845                .collect();
7846            let enrollments =
7847                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7848            snapshot.benefit_enrollment_count = enrollments.len();
7849            snapshot.benefit_enrollments = enrollments;
7850        }
7851
7852        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7853        if self.phase_config.generate_hr {
7854            let entity_name = self
7855                .config
7856                .companies
7857                .first()
7858                .map(|c| c.name.as_str())
7859                .unwrap_or("Entity");
7860            let period_months = self.config.global.period_months;
7861            let period_label = {
7862                let y = start_date.year();
7863                let m = start_date.month();
7864                if period_months >= 12 {
7865                    format!("FY{y}")
7866                } else {
7867                    format!("{y}-{m:02}")
7868                }
7869            };
7870            let reporting_date =
7871                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7872
7873            // Compute average annual salary from actual payroll data when available.
7874            // PayrollRun.total_gross covers all employees for one pay period; we sum
7875            // across all runs and divide by employee_count to get per-employee total,
7876            // then annualise for sub-annual periods.
7877            let avg_salary: Option<rust_decimal::Decimal> = {
7878                let employee_count = employee_ids.len();
7879                if self.config.hr.payroll.enabled
7880                    && employee_count > 0
7881                    && !snapshot.payroll_runs.is_empty()
7882                {
7883                    // Sum total gross pay across all payroll runs for this company
7884                    let total_gross: rust_decimal::Decimal = snapshot
7885                        .payroll_runs
7886                        .iter()
7887                        .filter(|r| r.company_code == company_code)
7888                        .map(|r| r.total_gross)
7889                        .sum();
7890                    if total_gross > rust_decimal::Decimal::ZERO {
7891                        // Annualise: total_gross covers `period_months` months of pay
7892                        let annual_total = if period_months > 0 && period_months < 12 {
7893                            total_gross * rust_decimal::Decimal::from(12u32)
7894                                / rust_decimal::Decimal::from(period_months)
7895                        } else {
7896                            total_gross
7897                        };
7898                        Some(
7899                            (annual_total / rust_decimal::Decimal::from(employee_count))
7900                                .round_dp(2),
7901                        )
7902                    } else {
7903                        None
7904                    }
7905                } else {
7906                    None
7907                }
7908            };
7909
7910            let mut pension_gen =
7911                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7912            let pension_snap = pension_gen.generate(
7913                company_code,
7914                entity_name,
7915                &period_label,
7916                reporting_date,
7917                employee_ids.len(),
7918                currency,
7919                avg_salary,
7920                period_months,
7921            );
7922            snapshot.pension_plan_count = pension_snap.plans.len();
7923            snapshot.pension_plans = pension_snap.plans;
7924            snapshot.pension_obligations = pension_snap.obligations;
7925            snapshot.pension_plan_assets = pension_snap.plan_assets;
7926            snapshot.pension_disclosures = pension_snap.disclosures;
7927            // Pension JEs are returned here so they can be added to entries
7928            // in the caller (stored temporarily on snapshot for transfer).
7929            // We embed them in the hr snapshot for simplicity; the orchestrator
7930            // will extract and extend `entries`.
7931            snapshot.pension_journal_entries = pension_snap.journal_entries;
7932        }
7933
7934        // Generate stock-based compensation (ASC 718 / IFRS 2)
7935        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7936            let period_months = self.config.global.period_months;
7937            let period_label = {
7938                let y = start_date.year();
7939                let m = start_date.month();
7940                if period_months >= 12 {
7941                    format!("FY{y}")
7942                } else {
7943                    format!("{y}-{m:02}")
7944                }
7945            };
7946            let reporting_date =
7947                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7948
7949            let mut stock_comp_gen =
7950                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7951            let stock_snap = stock_comp_gen.generate(
7952                company_code,
7953                &employee_ids,
7954                start_date,
7955                &period_label,
7956                reporting_date,
7957                currency,
7958            );
7959            snapshot.stock_grant_count = stock_snap.grants.len();
7960            snapshot.stock_grants = stock_snap.grants;
7961            snapshot.stock_comp_expenses = stock_snap.expenses;
7962            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7963        }
7964
7965        stats.payroll_run_count = snapshot.payroll_run_count;
7966        stats.time_entry_count = snapshot.time_entry_count;
7967        stats.expense_report_count = snapshot.expense_report_count;
7968        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7969        stats.pension_plan_count = snapshot.pension_plan_count;
7970        stats.stock_grant_count = snapshot.stock_grant_count;
7971
7972        info!(
7973            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7974            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7975            snapshot.time_entry_count, snapshot.expense_report_count,
7976            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7977            snapshot.stock_grant_count
7978        );
7979        self.check_resources_with_log("post-hr")?;
7980
7981        Ok(snapshot)
7982    }
7983
7984    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7985    fn phase_accounting_standards(
7986        &mut self,
7987        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7988        journal_entries: &[JournalEntry],
7989        stats: &mut EnhancedGenerationStatistics,
7990    ) -> SynthResult<AccountingStandardsSnapshot> {
7991        if !self.phase_config.generate_accounting_standards {
7992            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7993            return Ok(AccountingStandardsSnapshot::default());
7994        }
7995        info!("Phase 17: Generating Accounting Standards Data");
7996
7997        let seed = self.seed;
7998        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7999            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8000        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8001        let company_code = self
8002            .config
8003            .companies
8004            .first()
8005            .map(|c| c.code.as_str())
8006            .unwrap_or("1000");
8007        let currency = self
8008            .config
8009            .companies
8010            .first()
8011            .map(|c| c.currency.as_str())
8012            .unwrap_or("USD");
8013
8014        // Convert config framework to standards framework.
8015        // If the user explicitly set a framework in the YAML config, use that.
8016        // Otherwise, fall back to the country pack's accounting.framework field,
8017        // and if that is also absent or unrecognised, default to US GAAP.
8018        let framework = match self.config.accounting_standards.framework {
8019            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
8020                datasynth_standards::framework::AccountingFramework::UsGaap
8021            }
8022            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
8023                datasynth_standards::framework::AccountingFramework::Ifrs
8024            }
8025            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
8026                datasynth_standards::framework::AccountingFramework::DualReporting
8027            }
8028            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
8029                datasynth_standards::framework::AccountingFramework::FrenchGaap
8030            }
8031            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
8032                datasynth_standards::framework::AccountingFramework::GermanGaap
8033            }
8034            None => {
8035                // Derive framework from the primary company's country pack
8036                let pack = self.primary_pack();
8037                let pack_fw = pack.accounting.framework.as_str();
8038                match pack_fw {
8039                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
8040                    "dual_reporting" => {
8041                        datasynth_standards::framework::AccountingFramework::DualReporting
8042                    }
8043                    "french_gaap" => {
8044                        datasynth_standards::framework::AccountingFramework::FrenchGaap
8045                    }
8046                    "german_gaap" | "hgb" => {
8047                        datasynth_standards::framework::AccountingFramework::GermanGaap
8048                    }
8049                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
8050                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
8051                }
8052            }
8053        };
8054
8055        let mut snapshot = AccountingStandardsSnapshot::default();
8056
8057        // Revenue recognition
8058        if self.config.accounting_standards.revenue_recognition.enabled {
8059            let customer_ids: Vec<String> = self
8060                .master_data
8061                .customers
8062                .iter()
8063                .map(|c| c.customer_id.clone())
8064                .collect();
8065
8066            if !customer_ids.is_empty() {
8067                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
8068                let contracts = rev_gen.generate(
8069                    company_code,
8070                    &customer_ids,
8071                    start_date,
8072                    end_date,
8073                    currency,
8074                    &self.config.accounting_standards.revenue_recognition,
8075                    framework,
8076                );
8077                snapshot.revenue_contract_count = contracts.len();
8078                snapshot.contracts = contracts;
8079            }
8080        }
8081
8082        // Impairment testing
8083        if self.config.accounting_standards.impairment.enabled {
8084            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
8085                .master_data
8086                .assets
8087                .iter()
8088                .map(|a| {
8089                    (
8090                        a.asset_id.clone(),
8091                        a.description.clone(),
8092                        a.acquisition_cost,
8093                    )
8094                })
8095                .collect();
8096
8097            if !asset_data.is_empty() {
8098                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
8099                let tests = imp_gen.generate(
8100                    company_code,
8101                    &asset_data,
8102                    end_date,
8103                    &self.config.accounting_standards.impairment,
8104                    framework,
8105                );
8106                snapshot.impairment_test_count = tests.len();
8107                snapshot.impairment_tests = tests;
8108            }
8109        }
8110
8111        // Business combinations (IFRS 3 / ASC 805)
8112        if self
8113            .config
8114            .accounting_standards
8115            .business_combinations
8116            .enabled
8117        {
8118            let bc_config = &self.config.accounting_standards.business_combinations;
8119            let framework_str = match framework {
8120                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8121                _ => "US_GAAP",
8122            };
8123            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
8124            let bc_snap = bc_gen.generate(
8125                company_code,
8126                currency,
8127                start_date,
8128                end_date,
8129                bc_config.acquisition_count,
8130                framework_str,
8131            );
8132            snapshot.business_combination_count = bc_snap.combinations.len();
8133            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
8134            snapshot.business_combinations = bc_snap.combinations;
8135        }
8136
8137        // Expected Credit Loss (IFRS 9 / ASC 326)
8138        if self
8139            .config
8140            .accounting_standards
8141            .expected_credit_loss
8142            .enabled
8143        {
8144            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
8145            let framework_str = match framework {
8146                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
8147                _ => "ASC_326",
8148            };
8149
8150            // Use AR aging data from the subledger snapshot if available;
8151            // otherwise generate synthetic bucket exposures.
8152            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8153
8154            let mut ecl_gen = EclGenerator::new(seed + 43);
8155
8156            // Collect combined bucket totals across all company AR aging reports.
8157            let bucket_exposures: Vec<(
8158                datasynth_core::models::subledger::ar::AgingBucket,
8159                rust_decimal::Decimal,
8160            )> = if ar_aging_reports.is_empty() {
8161                // No AR aging data — synthesise plausible bucket exposures.
8162                use datasynth_core::models::subledger::ar::AgingBucket;
8163                vec![
8164                    (
8165                        AgingBucket::Current,
8166                        rust_decimal::Decimal::from(500_000_u32),
8167                    ),
8168                    (
8169                        AgingBucket::Days1To30,
8170                        rust_decimal::Decimal::from(120_000_u32),
8171                    ),
8172                    (
8173                        AgingBucket::Days31To60,
8174                        rust_decimal::Decimal::from(45_000_u32),
8175                    ),
8176                    (
8177                        AgingBucket::Days61To90,
8178                        rust_decimal::Decimal::from(15_000_u32),
8179                    ),
8180                    (
8181                        AgingBucket::Over90Days,
8182                        rust_decimal::Decimal::from(8_000_u32),
8183                    ),
8184                ]
8185            } else {
8186                use datasynth_core::models::subledger::ar::AgingBucket;
8187                // Sum bucket totals from all reports.
8188                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
8189                    std::collections::HashMap::new();
8190                for report in ar_aging_reports {
8191                    for (bucket, amount) in &report.bucket_totals {
8192                        *totals.entry(*bucket).or_default() += amount;
8193                    }
8194                }
8195                AgingBucket::all()
8196                    .into_iter()
8197                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
8198                    .collect()
8199            };
8200
8201            let ecl_snap = ecl_gen.generate(
8202                company_code,
8203                end_date,
8204                &bucket_exposures,
8205                ecl_config,
8206                &period_label,
8207                framework_str,
8208            );
8209
8210            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8211            snapshot.ecl_models = ecl_snap.ecl_models;
8212            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8213            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8214        }
8215
8216        // Provisions and contingencies (IAS 37 / ASC 450)
8217        {
8218            let framework_str = match framework {
8219                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8220                _ => "US_GAAP",
8221            };
8222
8223            // Compute actual revenue from the journal entries generated so far.
8224            // The `journal_entries` slice passed to this phase contains all GL entries
8225            // up to and including Period Close. Fall back to a minimum of 100_000 to
8226            // avoid degenerate zero-based provision amounts on first-period datasets.
8227            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8228                .max(rust_decimal::Decimal::from(100_000_u32));
8229
8230            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8231
8232            let mut prov_gen = ProvisionGenerator::new(seed + 44);
8233            let prov_snap = prov_gen.generate(
8234                company_code,
8235                currency,
8236                revenue_proxy,
8237                end_date,
8238                &period_label,
8239                framework_str,
8240                None, // prior_opening: no carry-forward data in single-period runs
8241            );
8242
8243            snapshot.provision_count = prov_snap.provisions.len();
8244            snapshot.provisions = prov_snap.provisions;
8245            snapshot.provision_movements = prov_snap.movements;
8246            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8247            snapshot.provision_journal_entries = prov_snap.journal_entries;
8248        }
8249
8250        // IAS 21 Functional Currency Translation
8251        // For each company whose functional currency differs from the presentation
8252        // currency, generate a CurrencyTranslationResult with CTA (OCI).
8253        {
8254            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8255
8256            let presentation_currency = self
8257                .config
8258                .global
8259                .presentation_currency
8260                .clone()
8261                .unwrap_or_else(|| self.config.global.group_currency.clone());
8262
8263            // Build a minimal rate table populated with approximate rates from
8264            // the FX model base rates (USD-based) so we can do the translation.
8265            let mut rate_table = FxRateTable::new(&presentation_currency);
8266
8267            // Populate with base rates against USD; if presentation_currency is
8268            // not USD we do a best-effort two-step conversion using the table's
8269            // triangulation support.
8270            let base_rates = base_rates_usd();
8271            for (ccy, rate) in &base_rates {
8272                rate_table.add_rate(FxRate::new(
8273                    ccy,
8274                    "USD",
8275                    RateType::Closing,
8276                    end_date,
8277                    *rate,
8278                    "SYNTHETIC",
8279                ));
8280                // Average rate = 98% of closing (approximation).
8281                // 0.98 = 98/100 = Decimal::new(98, 2)
8282                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8283                rate_table.add_rate(FxRate::new(
8284                    ccy,
8285                    "USD",
8286                    RateType::Average,
8287                    end_date,
8288                    avg,
8289                    "SYNTHETIC",
8290                ));
8291            }
8292
8293            let mut translation_results = Vec::new();
8294            for company in &self.config.companies {
8295                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
8296                // to ensure the translation produces non-trivial CTA amounts.
8297                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8298                    .max(rust_decimal::Decimal::from(100_000_u32));
8299
8300                let func_ccy = company
8301                    .functional_currency
8302                    .clone()
8303                    .unwrap_or_else(|| company.currency.clone());
8304
8305                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8306                    &company.code,
8307                    &func_ccy,
8308                    &presentation_currency,
8309                    &ias21_period_label,
8310                    end_date,
8311                    company_revenue,
8312                    &rate_table,
8313                );
8314                translation_results.push(result);
8315            }
8316
8317            snapshot.currency_translation_count = translation_results.len();
8318            snapshot.currency_translation_results = translation_results;
8319        }
8320
8321        stats.revenue_contract_count = snapshot.revenue_contract_count;
8322        stats.impairment_test_count = snapshot.impairment_test_count;
8323        stats.business_combination_count = snapshot.business_combination_count;
8324        stats.ecl_model_count = snapshot.ecl_model_count;
8325        stats.provision_count = snapshot.provision_count;
8326
8327        // ------------------------------------------------------------
8328        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
8329        // ------------------------------------------------------------
8330        if self.config.accounting_standards.leases.enabled {
8331            use datasynth_generators::standards::LeaseGenerator;
8332            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8333                .unwrap_or_else(|_| {
8334                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8335                });
8336            let framework =
8337                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8338            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8339            for company in &self.config.companies {
8340                let leases = lease_gen.generate(
8341                    &company.code,
8342                    start_date,
8343                    &self.config.accounting_standards.leases,
8344                    framework,
8345                );
8346                snapshot.lease_count += leases.len();
8347                snapshot.leases.extend(leases);
8348            }
8349            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8350        }
8351
8352        // ------------------------------------------------------------
8353        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
8354        // ------------------------------------------------------------
8355        if self.config.accounting_standards.fair_value.enabled {
8356            use datasynth_generators::standards::FairValueGenerator;
8357            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8358                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8359                + chrono::Months::new(self.config.global.period_months);
8360            let framework =
8361                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8362            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8363            for company in &self.config.companies {
8364                let measurements = fv_gen.generate(
8365                    &company.code,
8366                    end_date,
8367                    &company.currency,
8368                    &self.config.accounting_standards.fair_value,
8369                    framework,
8370                );
8371                snapshot.fair_value_measurement_count += measurements.len();
8372                snapshot.fair_value_measurements.extend(measurements);
8373            }
8374            info!(
8375                "v3.3.1 fair value measurements: {}",
8376                snapshot.fair_value_measurement_count
8377            );
8378        }
8379
8380        // ------------------------------------------------------------
8381        // v3.3.1: Framework reconciliation (dual reporting only)
8382        // ------------------------------------------------------------
8383        if self.config.accounting_standards.generate_differences
8384            && matches!(
8385                self.config.accounting_standards.framework,
8386                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8387            )
8388        {
8389            use datasynth_generators::standards::FrameworkReconciliationGenerator;
8390            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8391                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8392                + chrono::Months::new(self.config.global.period_months);
8393            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8394            for company in &self.config.companies {
8395                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8396                snapshot.framework_difference_count += records.len();
8397                snapshot.framework_differences.extend(records);
8398                snapshot.framework_reconciliations.push(reconciliation);
8399            }
8400            info!(
8401                "v3.3.1 framework reconciliation: {} differences across {} entities",
8402                snapshot.framework_difference_count,
8403                snapshot.framework_reconciliations.len()
8404            );
8405        }
8406
8407        info!(
8408            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8409            snapshot.revenue_contract_count,
8410            snapshot.impairment_test_count,
8411            snapshot.business_combination_count,
8412            snapshot.ecl_model_count,
8413            snapshot.provision_count,
8414            snapshot.currency_translation_count,
8415            snapshot.lease_count,
8416            snapshot.fair_value_measurement_count,
8417            snapshot.framework_difference_count,
8418        );
8419        self.check_resources_with_log("post-accounting-standards")?;
8420
8421        Ok(snapshot)
8422    }
8423
8424    /// v3.3.1: helper to resolve the accounting-standards framework enum
8425    /// from config into the `datasynth_standards::framework::AccountingFramework`
8426    /// type expected by standards generators. Falls back to US GAAP.
8427    fn resolve_accounting_framework(
8428        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8429    ) -> datasynth_standards::framework::AccountingFramework {
8430        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8431        use datasynth_standards::framework::AccountingFramework as Fw;
8432        match cfg {
8433            Some(Cfg::Ifrs) => Fw::Ifrs,
8434            Some(Cfg::DualReporting) => Fw::DualReporting,
8435            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8436            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8437            _ => Fw::UsGaap,
8438        }
8439    }
8440
8441    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8442    fn phase_manufacturing(
8443        &mut self,
8444        stats: &mut EnhancedGenerationStatistics,
8445    ) -> SynthResult<ManufacturingSnapshot> {
8446        if !self.phase_config.generate_manufacturing {
8447            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8448            return Ok(ManufacturingSnapshot::default());
8449        }
8450        info!("Phase 18: Generating Manufacturing Data");
8451
8452        let seed = self.seed;
8453        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8454            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8455        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8456        let company_code = self
8457            .config
8458            .companies
8459            .first()
8460            .map(|c| c.code.as_str())
8461            .unwrap_or("1000");
8462
8463        let material_data: Vec<(String, String)> = self
8464            .master_data
8465            .materials
8466            .iter()
8467            .map(|m| (m.material_id.clone(), m.description.clone()))
8468            .collect();
8469
8470        if material_data.is_empty() {
8471            debug!("Phase 18: Skipped (no materials available)");
8472            return Ok(ManufacturingSnapshot::default());
8473        }
8474
8475        let mut snapshot = ManufacturingSnapshot::default();
8476
8477        // Generate production orders
8478        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8479        // v3.4.3: snap planned / actual / operation dates to business days.
8480        if let Some(ctx) = &self.temporal_context {
8481            prod_gen.set_temporal_context(Arc::clone(ctx));
8482        }
8483        let production_orders = prod_gen.generate(
8484            company_code,
8485            &material_data,
8486            start_date,
8487            end_date,
8488            &self.config.manufacturing.production_orders,
8489            &self.config.manufacturing.costing,
8490            &self.config.manufacturing.routing,
8491        );
8492        snapshot.production_order_count = production_orders.len();
8493
8494        // Generate quality inspections from production orders
8495        let inspection_data: Vec<(String, String, String)> = production_orders
8496            .iter()
8497            .map(|po| {
8498                (
8499                    po.order_id.clone(),
8500                    po.material_id.clone(),
8501                    po.material_description.clone(),
8502                )
8503            })
8504            .collect();
8505
8506        snapshot.production_orders = production_orders;
8507
8508        if !inspection_data.is_empty() {
8509            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8510            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8511            snapshot.quality_inspection_count = inspections.len();
8512            snapshot.quality_inspections = inspections;
8513        }
8514
8515        // Generate cycle counts (one per month)
8516        let storage_locations: Vec<(String, String)> = material_data
8517            .iter()
8518            .enumerate()
8519            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8520            .collect();
8521
8522        let employee_ids: Vec<String> = self
8523            .master_data
8524            .employees
8525            .iter()
8526            .map(|e| e.employee_id.clone())
8527            .collect();
8528        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8529            .with_employee_pool(employee_ids);
8530        let mut cycle_count_total = 0usize;
8531        for month in 0..self.config.global.period_months {
8532            let count_date = start_date + chrono::Months::new(month);
8533            let items_per_count = storage_locations.len().clamp(10, 50);
8534            let cc = cc_gen.generate(
8535                company_code,
8536                &storage_locations,
8537                count_date,
8538                items_per_count,
8539            );
8540            snapshot.cycle_counts.push(cc);
8541            cycle_count_total += 1;
8542        }
8543        snapshot.cycle_count_count = cycle_count_total;
8544
8545        // Generate BOM components
8546        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8547        let bom_components = bom_gen.generate(company_code, &material_data);
8548        snapshot.bom_component_count = bom_components.len();
8549        snapshot.bom_components = bom_components;
8550
8551        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8552        let currency = self
8553            .config
8554            .companies
8555            .first()
8556            .map(|c| c.currency.as_str())
8557            .unwrap_or("USD");
8558        let production_order_ids: Vec<String> = snapshot
8559            .production_orders
8560            .iter()
8561            .map(|po| po.order_id.clone())
8562            .collect();
8563        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8564        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8565            company_code,
8566            &material_data,
8567            start_date,
8568            end_date,
8569            2,
8570            currency,
8571            &production_order_ids,
8572        );
8573        snapshot.inventory_movement_count = inventory_movements.len();
8574        snapshot.inventory_movements = inventory_movements;
8575
8576        stats.production_order_count = snapshot.production_order_count;
8577        stats.quality_inspection_count = snapshot.quality_inspection_count;
8578        stats.cycle_count_count = snapshot.cycle_count_count;
8579        stats.bom_component_count = snapshot.bom_component_count;
8580        stats.inventory_movement_count = snapshot.inventory_movement_count;
8581
8582        info!(
8583            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8584            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8585            snapshot.bom_component_count, snapshot.inventory_movement_count
8586        );
8587        self.check_resources_with_log("post-manufacturing")?;
8588
8589        Ok(snapshot)
8590    }
8591
8592    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8593    fn phase_sales_kpi_budgets(
8594        &mut self,
8595        coa: &Arc<ChartOfAccounts>,
8596        financial_reporting: &FinancialReportingSnapshot,
8597        entries: &[JournalEntry],
8598        stats: &mut EnhancedGenerationStatistics,
8599    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8600        if !self.phase_config.generate_sales_kpi_budgets {
8601            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8602            return Ok(SalesKpiBudgetsSnapshot::default());
8603        }
8604        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8605
8606        let seed = self.seed;
8607        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8608            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8609        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610        let company_code = self
8611            .config
8612            .companies
8613            .first()
8614            .map(|c| c.code.as_str())
8615            .unwrap_or("1000");
8616
8617        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8618
8619        // Sales Quotes
8620        if self.config.sales_quotes.enabled {
8621            let customer_data: Vec<(String, String)> = self
8622                .master_data
8623                .customers
8624                .iter()
8625                .map(|c| (c.customer_id.clone(), c.name.clone()))
8626                .collect();
8627            let material_data: Vec<(String, String)> = self
8628                .master_data
8629                .materials
8630                .iter()
8631                .map(|m| (m.material_id.clone(), m.description.clone()))
8632                .collect();
8633
8634            if !customer_data.is_empty() && !material_data.is_empty() {
8635                let employee_ids: Vec<String> = self
8636                    .master_data
8637                    .employees
8638                    .iter()
8639                    .map(|e| e.employee_id.clone())
8640                    .collect();
8641                let customer_ids: Vec<String> = self
8642                    .master_data
8643                    .customers
8644                    .iter()
8645                    .map(|c| c.customer_id.clone())
8646                    .collect();
8647                let company_currency = self
8648                    .config
8649                    .companies
8650                    .first()
8651                    .map(|c| c.currency.as_str())
8652                    .unwrap_or("USD");
8653
8654                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8655                    .with_pools(employee_ids, customer_ids);
8656                let quotes = quote_gen.generate_with_currency(
8657                    company_code,
8658                    &customer_data,
8659                    &material_data,
8660                    start_date,
8661                    end_date,
8662                    &self.config.sales_quotes,
8663                    company_currency,
8664                );
8665                snapshot.sales_quote_count = quotes.len();
8666                snapshot.sales_quotes = quotes;
8667            }
8668        }
8669
8670        // Management KPIs
8671        if self.config.financial_reporting.management_kpis.enabled {
8672            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8673            let mut kpis = kpi_gen.generate(
8674                company_code,
8675                start_date,
8676                end_date,
8677                &self.config.financial_reporting.management_kpis,
8678            );
8679
8680            // Override financial KPIs with actual data from financial statements
8681            {
8682                use rust_decimal::Decimal;
8683
8684                if let Some(income_stmt) =
8685                    financial_reporting.financial_statements.iter().find(|fs| {
8686                        fs.statement_type == StatementType::IncomeStatement
8687                            && fs.company_code == company_code
8688                    })
8689                {
8690                    // Extract revenue and COGS from income statement line items
8691                    let total_revenue: Decimal = income_stmt
8692                        .line_items
8693                        .iter()
8694                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8695                        .map(|li| li.amount)
8696                        .sum();
8697                    let total_cogs: Decimal = income_stmt
8698                        .line_items
8699                        .iter()
8700                        .filter(|li| {
8701                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8702                                && !li.is_total
8703                        })
8704                        .map(|li| li.amount.abs())
8705                        .sum();
8706                    let total_opex: Decimal = income_stmt
8707                        .line_items
8708                        .iter()
8709                        .filter(|li| {
8710                            li.section.contains("Expense")
8711                                && !li.is_total
8712                                && !li.section.contains("Cost")
8713                        })
8714                        .map(|li| li.amount.abs())
8715                        .sum();
8716
8717                    if total_revenue > Decimal::ZERO {
8718                        let hundred = Decimal::from(100);
8719                        let gross_margin_pct =
8720                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8721                        let operating_income = total_revenue - total_cogs - total_opex;
8722                        let op_margin_pct =
8723                            (operating_income * hundred / total_revenue).round_dp(2);
8724
8725                        // Override gross margin and operating margin KPIs
8726                        for kpi in &mut kpis {
8727                            if kpi.name == "Gross Margin" {
8728                                kpi.value = gross_margin_pct;
8729                            } else if kpi.name == "Operating Margin" {
8730                                kpi.value = op_margin_pct;
8731                            }
8732                        }
8733                    }
8734                }
8735
8736                // Override Current Ratio from balance sheet
8737                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8738                    fs.statement_type == StatementType::BalanceSheet
8739                        && fs.company_code == company_code
8740                }) {
8741                    let current_assets: Decimal = bs
8742                        .line_items
8743                        .iter()
8744                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8745                        .map(|li| li.amount)
8746                        .sum();
8747                    let current_liabilities: Decimal = bs
8748                        .line_items
8749                        .iter()
8750                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8751                        .map(|li| li.amount.abs())
8752                        .sum();
8753
8754                    if current_liabilities > Decimal::ZERO {
8755                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8756                        for kpi in &mut kpis {
8757                            if kpi.name == "Current Ratio" {
8758                                kpi.value = current_ratio;
8759                            }
8760                        }
8761                    }
8762                }
8763            }
8764
8765            snapshot.kpi_count = kpis.len();
8766            snapshot.kpis = kpis;
8767        }
8768
8769        // Budgets
8770        if self.config.financial_reporting.budgets.enabled {
8771            let account_data: Vec<(String, String)> = coa
8772                .accounts
8773                .iter()
8774                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8775                .collect();
8776
8777            if !account_data.is_empty() {
8778                let fiscal_year = start_date.year() as u32;
8779                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8780                let budget = budget_gen.generate(
8781                    company_code,
8782                    fiscal_year,
8783                    &account_data,
8784                    &self.config.financial_reporting.budgets,
8785                );
8786                snapshot.budget_line_count = budget.line_items.len();
8787                snapshot.budgets.push(budget);
8788            }
8789        }
8790
8791        // Phase-2 evidence layer: external expectations (ISA 520, aggregate deviation) + evidence
8792        // anchors (ISA 505, external corroboration). Both consume the per-account actual-vs-legitimate
8793        // split derived from the fraud-flagged journal entries, so they share one pass over `entries`.
8794        let want_expectations = self
8795            .config
8796            .financial_reporting
8797            .external_expectations
8798            .enabled;
8799        let want_anchors = self.config.financial_reporting.evidence_anchors.enabled;
8800        if want_expectations || want_anchors {
8801            use std::collections::HashMap;
8802            // account -> (actual_total, legitimate_total, je_count)
8803            let mut totals: HashMap<String, (Decimal, Decimal, u32)> = HashMap::new();
8804            for je in entries {
8805                let is_fraud = je.header.is_fraud;
8806                let mut touched: Vec<&str> = Vec::new();
8807                for line in &je.lines {
8808                    let amt = line.debit_amount.abs() + line.credit_amount.abs();
8809                    let e = totals.entry(line.gl_account.clone()).or_insert((
8810                        Decimal::ZERO,
8811                        Decimal::ZERO,
8812                        0,
8813                    ));
8814                    e.0 += amt;
8815                    if !is_fraud {
8816                        e.1 += amt;
8817                    }
8818                    if !touched.contains(&line.gl_account.as_str()) {
8819                        touched.push(line.gl_account.as_str());
8820                        e.2 += 1;
8821                    }
8822                }
8823            }
8824            let fiscal_year = start_date.year();
8825
8826            // ISA 520 — substantive-analytics expectations (aggregate deviation)
8827            if want_expectations {
8828                let accounts: Vec<
8829                    datasynth_generators::external_expectation_generator::AccountActuals,
8830                > = coa
8831                    .accounts
8832                    .iter()
8833                    .filter_map(|a| {
8834                        totals.get(&a.account_number).map(|(actual, legit, _)| {
8835                            datasynth_generators::external_expectation_generator::AccountActuals {
8836                                account_code: a.account_number.clone(),
8837                                account_description: a.short_description.clone(),
8838                                account_type: a.account_type,
8839                                actual_total: *actual,
8840                                legit_total: *legit,
8841                            }
8842                        })
8843                    })
8844                    .collect();
8845                if !accounts.is_empty() {
8846                    let mut exp_gen =
8847                        datasynth_generators::ExternalExpectationsGenerator::new(seed + 64);
8848                    let expectations = exp_gen.generate(
8849                        company_code,
8850                        fiscal_year,
8851                        &accounts,
8852                        &self.config.financial_reporting.external_expectations,
8853                    );
8854                    let flagged = expectations.iter().filter(|e| e.exceeds_band).count();
8855                    info!(
8856                        "External expectations: {} material accounts scored, {} exceed the ISA-520 band",
8857                        expectations.len(),
8858                        flagged
8859                    );
8860                    snapshot.external_expectations = expectations;
8861                }
8862            }
8863
8864            // ISA 505 — external-corroboration evidence anchors (dangling-node detection)
8865            if want_anchors {
8866                let accounts: Vec<
8867                    datasynth_generators::evidence_anchor_generator::AccountActivity,
8868                > = coa
8869                    .accounts
8870                    .iter()
8871                    .filter_map(|a| {
8872                        totals.get(&a.account_number).map(|(actual, legit, n)| {
8873                            datasynth_generators::evidence_anchor_generator::AccountActivity {
8874                                account_code: a.account_number.clone(),
8875                                account_description: a.short_description.clone(),
8876                                account_type: a.account_type,
8877                                total_activity: *actual,
8878                                fraud_activity: *actual - *legit,
8879                                transaction_count: *n,
8880                            }
8881                        })
8882                    })
8883                    .collect();
8884                if !accounts.is_empty() {
8885                    let mut anchor_gen =
8886                        datasynth_generators::EvidenceAnchorGenerator::new(seed + 65);
8887                    let anchors = anchor_gen.generate(
8888                        company_code,
8889                        fiscal_year,
8890                        &accounts,
8891                        &self.config.financial_reporting.evidence_anchors,
8892                    );
8893                    let dangling = anchors.iter().filter(|a| a.is_dangling).count();
8894                    info!(
8895                        "Evidence anchors: {} material accounts scored, {} dangling (uncorroborated)",
8896                        anchors.len(),
8897                        dangling
8898                    );
8899                    snapshot.evidence_anchors = anchors;
8900                }
8901            }
8902        }
8903
8904        stats.sales_quote_count = snapshot.sales_quote_count;
8905        stats.kpi_count = snapshot.kpi_count;
8906        stats.budget_line_count = snapshot.budget_line_count;
8907
8908        info!(
8909            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8910            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8911        );
8912        self.check_resources_with_log("post-sales-kpi-budgets")?;
8913
8914        Ok(snapshot)
8915    }
8916
8917    /// Compute pre-tax income for a single company from actual journal entries.
8918    ///
8919    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8920    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8921    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8922    /// and the period-close engine so that all three use a consistent definition.
8923    fn compute_pre_tax_income(
8924        company_code: &str,
8925        journal_entries: &[JournalEntry],
8926    ) -> rust_decimal::Decimal {
8927        use datasynth_core::accounts::AccountCategory;
8928        use rust_decimal::Decimal;
8929
8930        let mut total_revenue = Decimal::ZERO;
8931        let mut total_expenses = Decimal::ZERO;
8932
8933        for je in journal_entries {
8934            if je.header.company_code != company_code {
8935                continue;
8936            }
8937            for line in &je.lines {
8938                let cat = AccountCategory::from_account(&line.gl_account);
8939                match cat {
8940                    AccountCategory::Revenue => {
8941                        total_revenue += line.credit_amount - line.debit_amount;
8942                    }
8943                    AccountCategory::Cogs
8944                    | AccountCategory::OperatingExpense
8945                    | AccountCategory::OtherIncomeExpense => {
8946                        total_expenses += line.debit_amount - line.credit_amount;
8947                    }
8948                    _ => {}
8949                }
8950            }
8951        }
8952
8953        let pti = (total_revenue - total_expenses).round_dp(2);
8954        if pti == rust_decimal::Decimal::ZERO {
8955            // No income statement activity yet — fall back to a synthetic value so the
8956            // tax provision generator can still produce meaningful output.
8957            rust_decimal::Decimal::from(1_000_000u32)
8958        } else {
8959            pti
8960        }
8961    }
8962
8963    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8964    fn phase_tax_generation(
8965        &mut self,
8966        document_flows: &DocumentFlowSnapshot,
8967        journal_entries: &[JournalEntry],
8968        stats: &mut EnhancedGenerationStatistics,
8969    ) -> SynthResult<TaxSnapshot> {
8970        if !self.phase_config.generate_tax {
8971            debug!("Phase 20: Skipped (tax generation disabled)");
8972            return Ok(TaxSnapshot::default());
8973        }
8974        info!("Phase 20: Generating Tax Data");
8975
8976        let seed = self.seed;
8977        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8978            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8979        let fiscal_year = start_date.year();
8980        let company_code = self
8981            .config
8982            .companies
8983            .first()
8984            .map(|c| c.code.as_str())
8985            .unwrap_or("1000");
8986
8987        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8988            seed + 370,
8989            self.config.tax.clone(),
8990        );
8991
8992        let pack = self.primary_pack().clone();
8993        let (jurisdictions, codes) =
8994            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8995
8996        // Generate tax provisions for each company
8997        let mut provisions = Vec::new();
8998        if self.config.tax.provisions.enabled {
8999            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
9000            for company in &self.config.companies {
9001                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
9002                let statutory_rate = rust_decimal::Decimal::new(
9003                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
9004                    2,
9005                );
9006                let provision = provision_gen.generate(
9007                    &company.code,
9008                    start_date,
9009                    pre_tax_income,
9010                    statutory_rate,
9011                );
9012                provisions.push(provision);
9013            }
9014        }
9015
9016        // Generate tax lines from document invoices
9017        let mut tax_lines = Vec::new();
9018        if !codes.is_empty() {
9019            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
9020                datasynth_generators::TaxLineGeneratorConfig::default(),
9021                codes.clone(),
9022                seed + 372,
9023            );
9024
9025            // Tax lines from vendor invoices (input tax)
9026            // Use the first company's country as buyer country
9027            let buyer_country = self
9028                .config
9029                .companies
9030                .first()
9031                .map(|c| c.country.as_str())
9032                .unwrap_or("US");
9033            for vi in &document_flows.vendor_invoices {
9034                let lines = tax_line_gen.generate_for_document(
9035                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
9036                    &vi.header.document_id,
9037                    buyer_country, // seller approx same country
9038                    buyer_country,
9039                    vi.payable_amount,
9040                    vi.header.document_date,
9041                    None,
9042                );
9043                tax_lines.extend(lines);
9044            }
9045
9046            // Tax lines from customer invoices (output tax)
9047            for ci in &document_flows.customer_invoices {
9048                let lines = tax_line_gen.generate_for_document(
9049                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
9050                    &ci.header.document_id,
9051                    buyer_country, // seller is the company
9052                    buyer_country,
9053                    ci.total_gross_amount,
9054                    ci.header.document_date,
9055                    None,
9056                );
9057                tax_lines.extend(lines);
9058            }
9059        }
9060
9061        // Generate deferred tax data (IAS 12 / ASC 740) for each company
9062        let deferred_tax = {
9063            let companies: Vec<(&str, &str)> = self
9064                .config
9065                .companies
9066                .iter()
9067                .map(|c| (c.code.as_str(), c.country.as_str()))
9068                .collect();
9069            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
9070            deferred_gen.generate(&companies, start_date, journal_entries)
9071        };
9072
9073        // Build a document_id → posting_date map so each tax JE uses its
9074        // source document's date rather than a blanket period-end date.
9075        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
9076            std::collections::HashMap::new();
9077        for vi in &document_flows.vendor_invoices {
9078            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
9079        }
9080        for ci in &document_flows.customer_invoices {
9081            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
9082        }
9083
9084        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
9085        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9086        let tax_posting_journal_entries = if !tax_lines.is_empty() {
9087            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
9088                &tax_lines,
9089                company_code,
9090                &doc_dates,
9091                end_date,
9092            );
9093            debug!("Generated {} tax posting JEs", jes.len());
9094            jes
9095        } else {
9096            Vec::new()
9097        };
9098
9099        let snapshot = TaxSnapshot {
9100            jurisdiction_count: jurisdictions.len(),
9101            code_count: codes.len(),
9102            jurisdictions,
9103            codes,
9104            tax_provisions: provisions,
9105            tax_lines,
9106            tax_returns: Vec::new(),
9107            withholding_records: Vec::new(),
9108            tax_anomaly_labels: Vec::new(),
9109            deferred_tax,
9110            tax_posting_journal_entries,
9111        };
9112
9113        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
9114        stats.tax_code_count = snapshot.code_count;
9115        stats.tax_provision_count = snapshot.tax_provisions.len();
9116        stats.tax_line_count = snapshot.tax_lines.len();
9117
9118        info!(
9119            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
9120            snapshot.jurisdiction_count,
9121            snapshot.code_count,
9122            snapshot.tax_provisions.len(),
9123            snapshot.deferred_tax.temporary_differences.len(),
9124            snapshot.deferred_tax.journal_entries.len(),
9125            snapshot.tax_posting_journal_entries.len(),
9126        );
9127        self.check_resources_with_log("post-tax")?;
9128
9129        Ok(snapshot)
9130    }
9131
9132    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
9133    fn phase_esg_generation(
9134        &mut self,
9135        document_flows: &DocumentFlowSnapshot,
9136        manufacturing: &ManufacturingSnapshot,
9137        stats: &mut EnhancedGenerationStatistics,
9138    ) -> SynthResult<EsgSnapshot> {
9139        if !self.phase_config.generate_esg {
9140            debug!("Phase 21: Skipped (ESG generation disabled)");
9141            return Ok(EsgSnapshot::default());
9142        }
9143        let degradation = self.check_resources()?;
9144        if degradation >= DegradationLevel::Reduced {
9145            debug!(
9146                "Phase skipped due to resource pressure (degradation: {:?})",
9147                degradation
9148            );
9149            return Ok(EsgSnapshot::default());
9150        }
9151        info!("Phase 21: Generating ESG Data");
9152
9153        let seed = self.seed;
9154        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9155            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9156        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9157        let entity_id = self
9158            .config
9159            .companies
9160            .first()
9161            .map(|c| c.code.as_str())
9162            .unwrap_or("1000");
9163
9164        let esg_cfg = &self.config.esg;
9165        let mut snapshot = EsgSnapshot::default();
9166
9167        // Energy consumption (feeds into scope 1 & 2 emissions)
9168        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
9169            esg_cfg.environmental.energy.clone(),
9170            seed + 80,
9171        );
9172        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
9173
9174        // Water usage
9175        let facility_count = esg_cfg.environmental.energy.facility_count;
9176        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
9177        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
9178
9179        // Waste
9180        let mut waste_gen = datasynth_generators::WasteGenerator::new(
9181            seed + 82,
9182            esg_cfg.environmental.waste.diversion_target,
9183            facility_count,
9184        );
9185        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
9186
9187        // Emissions (scope 1, 2, 3)
9188        let mut emission_gen =
9189            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
9190
9191        // Build EnergyInput from energy_records
9192        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
9193            .iter()
9194            .map(|e| datasynth_generators::EnergyInput {
9195                facility_id: e.facility_id.clone(),
9196                energy_type: match e.energy_source {
9197                    EnergySourceType::NaturalGas => {
9198                        datasynth_generators::EnergyInputType::NaturalGas
9199                    }
9200                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
9201                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
9202                    _ => datasynth_generators::EnergyInputType::Electricity,
9203                },
9204                consumption_kwh: e.consumption_kwh,
9205                period: e.period,
9206            })
9207            .collect();
9208
9209        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
9210        if !manufacturing.production_orders.is_empty() {
9211            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
9212                &manufacturing.production_orders,
9213                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
9214                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
9215            );
9216            if !mfg_energy.is_empty() {
9217                info!(
9218                    "ESG: {} energy inputs derived from {} production orders",
9219                    mfg_energy.len(),
9220                    manufacturing.production_orders.len(),
9221                );
9222                energy_inputs.extend(mfg_energy);
9223            }
9224        }
9225
9226        let mut emissions = Vec::new();
9227        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
9228        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
9229
9230        // Scope 3: use vendor spend data from actual payments
9231        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
9232            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9233            for payment in &document_flows.payments {
9234                if payment.is_vendor {
9235                    *totals
9236                        .entry(payment.business_partner_id.clone())
9237                        .or_default() += payment.amount;
9238                }
9239            }
9240            totals
9241        };
9242        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
9243            .master_data
9244            .vendors
9245            .iter()
9246            .map(|v| {
9247                let spend = vendor_payment_totals
9248                    .get(&v.vendor_id)
9249                    .copied()
9250                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
9251                datasynth_generators::VendorSpendInput {
9252                    vendor_id: v.vendor_id.clone(),
9253                    category: format!("{:?}", v.vendor_type).to_lowercase(),
9254                    spend,
9255                    country: v.country.clone(),
9256                }
9257            })
9258            .collect();
9259        if !vendor_spend.is_empty() {
9260            emissions.extend(emission_gen.generate_scope3_purchased_goods(
9261                entity_id,
9262                &vendor_spend,
9263                start_date,
9264                end_date,
9265            ));
9266        }
9267
9268        // Business travel & commuting (scope 3)
9269        let headcount = self.master_data.employees.len() as u32;
9270        if headcount > 0 {
9271            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
9272            emissions.extend(emission_gen.generate_scope3_business_travel(
9273                entity_id,
9274                travel_spend,
9275                start_date,
9276            ));
9277            emissions
9278                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
9279        }
9280
9281        snapshot.emission_count = emissions.len();
9282        snapshot.emissions = emissions;
9283        snapshot.energy = energy_records;
9284
9285        // Social: Workforce diversity, pay equity, safety
9286        let mut workforce_gen =
9287            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
9288        let total_headcount = headcount.max(100);
9289        snapshot.diversity =
9290            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
9291        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
9292
9293        // v2.4: Derive additional workforce diversity metrics from actual employee data
9294        if !self.master_data.employees.is_empty() {
9295            let hr_diversity = workforce_gen.generate_diversity_from_employees(
9296                entity_id,
9297                &self.master_data.employees,
9298                end_date,
9299            );
9300            if !hr_diversity.is_empty() {
9301                info!(
9302                    "ESG: {} diversity metrics derived from {} actual employees",
9303                    hr_diversity.len(),
9304                    self.master_data.employees.len(),
9305                );
9306                snapshot.diversity.extend(hr_diversity);
9307            }
9308        }
9309
9310        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
9311            entity_id,
9312            facility_count,
9313            start_date,
9314            end_date,
9315        );
9316
9317        // Compute safety metrics
9318        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
9319        let safety_metric = workforce_gen.compute_safety_metrics(
9320            entity_id,
9321            &snapshot.safety_incidents,
9322            total_hours,
9323            start_date,
9324        );
9325        snapshot.safety_metrics = vec![safety_metric];
9326
9327        // Governance
9328        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9329            seed + 85,
9330            esg_cfg.governance.board_size,
9331            esg_cfg.governance.independence_target,
9332        );
9333        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9334
9335        // Supplier ESG assessments
9336        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9337            esg_cfg.supply_chain_esg.clone(),
9338            seed + 86,
9339        );
9340        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9341            .master_data
9342            .vendors
9343            .iter()
9344            .map(|v| datasynth_generators::VendorInput {
9345                vendor_id: v.vendor_id.clone(),
9346                country: v.country.clone(),
9347                industry: format!("{:?}", v.vendor_type).to_lowercase(),
9348                quality_score: None,
9349            })
9350            .collect();
9351        snapshot.supplier_assessments =
9352            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9353
9354        // Disclosures
9355        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9356            seed + 87,
9357            esg_cfg.reporting.clone(),
9358            esg_cfg.climate_scenarios.clone(),
9359        );
9360        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9361        snapshot.disclosures = disclosure_gen.generate_disclosures(
9362            entity_id,
9363            &snapshot.materiality,
9364            start_date,
9365            end_date,
9366        );
9367        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9368        snapshot.disclosure_count = snapshot.disclosures.len();
9369
9370        // Anomaly injection
9371        if esg_cfg.anomaly_rate > 0.0 {
9372            let mut anomaly_injector =
9373                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9374            let mut labels = Vec::new();
9375            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9376            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9377            labels.extend(
9378                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9379            );
9380            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9381            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9382            snapshot.anomaly_labels = labels;
9383        }
9384
9385        stats.esg_emission_count = snapshot.emission_count;
9386        stats.esg_disclosure_count = snapshot.disclosure_count;
9387
9388        info!(
9389            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9390            snapshot.emission_count,
9391            snapshot.disclosure_count,
9392            snapshot.supplier_assessments.len()
9393        );
9394        self.check_resources_with_log("post-esg")?;
9395
9396        Ok(snapshot)
9397    }
9398
9399    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
9400    fn phase_treasury_data(
9401        &mut self,
9402        document_flows: &DocumentFlowSnapshot,
9403        subledger: &SubledgerSnapshot,
9404        intercompany: &IntercompanySnapshot,
9405        stats: &mut EnhancedGenerationStatistics,
9406    ) -> SynthResult<TreasurySnapshot> {
9407        if !self.phase_config.generate_treasury {
9408            debug!("Phase 22: Skipped (treasury generation disabled)");
9409            return Ok(TreasurySnapshot::default());
9410        }
9411        let degradation = self.check_resources()?;
9412        if degradation >= DegradationLevel::Reduced {
9413            debug!(
9414                "Phase skipped due to resource pressure (degradation: {:?})",
9415                degradation
9416            );
9417            return Ok(TreasurySnapshot::default());
9418        }
9419        info!("Phase 22: Generating Treasury Data");
9420
9421        let seed = self.seed;
9422        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9423            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9424        let currency = self
9425            .config
9426            .companies
9427            .first()
9428            .map(|c| c.currency.as_str())
9429            .unwrap_or("USD");
9430        let entity_id = self
9431            .config
9432            .companies
9433            .first()
9434            .map(|c| c.code.as_str())
9435            .unwrap_or("1000");
9436
9437        let mut snapshot = TreasurySnapshot::default();
9438
9439        // Generate debt instruments
9440        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9441            self.config.treasury.debt.clone(),
9442            seed + 90,
9443        );
9444        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9445
9446        // Generate hedging instruments (IR swaps for floating-rate debt)
9447        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9448            self.config.treasury.hedging.clone(),
9449            seed + 91,
9450        );
9451        for debt in &snapshot.debt_instruments {
9452            if debt.rate_type == InterestRateType::Variable {
9453                let swap = hedge_gen.generate_ir_swap(
9454                    currency,
9455                    debt.principal,
9456                    debt.origination_date,
9457                    debt.maturity_date,
9458                );
9459                snapshot.hedging_instruments.push(swap);
9460            }
9461        }
9462
9463        // Build FX exposures from foreign-currency payments and generate
9464        // FX forwards + hedge relationship designations via generate() API.
9465        {
9466            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9467            for payment in &document_flows.payments {
9468                if payment.currency != currency {
9469                    let entry = fx_map
9470                        .entry(payment.currency.clone())
9471                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9472                    entry.0 += payment.amount;
9473                    // Use the latest settlement date among grouped payments
9474                    if payment.header.document_date > entry.1 {
9475                        entry.1 = payment.header.document_date;
9476                    }
9477                }
9478            }
9479            if !fx_map.is_empty() {
9480                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9481                    .into_iter()
9482                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
9483                        datasynth_generators::treasury::FxExposure {
9484                            currency_pair: format!("{foreign_ccy}/{currency}"),
9485                            foreign_currency: foreign_ccy,
9486                            net_amount,
9487                            settlement_date,
9488                            description: "AP payment FX exposure".to_string(),
9489                        }
9490                    })
9491                    .collect();
9492                let (fx_instruments, fx_relationships) =
9493                    hedge_gen.generate(start_date, &fx_exposures);
9494                snapshot.hedging_instruments.extend(fx_instruments);
9495                snapshot.hedge_relationships.extend(fx_relationships);
9496            }
9497        }
9498
9499        // Inject anomalies if configured
9500        if self.config.treasury.anomaly_rate > 0.0 {
9501            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9502                seed + 92,
9503                self.config.treasury.anomaly_rate,
9504            );
9505            let mut labels = Vec::new();
9506            labels.extend(
9507                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9508            );
9509            snapshot.treasury_anomaly_labels = labels;
9510        }
9511
9512        // Generate cash positions from payment flows
9513        if self.config.treasury.cash_positioning.enabled {
9514            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9515
9516            // AP payments as outflows
9517            for payment in &document_flows.payments {
9518                cash_flows.push(datasynth_generators::treasury::CashFlow {
9519                    date: payment.header.document_date,
9520                    account_id: format!("{entity_id}-MAIN"),
9521                    amount: payment.amount,
9522                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9523                });
9524            }
9525
9526            // Customer receipts (from O2C chains) as inflows
9527            for chain in &document_flows.o2c_chains {
9528                if let Some(ref receipt) = chain.customer_receipt {
9529                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9530                        date: receipt.header.document_date,
9531                        account_id: format!("{entity_id}-MAIN"),
9532                        amount: receipt.amount,
9533                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9534                    });
9535                }
9536                // Remainder receipts (follow-up to partial payments)
9537                for receipt in &chain.remainder_receipts {
9538                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9539                        date: receipt.header.document_date,
9540                        account_id: format!("{entity_id}-MAIN"),
9541                        amount: receipt.amount,
9542                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9543                    });
9544                }
9545            }
9546
9547            if !cash_flows.is_empty() {
9548                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9549                    self.config.treasury.cash_positioning.clone(),
9550                    seed + 93,
9551                );
9552                let account_id = format!("{entity_id}-MAIN");
9553                snapshot.cash_positions = cash_gen.generate(
9554                    entity_id,
9555                    &account_id,
9556                    currency,
9557                    &cash_flows,
9558                    start_date,
9559                    start_date + chrono::Months::new(self.config.global.period_months),
9560                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9561                );
9562            }
9563        }
9564
9565        // Generate cash forecasts from AR/AP aging
9566        if self.config.treasury.cash_forecasting.enabled {
9567            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9568
9569            // Build AR aging items from subledger AR invoices
9570            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9571                .ar_invoices
9572                .iter()
9573                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9574                .map(|inv| {
9575                    let days_past_due = if inv.due_date < end_date {
9576                        (end_date - inv.due_date).num_days().max(0) as u32
9577                    } else {
9578                        0
9579                    };
9580                    datasynth_generators::treasury::ArAgingItem {
9581                        expected_date: inv.due_date,
9582                        amount: inv.amount_remaining,
9583                        days_past_due,
9584                        document_id: inv.invoice_number.clone(),
9585                    }
9586                })
9587                .collect();
9588
9589            // Build AP aging items from subledger AP invoices
9590            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9591                .ap_invoices
9592                .iter()
9593                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9594                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9595                    payment_date: inv.due_date,
9596                    amount: inv.amount_remaining,
9597                    document_id: inv.invoice_number.clone(),
9598                })
9599                .collect();
9600
9601            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9602                self.config.treasury.cash_forecasting.clone(),
9603                seed + 94,
9604            );
9605            let forecast = forecast_gen.generate(
9606                entity_id,
9607                currency,
9608                end_date,
9609                &ar_items,
9610                &ap_items,
9611                &[], // scheduled disbursements - empty for now
9612            );
9613            snapshot.cash_forecasts.push(forecast);
9614        }
9615
9616        // Generate cash pools and sweeps
9617        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9618            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9619            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9620                self.config.treasury.cash_pooling.clone(),
9621                seed + 95,
9622            );
9623
9624            // Create a pool from available accounts
9625            let account_ids: Vec<String> = snapshot
9626                .cash_positions
9627                .iter()
9628                .map(|cp| cp.bank_account_id.clone())
9629                .collect::<std::collections::HashSet<_>>()
9630                .into_iter()
9631                .collect();
9632
9633            if let Some(pool) =
9634                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9635            {
9636                // Generate sweeps - build participant balances from last cash position per account
9637                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9638                for cp in &snapshot.cash_positions {
9639                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9640                }
9641
9642                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9643                    latest_balances
9644                        .into_iter()
9645                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9646                        .map(
9647                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9648                                account_id: id,
9649                                balance,
9650                            },
9651                        )
9652                        .collect();
9653
9654                let sweeps =
9655                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9656                snapshot.cash_pool_sweeps = sweeps;
9657                snapshot.cash_pools.push(pool);
9658            }
9659        }
9660
9661        // Generate bank guarantees
9662        if self.config.treasury.bank_guarantees.enabled {
9663            let vendor_names: Vec<String> = self
9664                .master_data
9665                .vendors
9666                .iter()
9667                .map(|v| v.name.clone())
9668                .collect();
9669            if !vendor_names.is_empty() {
9670                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9671                    self.config.treasury.bank_guarantees.clone(),
9672                    seed + 96,
9673                );
9674                snapshot.bank_guarantees =
9675                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9676            }
9677        }
9678
9679        // Generate netting runs from intercompany matched pairs
9680        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9681            let entity_ids: Vec<String> = self
9682                .config
9683                .companies
9684                .iter()
9685                .map(|c| c.code.clone())
9686                .collect();
9687            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9688                .matched_pairs
9689                .iter()
9690                .map(|mp| {
9691                    (
9692                        mp.seller_company.clone(),
9693                        mp.buyer_company.clone(),
9694                        mp.amount,
9695                    )
9696                })
9697                .collect();
9698            if entity_ids.len() >= 2 {
9699                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9700                    self.config.treasury.netting.clone(),
9701                    seed + 97,
9702                );
9703                snapshot.netting_runs = netting_gen.generate(
9704                    &entity_ids,
9705                    currency,
9706                    start_date,
9707                    self.config.global.period_months,
9708                    &ic_amounts,
9709                );
9710            }
9711        }
9712
9713        // Generate treasury journal entries from the instruments we just created.
9714        {
9715            use datasynth_generators::treasury::TreasuryAccounting;
9716
9717            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9718            let mut treasury_jes = Vec::new();
9719
9720            // Debt interest accrual JEs
9721            if !snapshot.debt_instruments.is_empty() {
9722                let debt_jes =
9723                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9724                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9725                treasury_jes.extend(debt_jes);
9726            }
9727
9728            // Hedge mark-to-market JEs
9729            if !snapshot.hedging_instruments.is_empty() {
9730                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9731                    &snapshot.hedging_instruments,
9732                    &snapshot.hedge_relationships,
9733                    end_date,
9734                    entity_id,
9735                );
9736                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9737                treasury_jes.extend(hedge_jes);
9738            }
9739
9740            // Cash pool sweep JEs
9741            if !snapshot.cash_pool_sweeps.is_empty() {
9742                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9743                    &snapshot.cash_pool_sweeps,
9744                    entity_id,
9745                );
9746                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9747                treasury_jes.extend(sweep_jes);
9748            }
9749
9750            if !treasury_jes.is_empty() {
9751                debug!("Total treasury journal entries: {}", treasury_jes.len());
9752            }
9753            snapshot.journal_entries = treasury_jes;
9754        }
9755
9756        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9757        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9758        stats.cash_position_count = snapshot.cash_positions.len();
9759        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9760        stats.cash_pool_count = snapshot.cash_pools.len();
9761
9762        info!(
9763            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9764            snapshot.debt_instruments.len(),
9765            snapshot.hedging_instruments.len(),
9766            snapshot.cash_positions.len(),
9767            snapshot.cash_forecasts.len(),
9768            snapshot.cash_pools.len(),
9769            snapshot.bank_guarantees.len(),
9770            snapshot.netting_runs.len(),
9771            snapshot.journal_entries.len(),
9772        );
9773        self.check_resources_with_log("post-treasury")?;
9774
9775        Ok(snapshot)
9776    }
9777
9778    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9779    fn phase_project_accounting(
9780        &mut self,
9781        document_flows: &DocumentFlowSnapshot,
9782        hr: &HrSnapshot,
9783        stats: &mut EnhancedGenerationStatistics,
9784    ) -> SynthResult<ProjectAccountingSnapshot> {
9785        if !self.phase_config.generate_project_accounting {
9786            debug!("Phase 23: Skipped (project accounting disabled)");
9787            return Ok(ProjectAccountingSnapshot::default());
9788        }
9789        let degradation = self.check_resources()?;
9790        if degradation >= DegradationLevel::Reduced {
9791            debug!(
9792                "Phase skipped due to resource pressure (degradation: {:?})",
9793                degradation
9794            );
9795            return Ok(ProjectAccountingSnapshot::default());
9796        }
9797        info!("Phase 23: Generating Project Accounting Data");
9798
9799        let seed = self.seed;
9800        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9801            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9802        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9803        let company_code = self
9804            .config
9805            .companies
9806            .first()
9807            .map(|c| c.code.as_str())
9808            .unwrap_or("1000");
9809
9810        let mut snapshot = ProjectAccountingSnapshot::default();
9811
9812        // Generate projects with WBS hierarchies
9813        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9814            self.config.project_accounting.clone(),
9815            seed + 95,
9816        );
9817        let pool = project_gen.generate(company_code, start_date, end_date);
9818        snapshot.projects = pool.projects.clone();
9819
9820        // Link source documents to projects for cost allocation
9821        {
9822            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9823                Vec::new();
9824
9825            // Time entries
9826            for te in &hr.time_entries {
9827                let total_hours = te.hours_regular + te.hours_overtime;
9828                if total_hours > 0.0 {
9829                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9830                        id: te.entry_id.clone(),
9831                        entity_id: company_code.to_string(),
9832                        date: te.date,
9833                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9834                            .unwrap_or(rust_decimal::Decimal::ZERO),
9835                        source_type: CostSourceType::TimeEntry,
9836                        hours: Some(
9837                            rust_decimal::Decimal::from_f64_retain(total_hours)
9838                                .unwrap_or(rust_decimal::Decimal::ZERO),
9839                        ),
9840                    });
9841                }
9842            }
9843
9844            // Expense reports
9845            for er in &hr.expense_reports {
9846                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9847                    id: er.report_id.clone(),
9848                    entity_id: company_code.to_string(),
9849                    date: er.submission_date,
9850                    amount: er.total_amount,
9851                    source_type: CostSourceType::ExpenseReport,
9852                    hours: None,
9853                });
9854            }
9855
9856            // Purchase orders
9857            for po in &document_flows.purchase_orders {
9858                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9859                    id: po.header.document_id.clone(),
9860                    entity_id: company_code.to_string(),
9861                    date: po.header.document_date,
9862                    amount: po.total_net_amount,
9863                    source_type: CostSourceType::PurchaseOrder,
9864                    hours: None,
9865                });
9866            }
9867
9868            // Vendor invoices
9869            for vi in &document_flows.vendor_invoices {
9870                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9871                    id: vi.header.document_id.clone(),
9872                    entity_id: company_code.to_string(),
9873                    date: vi.header.document_date,
9874                    amount: vi.payable_amount,
9875                    source_type: CostSourceType::VendorInvoice,
9876                    hours: None,
9877                });
9878            }
9879
9880            if !source_docs.is_empty() && !pool.projects.is_empty() {
9881                let mut cost_gen =
9882                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9883                        self.config.project_accounting.cost_allocation.clone(),
9884                        seed + 99,
9885                    );
9886                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9887            }
9888        }
9889
9890        // Generate change orders
9891        if self.config.project_accounting.change_orders.enabled {
9892            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9893                self.config.project_accounting.change_orders.clone(),
9894                seed + 96,
9895            );
9896            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9897        }
9898
9899        // Generate milestones
9900        if self.config.project_accounting.milestones.enabled {
9901            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9902                self.config.project_accounting.milestones.clone(),
9903                seed + 97,
9904            );
9905            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9906        }
9907
9908        // Generate earned value metrics (needs cost lines, so only if we have projects)
9909        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9910            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9911                self.config.project_accounting.earned_value.clone(),
9912                seed + 98,
9913            );
9914            snapshot.earned_value_metrics =
9915                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9916        }
9917
9918        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9919        if self.config.project_accounting.revenue_recognition.enabled
9920            && !snapshot.projects.is_empty()
9921            && !snapshot.cost_lines.is_empty()
9922        {
9923            use datasynth_generators::project_accounting::RevenueGenerator;
9924            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9925            let avg_contract_value =
9926                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9927                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9928
9929            // Build contract value tuples: only customer-type projects get revenue recognition.
9930            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9931            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9932                snapshot
9933                    .projects
9934                    .iter()
9935                    .filter(|p| {
9936                        matches!(
9937                            p.project_type,
9938                            datasynth_core::models::ProjectType::Customer
9939                        )
9940                    })
9941                    .map(|p| {
9942                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9943                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9944                        // budget × 1.25 → contract value
9945                        } else {
9946                            avg_contract_value
9947                        };
9948                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9949                        (p.project_id.clone(), cv, etc)
9950                    })
9951                    .collect();
9952
9953            if !contract_values.is_empty() {
9954                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9955                snapshot.revenue_records = rev_gen.generate(
9956                    &snapshot.projects,
9957                    &snapshot.cost_lines,
9958                    &contract_values,
9959                    start_date,
9960                    end_date,
9961                );
9962                debug!(
9963                    "Generated {} revenue recognition records for {} customer projects",
9964                    snapshot.revenue_records.len(),
9965                    contract_values.len()
9966                );
9967            }
9968        }
9969
9970        stats.project_count = snapshot.projects.len();
9971        stats.project_change_order_count = snapshot.change_orders.len();
9972        stats.project_cost_line_count = snapshot.cost_lines.len();
9973
9974        info!(
9975            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9976            snapshot.projects.len(),
9977            snapshot.change_orders.len(),
9978            snapshot.milestones.len(),
9979            snapshot.earned_value_metrics.len()
9980        );
9981        self.check_resources_with_log("post-project-accounting")?;
9982
9983        Ok(snapshot)
9984    }
9985
9986    /// Phase 24: Generate process evolution and organizational events.
9987    fn phase_evolution_events(
9988        &mut self,
9989        stats: &mut EnhancedGenerationStatistics,
9990    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9991        if !self.phase_config.generate_evolution_events {
9992            debug!("Phase 24: Skipped (evolution events disabled)");
9993            return Ok((Vec::new(), Vec::new()));
9994        }
9995        info!("Phase 24: Generating Process Evolution + Organizational Events");
9996
9997        let seed = self.seed;
9998        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9999            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10000        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10001
10002        // Process evolution events
10003        let mut proc_gen =
10004            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
10005                seed + 100,
10006            );
10007        let process_events = proc_gen.generate_events(start_date, end_date);
10008
10009        // Organizational events
10010        let company_codes: Vec<String> = self
10011            .config
10012            .companies
10013            .iter()
10014            .map(|c| c.code.clone())
10015            .collect();
10016        let mut org_gen =
10017            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
10018                seed + 101,
10019            );
10020        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
10021
10022        stats.process_evolution_event_count = process_events.len();
10023        stats.organizational_event_count = org_events.len();
10024
10025        info!(
10026            "Evolution events generated: {} process evolution, {} organizational",
10027            process_events.len(),
10028            org_events.len()
10029        );
10030        self.check_resources_with_log("post-evolution-events")?;
10031
10032        Ok((process_events, org_events))
10033    }
10034
10035    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
10036    /// data recovery, and regulatory changes).
10037    fn phase_disruption_events(
10038        &self,
10039        stats: &mut EnhancedGenerationStatistics,
10040    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
10041        if !self.config.organizational_events.enabled {
10042            debug!("Phase 24b: Skipped (organizational events disabled)");
10043            return Ok(Vec::new());
10044        }
10045        info!("Phase 24b: Generating Disruption Events");
10046
10047        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10048            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10049        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10050
10051        let company_codes: Vec<String> = self
10052            .config
10053            .companies
10054            .iter()
10055            .map(|c| c.code.clone())
10056            .collect();
10057
10058        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
10059        let events = gen.generate(start_date, end_date, &company_codes);
10060
10061        stats.disruption_event_count = events.len();
10062        info!("Disruption events generated: {} events", events.len());
10063        self.check_resources_with_log("post-disruption-events")?;
10064
10065        Ok(events)
10066    }
10067
10068    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
10069    ///
10070    /// Produces paired examples where each pair contains the original clean JE
10071    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
10072    /// split transaction). Useful for training anomaly detection models with
10073    /// known ground truth.
10074    fn phase_counterfactuals(
10075        &self,
10076        journal_entries: &[JournalEntry],
10077        stats: &mut EnhancedGenerationStatistics,
10078    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
10079        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
10080            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
10081            return Ok(Vec::new());
10082        }
10083        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
10084
10085        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
10086
10087        let mut gen = CounterfactualGenerator::new(self.seed + 110);
10088
10089        // Rotating set of specs to produce diverse mutation types
10090        let specs = [
10091            CounterfactualSpec::ScaleAmount { factor: 2.5 },
10092            CounterfactualSpec::ShiftDate { days: -14 },
10093            CounterfactualSpec::SelfApprove,
10094            CounterfactualSpec::SplitTransaction { split_count: 3 },
10095        ];
10096
10097        let pairs: Vec<_> = journal_entries
10098            .iter()
10099            .enumerate()
10100            .map(|(i, je)| {
10101                let spec = &specs[i % specs.len()];
10102                gen.generate(je, spec)
10103            })
10104            .collect();
10105
10106        stats.counterfactual_pair_count = pairs.len();
10107        info!(
10108            "Counterfactual pairs generated: {} pairs from {} journal entries",
10109            pairs.len(),
10110            journal_entries.len()
10111        );
10112        self.check_resources_with_log("post-counterfactuals")?;
10113
10114        Ok(pairs)
10115    }
10116
10117    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
10118    ///
10119    /// Uses the anomaly labels (from Phase 8) to determine which documents are
10120    /// fraudulent, then generates probabilistic red flags on all chain documents.
10121    /// Non-fraud documents also receive red flags at a lower rate (false positives)
10122    /// to produce realistic ML training data.
10123    fn phase_red_flags(
10124        &self,
10125        anomaly_labels: &AnomalyLabels,
10126        document_flows: &DocumentFlowSnapshot,
10127        stats: &mut EnhancedGenerationStatistics,
10128    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
10129        if !self.config.fraud.enabled {
10130            debug!("Phase 26: Skipped (fraud generation disabled)");
10131            return Ok(Vec::new());
10132        }
10133        info!("Phase 26: Generating Fraud Red-Flag Indicators");
10134
10135        use datasynth_generators::fraud::RedFlagGenerator;
10136
10137        let generator = RedFlagGenerator::new();
10138        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
10139
10140        // Build a set of document IDs that are known-fraudulent from anomaly labels.
10141        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
10142            .labels
10143            .iter()
10144            .filter(|label| label.anomaly_type.is_intentional())
10145            .map(|label| label.document_id.as_str())
10146            .collect();
10147
10148        let mut flags = Vec::new();
10149
10150        // Iterate P2P chains: use the purchase order document ID as the chain key.
10151        for chain in &document_flows.p2p_chains {
10152            let doc_id = &chain.purchase_order.header.document_id;
10153            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10154            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10155        }
10156
10157        // Iterate O2C chains: use the sales order document ID as the chain key.
10158        for chain in &document_flows.o2c_chains {
10159            let doc_id = &chain.sales_order.header.document_id;
10160            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10161            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10162        }
10163
10164        stats.red_flag_count = flags.len();
10165        info!(
10166            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
10167            flags.len(),
10168            document_flows.p2p_chains.len(),
10169            document_flows.o2c_chains.len(),
10170            fraud_doc_ids.len()
10171        );
10172        self.check_resources_with_log("post-red-flags")?;
10173
10174        Ok(flags)
10175    }
10176
10177    /// Phase 26b: Generate collusion rings from employee/vendor pools.
10178    ///
10179    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
10180    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
10181    /// advance them over the simulation period.
10182    fn phase_collusion_rings(
10183        &mut self,
10184        stats: &mut EnhancedGenerationStatistics,
10185    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
10186        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
10187            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
10188            return Ok(Vec::new());
10189        }
10190        info!("Phase 26b: Generating Collusion Rings");
10191
10192        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10193            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10194        let months = self.config.global.period_months;
10195
10196        let employee_ids: Vec<String> = self
10197            .master_data
10198            .employees
10199            .iter()
10200            .map(|e| e.employee_id.clone())
10201            .collect();
10202        let vendor_ids: Vec<String> = self
10203            .master_data
10204            .vendors
10205            .iter()
10206            .map(|v| v.vendor_id.clone())
10207            .collect();
10208
10209        let mut generator =
10210            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
10211        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
10212
10213        stats.collusion_ring_count = rings.len();
10214        info!(
10215            "Collusion rings generated: {} rings, total members: {}",
10216            rings.len(),
10217            rings
10218                .iter()
10219                .map(datasynth_generators::fraud::CollusionRing::size)
10220                .sum::<usize>()
10221        );
10222        self.check_resources_with_log("post-collusion-rings")?;
10223
10224        Ok(rings)
10225    }
10226
10227    /// Phase 27: Generate bi-temporal version chains for vendor entities.
10228    ///
10229    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
10230    /// master data changes over time, supporting bi-temporal audit queries.
10231    fn phase_temporal_attributes(
10232        &mut self,
10233        stats: &mut EnhancedGenerationStatistics,
10234    ) -> SynthResult<
10235        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
10236    > {
10237        if !self.config.temporal_attributes.enabled {
10238            debug!("Phase 27: Skipped (temporal attributes disabled)");
10239            return Ok(Vec::new());
10240        }
10241        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
10242
10243        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10244            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10245
10246        // Build a TemporalAttributeConfig from the user's config.
10247        // Since Phase 27 is already gated on temporal_attributes.enabled,
10248        // default to enabling version chains so users get actual mutations.
10249        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
10250            || self.config.temporal_attributes.enabled;
10251        let temporal_config = {
10252            let ta = &self.config.temporal_attributes;
10253            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
10254                .enabled(ta.enabled)
10255                .closed_probability(ta.valid_time.closed_probability)
10256                .avg_validity_days(ta.valid_time.avg_validity_days)
10257                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
10258                .with_version_chains(if generate_version_chains {
10259                    ta.avg_versions_per_entity
10260                } else {
10261                    1.0
10262                })
10263                .build()
10264        };
10265        // Apply backdating settings if configured
10266        let temporal_config = if self
10267            .config
10268            .temporal_attributes
10269            .transaction_time
10270            .allow_backdating
10271        {
10272            let mut c = temporal_config;
10273            c.transaction_time.allow_backdating = true;
10274            c.transaction_time.backdating_probability = self
10275                .config
10276                .temporal_attributes
10277                .transaction_time
10278                .backdating_probability;
10279            c.transaction_time.max_backdate_days = self
10280                .config
10281                .temporal_attributes
10282                .transaction_time
10283                .max_backdate_days;
10284            c
10285        } else {
10286            temporal_config
10287        };
10288        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
10289            temporal_config,
10290            self.seed + 130,
10291            start_date,
10292        );
10293
10294        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
10295            self.seed + 130,
10296            datasynth_core::GeneratorType::Vendor,
10297        );
10298
10299        let chains: Vec<_> = self
10300            .master_data
10301            .vendors
10302            .iter()
10303            .map(|vendor| {
10304                let id = uuid_factory.next();
10305                gen.generate_version_chain(vendor.clone(), id)
10306            })
10307            .collect();
10308
10309        stats.temporal_version_chain_count = chains.len();
10310        info!("Temporal version chains generated: {} chains", chains.len());
10311        self.check_resources_with_log("post-temporal-attributes")?;
10312
10313        Ok(chains)
10314    }
10315
10316    /// Phase 28: Build entity relationship graph and cross-process links.
10317    ///
10318    /// Part 1 (gated on `relationship_strength.enabled`): builds an
10319    /// `EntityGraph` from master-data vendor/customer entities and
10320    /// journal-entry-derived transaction summaries.
10321    ///
10322    /// Part 2 (gated on `cross_process_links.enabled`): extracts
10323    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
10324    /// generates inventory-movement cross-process links.
10325    fn phase_entity_relationships(
10326        &self,
10327        journal_entries: &[JournalEntry],
10328        document_flows: &DocumentFlowSnapshot,
10329        stats: &mut EnhancedGenerationStatistics,
10330    ) -> SynthResult<(
10331        Option<datasynth_core::models::EntityGraph>,
10332        Vec<datasynth_core::models::CrossProcessLink>,
10333    )> {
10334        use datasynth_generators::relationships::{
10335            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10336            TransactionSummary,
10337        };
10338
10339        let rs_enabled = self.config.relationship_strength.enabled;
10340        let cpl_enabled = self.config.cross_process_links.enabled
10341            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10342
10343        if !rs_enabled && !cpl_enabled {
10344            debug!(
10345                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10346            );
10347            return Ok((None, Vec::new()));
10348        }
10349
10350        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10351
10352        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10353            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10354
10355        let company_code = self
10356            .config
10357            .companies
10358            .first()
10359            .map(|c| c.code.as_str())
10360            .unwrap_or("1000");
10361
10362        // Build the generator with matching config flags
10363        let gen_config = EntityGraphConfig {
10364            enabled: rs_enabled,
10365            cross_process: datasynth_generators::relationships::CrossProcessConfig {
10366                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10367                enable_return_flows: false,
10368                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10369                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10370                // Use higher link rate for small datasets to avoid probabilistic empty results
10371                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10372                    1.0
10373                } else {
10374                    0.30
10375                },
10376                ..Default::default()
10377            },
10378            strength_config: datasynth_generators::relationships::StrengthConfig {
10379                transaction_volume_weight: self
10380                    .config
10381                    .relationship_strength
10382                    .calculation
10383                    .transaction_volume_weight,
10384                transaction_count_weight: self
10385                    .config
10386                    .relationship_strength
10387                    .calculation
10388                    .transaction_count_weight,
10389                duration_weight: self
10390                    .config
10391                    .relationship_strength
10392                    .calculation
10393                    .relationship_duration_weight,
10394                recency_weight: self.config.relationship_strength.calculation.recency_weight,
10395                mutual_connections_weight: self
10396                    .config
10397                    .relationship_strength
10398                    .calculation
10399                    .mutual_connections_weight,
10400                recency_half_life_days: self
10401                    .config
10402                    .relationship_strength
10403                    .calculation
10404                    .recency_half_life_days,
10405            },
10406            ..Default::default()
10407        };
10408
10409        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10410
10411        // --- Part 1: Entity Relationship Graph ---
10412        let entity_graph = if rs_enabled {
10413            // Build EntitySummary lists from master data
10414            let vendor_summaries: Vec<EntitySummary> = self
10415                .master_data
10416                .vendors
10417                .iter()
10418                .map(|v| {
10419                    EntitySummary::new(
10420                        &v.vendor_id,
10421                        &v.name,
10422                        datasynth_core::models::GraphEntityType::Vendor,
10423                        start_date,
10424                    )
10425                })
10426                .collect();
10427
10428            let customer_summaries: Vec<EntitySummary> = self
10429                .master_data
10430                .customers
10431                .iter()
10432                .map(|c| {
10433                    EntitySummary::new(
10434                        &c.customer_id,
10435                        &c.name,
10436                        datasynth_core::models::GraphEntityType::Customer,
10437                        start_date,
10438                    )
10439                })
10440                .collect();
10441
10442            // Build transaction summaries from journal entries.
10443            // Key = (company_code, trading_partner) for entries that have a
10444            // trading partner.  This captures intercompany flows and any JE
10445            // whose line items carry a trading_partner reference.
10446            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10447                std::collections::HashMap::new();
10448
10449            for je in journal_entries {
10450                let cc = je.header.company_code.clone();
10451                let posting_date = je.header.posting_date;
10452                for line in &je.lines {
10453                    if let Some(ref tp) = line.trading_partner {
10454                        let amount = if line.debit_amount > line.credit_amount {
10455                            line.debit_amount
10456                        } else {
10457                            line.credit_amount
10458                        };
10459                        let entry = txn_summaries
10460                            .entry((cc.clone(), tp.clone()))
10461                            .or_insert_with(|| TransactionSummary {
10462                                total_volume: rust_decimal::Decimal::ZERO,
10463                                transaction_count: 0,
10464                                first_transaction_date: posting_date,
10465                                last_transaction_date: posting_date,
10466                                related_entities: std::collections::HashSet::new(),
10467                            });
10468                        entry.total_volume += amount;
10469                        entry.transaction_count += 1;
10470                        if posting_date < entry.first_transaction_date {
10471                            entry.first_transaction_date = posting_date;
10472                        }
10473                        if posting_date > entry.last_transaction_date {
10474                            entry.last_transaction_date = posting_date;
10475                        }
10476                        entry.related_entities.insert(cc.clone());
10477                    }
10478                }
10479            }
10480
10481            // Also extract transaction relationships from document flow chains.
10482            // P2P chains: Company → Vendor relationships
10483            for chain in &document_flows.p2p_chains {
10484                let cc = chain.purchase_order.header.company_code.clone();
10485                let vendor_id = chain.purchase_order.vendor_id.clone();
10486                let po_date = chain.purchase_order.header.document_date;
10487                let amount = chain.purchase_order.total_net_amount;
10488
10489                let entry = txn_summaries
10490                    .entry((cc.clone(), vendor_id))
10491                    .or_insert_with(|| TransactionSummary {
10492                        total_volume: rust_decimal::Decimal::ZERO,
10493                        transaction_count: 0,
10494                        first_transaction_date: po_date,
10495                        last_transaction_date: po_date,
10496                        related_entities: std::collections::HashSet::new(),
10497                    });
10498                entry.total_volume += amount;
10499                entry.transaction_count += 1;
10500                if po_date < entry.first_transaction_date {
10501                    entry.first_transaction_date = po_date;
10502                }
10503                if po_date > entry.last_transaction_date {
10504                    entry.last_transaction_date = po_date;
10505                }
10506                entry.related_entities.insert(cc);
10507            }
10508
10509            // O2C chains: Company → Customer relationships
10510            for chain in &document_flows.o2c_chains {
10511                let cc = chain.sales_order.header.company_code.clone();
10512                let customer_id = chain.sales_order.customer_id.clone();
10513                let so_date = chain.sales_order.header.document_date;
10514                let amount = chain.sales_order.total_net_amount;
10515
10516                let entry = txn_summaries
10517                    .entry((cc.clone(), customer_id))
10518                    .or_insert_with(|| TransactionSummary {
10519                        total_volume: rust_decimal::Decimal::ZERO,
10520                        transaction_count: 0,
10521                        first_transaction_date: so_date,
10522                        last_transaction_date: so_date,
10523                        related_entities: std::collections::HashSet::new(),
10524                    });
10525                entry.total_volume += amount;
10526                entry.transaction_count += 1;
10527                if so_date < entry.first_transaction_date {
10528                    entry.first_transaction_date = so_date;
10529                }
10530                if so_date > entry.last_transaction_date {
10531                    entry.last_transaction_date = so_date;
10532                }
10533                entry.related_entities.insert(cc);
10534            }
10535
10536            let as_of_date = journal_entries
10537                .last()
10538                .map(|je| je.header.posting_date)
10539                .unwrap_or(start_date);
10540
10541            let graph = gen.generate_entity_graph(
10542                company_code,
10543                as_of_date,
10544                &vendor_summaries,
10545                &customer_summaries,
10546                &txn_summaries,
10547            );
10548
10549            info!(
10550                "Entity relationship graph: {} nodes, {} edges",
10551                graph.nodes.len(),
10552                graph.edges.len()
10553            );
10554            stats.entity_relationship_node_count = graph.nodes.len();
10555            stats.entity_relationship_edge_count = graph.edges.len();
10556            Some(graph)
10557        } else {
10558            None
10559        };
10560
10561        // --- Part 2: Cross-Process Links ---
10562        let cross_process_links = if cpl_enabled {
10563            // Build GoodsReceiptRef from P2P chains
10564            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10565                .p2p_chains
10566                .iter()
10567                .flat_map(|chain| {
10568                    let vendor_id = chain.purchase_order.vendor_id.clone();
10569                    let cc = chain.purchase_order.header.company_code.clone();
10570                    chain.goods_receipts.iter().flat_map(move |gr| {
10571                        gr.items.iter().filter_map({
10572                            let doc_id = gr.header.document_id.clone();
10573                            let v_id = vendor_id.clone();
10574                            let company = cc.clone();
10575                            let receipt_date = gr.header.document_date;
10576                            move |item| {
10577                                item.base
10578                                    .material_id
10579                                    .as_ref()
10580                                    .map(|mat_id| GoodsReceiptRef {
10581                                        document_id: doc_id.clone(),
10582                                        material_id: mat_id.clone(),
10583                                        quantity: item.base.quantity,
10584                                        receipt_date,
10585                                        vendor_id: v_id.clone(),
10586                                        company_code: company.clone(),
10587                                    })
10588                            }
10589                        })
10590                    })
10591                })
10592                .collect();
10593
10594            // Build DeliveryRef from O2C chains
10595            let del_refs: Vec<DeliveryRef> = document_flows
10596                .o2c_chains
10597                .iter()
10598                .flat_map(|chain| {
10599                    let customer_id = chain.sales_order.customer_id.clone();
10600                    let cc = chain.sales_order.header.company_code.clone();
10601                    chain.deliveries.iter().flat_map(move |del| {
10602                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10603                        del.items.iter().filter_map({
10604                            let doc_id = del.header.document_id.clone();
10605                            let c_id = customer_id.clone();
10606                            let company = cc.clone();
10607                            move |item| {
10608                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10609                                    document_id: doc_id.clone(),
10610                                    material_id: mat_id.clone(),
10611                                    quantity: item.base.quantity,
10612                                    delivery_date,
10613                                    customer_id: c_id.clone(),
10614                                    company_code: company.clone(),
10615                                })
10616                            }
10617                        })
10618                    })
10619                })
10620                .collect();
10621
10622            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10623            info!("Cross-process links generated: {} links", links.len());
10624            stats.cross_process_link_count = links.len();
10625            links
10626        } else {
10627            Vec::new()
10628        };
10629
10630        self.check_resources_with_log("post-entity-relationships")?;
10631        Ok((entity_graph, cross_process_links))
10632    }
10633
10634    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10635    fn phase_industry_data(
10636        &self,
10637        stats: &mut EnhancedGenerationStatistics,
10638    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10639        if !self.config.industry_specific.enabled {
10640            return None;
10641        }
10642        info!("Phase 29: Generating industry-specific data");
10643        let output = datasynth_generators::industry::factory::generate_industry_output(
10644            self.config.global.industry,
10645        );
10646        stats.industry_gl_account_count = output.gl_accounts.len();
10647        info!(
10648            "Industry data generated: {} GL accounts for {:?}",
10649            output.gl_accounts.len(),
10650            self.config.global.industry
10651        );
10652        Some(output)
10653    }
10654
10655    /// Phase 3b: Generate opening balances for each company.
10656    ///
10657    /// # Order of precedence
10658    ///
10659    /// 1. **v5.3 chain carryover** (ShardContext.opening_balances non-empty):
10660    ///    convert each EntityOpeningBalance into a
10661    ///    GeneratedOpeningBalance per company. This branch runs
10662    ///    UNCONDITIONALLY — even when `balance.generate_opening_balances`
10663    ///    is `false` — so a non-overlay preset that gets driven through
10664    ///    `group generate-chain` still applies the prior-year carry-
10665    ///    forward instead of silently dropping it.
10666    /// 2. **`generate_opening_balances` flag**: if off (and no carryover),
10667    ///    return empty Vec.
10668    /// 3. **OpeningBalanceGenerator**: industry-mix sampler for the
10669    ///    period-0 engagement.
10670    fn phase_opening_balances(
10671        &mut self,
10672        coa: &Arc<ChartOfAccounts>,
10673        stats: &mut EnhancedGenerationStatistics,
10674    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10675        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10676            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10677        let fiscal_year = start_date.year();
10678
10679        // 1. v5.3 chain carryover — runs unconditionally when present.
10680        if let Some(ctx) = &self.shard_context {
10681            if !ctx.opening_balances.is_empty() {
10682                info!(
10683                    "Phase 3b: applying v5.3 opening-balance carryover ({} accounts × {} companies)",
10684                    ctx.opening_balances.len(),
10685                    self.config.companies.len(),
10686                );
10687                let mut results = Vec::new();
10688                for company in &self.config.companies {
10689                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10690                        .opening_balances
10691                        .iter()
10692                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10693                        .collect();
10694                    let total_assets = ctx
10695                        .opening_balances
10696                        .iter()
10697                        .filter(|ob| {
10698                            matches!(
10699                                ob.account_type,
10700                                AccountType::Asset | AccountType::ContraAsset
10701                            )
10702                        })
10703                        .map(|ob| ob.net_balance())
10704                        .sum::<rust_decimal::Decimal>();
10705                    let total_liabilities = ctx
10706                        .opening_balances
10707                        .iter()
10708                        .filter(|ob| {
10709                            matches!(
10710                                ob.account_type,
10711                                AccountType::Liability | AccountType::ContraLiability
10712                            )
10713                        })
10714                        .map(|ob| ob.net_balance())
10715                        .sum::<rust_decimal::Decimal>();
10716                    let total_equity = ctx
10717                        .opening_balances
10718                        .iter()
10719                        .filter(|ob| {
10720                            matches!(
10721                                ob.account_type,
10722                                AccountType::Equity | AccountType::ContraEquity
10723                            )
10724                        })
10725                        .map(|ob| ob.net_balance())
10726                        .sum::<rust_decimal::Decimal>();
10727                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10728                        < rust_decimal::Decimal::ONE;
10729                    results.push(GeneratedOpeningBalance {
10730                        company_code: company.code.clone(),
10731                        as_of_date: start_date,
10732                        balances,
10733                        total_assets,
10734                        total_liabilities,
10735                        total_equity,
10736                        is_balanced,
10737                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10738                            current_ratio: None,
10739                            quick_ratio: None,
10740                            debt_to_equity: None,
10741                            working_capital: rust_decimal::Decimal::ZERO,
10742                        },
10743                    });
10744                }
10745                stats.opening_balance_count = results.len();
10746                self.check_resources_with_log("post-opening-balances")?;
10747                return Ok(results);
10748            }
10749        }
10750
10751        // 2. Generator path is opt-in via the config flag.
10752        if !self.config.balance.generate_opening_balances {
10753            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10754            return Ok(Vec::new());
10755        }
10756        info!("Phase 3b: Generating Opening Balances");
10757
10758        // 3. OpeningBalanceGenerator — industry-mix sampler for period 0.
10759        let industry = match self.config.global.industry {
10760            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10761            IndustrySector::Retail => IndustryType::Retail,
10762            IndustrySector::FinancialServices => IndustryType::Financial,
10763            IndustrySector::Healthcare => IndustryType::Healthcare,
10764            IndustrySector::Technology => IndustryType::Technology,
10765            _ => IndustryType::Manufacturing,
10766        };
10767
10768        let config = datasynth_generators::OpeningBalanceConfig {
10769            industry,
10770            ..Default::default()
10771        };
10772        let mut gen =
10773            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10774
10775        let mut results = Vec::new();
10776        for company in &self.config.companies {
10777            let spec = OpeningBalanceSpec::new(
10778                company.code.clone(),
10779                start_date,
10780                fiscal_year,
10781                company.currency.clone(),
10782                rust_decimal::Decimal::new(10_000_000, 0),
10783                industry,
10784            );
10785            let ob = gen.generate(&spec, coa, start_date, &company.code);
10786            results.push(ob);
10787        }
10788
10789        stats.opening_balance_count = results.len();
10790        info!("Opening balances generated: {} companies", results.len());
10791        self.check_resources_with_log("post-opening-balances")?;
10792
10793        Ok(results)
10794    }
10795
10796    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10797    fn phase_subledger_reconciliation(
10798        &mut self,
10799        subledger: &SubledgerSnapshot,
10800        entries: &[JournalEntry],
10801        stats: &mut EnhancedGenerationStatistics,
10802    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10803        if !self.config.balance.reconcile_subledgers {
10804            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10805            return Ok(Vec::new());
10806        }
10807        info!("Phase 9b: Reconciling GL to subledger balances");
10808
10809        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10810            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10811            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10812
10813        // Build GL balance map from journal entries using a balance tracker
10814        let tracker_config = BalanceTrackerConfig {
10815            validate_on_each_entry: false,
10816            track_history: false,
10817            fail_on_validation_error: false,
10818            ..Default::default()
10819        };
10820        let recon_currency = self
10821            .config
10822            .companies
10823            .first()
10824            .map(|c| c.currency.clone())
10825            .unwrap_or_else(|| "USD".to_string());
10826        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10827        let validation_errors = tracker.apply_entries(entries);
10828        if !validation_errors.is_empty() {
10829            warn!(
10830                error_count = validation_errors.len(),
10831                "Balance tracker encountered validation errors during subledger reconciliation"
10832            );
10833            for err in &validation_errors {
10834                debug!("Balance validation error: {:?}", err);
10835            }
10836        }
10837
10838        let mut engine = datasynth_generators::ReconciliationEngine::new(
10839            datasynth_generators::ReconciliationConfig::default(),
10840        );
10841
10842        let mut results = Vec::new();
10843        let company_code = self
10844            .config
10845            .companies
10846            .first()
10847            .map(|c| c.code.as_str())
10848            .unwrap_or("1000");
10849
10850        // Reconcile AR
10851        if !subledger.ar_invoices.is_empty() {
10852            let gl_balance = tracker
10853                .get_account_balance(
10854                    company_code,
10855                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10856                )
10857                .map(|b| b.closing_balance)
10858                .unwrap_or_default();
10859            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10860            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10861        }
10862
10863        // Reconcile AP
10864        if !subledger.ap_invoices.is_empty() {
10865            let gl_balance = tracker
10866                .get_account_balance(
10867                    company_code,
10868                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10869                )
10870                .map(|b| b.closing_balance)
10871                .unwrap_or_default();
10872            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10873            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10874        }
10875
10876        // Reconcile FA
10877        if !subledger.fa_records.is_empty() {
10878            let gl_asset_balance = tracker
10879                .get_account_balance(
10880                    company_code,
10881                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10882                )
10883                .map(|b| b.closing_balance)
10884                .unwrap_or_default();
10885            let gl_accum_depr_balance = tracker
10886                .get_account_balance(
10887                    company_code,
10888                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10889                )
10890                .map(|b| b.closing_balance)
10891                .unwrap_or_default();
10892            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10893                subledger.fa_records.iter().collect();
10894            let (asset_recon, depr_recon) = engine.reconcile_fa(
10895                company_code,
10896                end_date,
10897                gl_asset_balance,
10898                gl_accum_depr_balance,
10899                &fa_refs,
10900            );
10901            results.push(asset_recon);
10902            results.push(depr_recon);
10903        }
10904
10905        // Reconcile Inventory
10906        if !subledger.inventory_positions.is_empty() {
10907            let gl_balance = tracker
10908                .get_account_balance(
10909                    company_code,
10910                    datasynth_core::accounts::control_accounts::INVENTORY,
10911                )
10912                .map(|b| b.closing_balance)
10913                .unwrap_or_default();
10914            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10915                subledger.inventory_positions.iter().collect();
10916            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10917        }
10918
10919        stats.subledger_reconciliation_count = results.len();
10920        let passed = results.iter().filter(|r| r.is_balanced()).count();
10921        let failed = results.len() - passed;
10922        info!(
10923            "Subledger reconciliation: {} checks, {} passed, {} failed",
10924            results.len(),
10925            passed,
10926            failed
10927        );
10928        self.check_resources_with_log("post-subledger-reconciliation")?;
10929
10930        Ok(results)
10931    }
10932
10933    /// Generate the chart of accounts.
10934    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10935        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10936
10937        let coa_framework = self.resolve_coa_framework();
10938
10939        let mut gen = ChartOfAccountsGenerator::new(
10940            self.config.chart_of_accounts.complexity,
10941            self.config.global.industry,
10942            self.seed,
10943        )
10944        .with_coa_framework(coa_framework)
10945        // v5.7.0 — honour the opt-in industry-pack expansion flag.
10946        .with_expand_industry_subaccounts(
10947            self.config.chart_of_accounts.expand_industry_subaccounts,
10948        );
10949
10950        let mut built = gen.generate();
10951        // v4.4.1: propagate the accounting framework label from config
10952        // onto the CoA struct so SDK consumers can read it without
10953        // cross-referencing the config (they previously saw null).
10954        if self.config.accounting_standards.enabled {
10955            use datasynth_config::schema::AccountingFrameworkConfig;
10956            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10957                match f {
10958                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10959                    AccountingFrameworkConfig::Ifrs => "ifrs",
10960                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10961                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10962                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10963                }
10964                .to_string()
10965            });
10966        }
10967        // SP4.2 W8.2 + W7.1 — remap synthetic account numbers to corpus
10968        // ones first (W8.2), then enrich descriptions via the overlay (W7.1).
10969        // Applied before Arc::new so we only build one Arc (no clone needed).
10970        if let Some(ref cached) = self.cached_priors {
10971            if let Some(ref coa_prior) = cached.coa_semantic {
10972                use datasynth_generators::coa_generator::{
10973                    remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10974                };
10975                // W8.2 — replace synthetic account numbers with corpus
10976                // ones so the W7.1 overlay fires at ~80% instead of ~16%.
10977                let mut rng =
10978                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10979                let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10980                tracing::info!(
10981                    target: "datasynth_runtime::coa",
10982                    remapped,
10983                    total = built.accounts.len(),
10984                    "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10985                );
10986                // W7.1 — now overlay descriptions / class metadata for the
10987                // (now mostly corpus-numbered) accounts.
10988                let applied =
10989                    ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10990                tracing::info!(
10991                    target: "datasynth_runtime::coa",
10992                    applied,
10993                    total = built.accounts.len(),
10994                    "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10995                );
10996            }
10997            // SP6 — taxonomy overlay: run AFTER the semantic overlay so
10998            // taxonomy-templated accounts take precedence over verbatim
10999            // semantic descriptions.  Uses SyntheticExampleResolver because
11000            // the CoA is built before master-data pools are populated (so
11001            // vendor/customer names are not yet available).
11002            if let Some(tx) = cached.text_taxonomy.as_ref() {
11003                use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
11004                use datasynth_generators::coa_generator::overlay_coa_taxonomy;
11005                let mut resolver = SyntheticExampleResolver;
11006                let mut rng =
11007                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
11008                overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
11009                tracing::info!(
11010                    target: "datasynth_runtime::coa",
11011                    total = built.accounts.len(),
11012                    "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
11013                );
11014            }
11015        }
11016
11017        let coa = Arc::new(built);
11018        self.coa = Some(Arc::clone(&coa));
11019
11020        if let Some(pb) = pb {
11021            pb.finish_with_message("Chart of Accounts complete");
11022        }
11023
11024        Ok(coa)
11025    }
11026
11027    /// Generate master data entities.
11028    fn generate_master_data(&mut self) -> SynthResult<()> {
11029        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11030            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11031        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11032
11033        let total = self.config.companies.len() as u64 * 5; // 5 entity types
11034        let pb = self.create_progress_bar(total, "Generating Master Data");
11035
11036        // Resolve country pack once for all companies (uses primary company's country)
11037        let pack = self.primary_pack().clone();
11038
11039        // Capture config values needed inside the parallel closure
11040        let vendors_per_company = self.phase_config.vendors_per_company;
11041        let customers_per_company = self.phase_config.customers_per_company;
11042        let materials_per_company = self.phase_config.materials_per_company;
11043        let assets_per_company = self.phase_config.assets_per_company;
11044        let coa_framework = self.resolve_coa_framework();
11045
11046        // Generate all master data in parallel across companies.
11047        // Each company's data is independent, making this embarrassingly parallel.
11048        let per_company_results: Vec<_> = self
11049            .config
11050            .companies
11051            .par_iter()
11052            .enumerate()
11053            .map(|(i, company)| {
11054                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
11055                let pack = pack.clone();
11056
11057                // Generate vendors (offset counter so IDs are globally unique across companies)
11058                let mut vendor_gen = VendorGenerator::new(company_seed);
11059                vendor_gen.set_country_pack(pack.clone());
11060                vendor_gen.set_coa_framework(coa_framework);
11061                vendor_gen.set_counter_offset(i * vendors_per_company);
11062                // v3.2.0+: user-supplied bank names (and future template
11063                // strings) flow through the shared provider.
11064                vendor_gen.set_template_provider(self.template_provider.clone());
11065                // Wire vendor network config when enabled
11066                if self.config.vendor_network.enabled {
11067                    let vn = &self.config.vendor_network;
11068                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
11069                        enabled: true,
11070                        depth: vn.depth,
11071                        tier1_count: datasynth_generators::TierCountConfig::new(
11072                            vn.tier1.min,
11073                            vn.tier1.max,
11074                        ),
11075                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
11076                            vn.tier2_per_parent.min,
11077                            vn.tier2_per_parent.max,
11078                        ),
11079                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
11080                            vn.tier3_per_parent.min,
11081                            vn.tier3_per_parent.max,
11082                        ),
11083                        cluster_distribution: datasynth_generators::ClusterDistribution {
11084                            reliable_strategic: vn.clusters.reliable_strategic,
11085                            standard_operational: vn.clusters.standard_operational,
11086                            transactional: vn.clusters.transactional,
11087                            problematic: vn.clusters.problematic,
11088                        },
11089                        concentration_limits: datasynth_generators::ConcentrationLimits {
11090                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
11091                            max_top5: vn.dependencies.top_5_concentration,
11092                        },
11093                        ..datasynth_generators::VendorNetworkConfig::default()
11094                    });
11095                }
11096                let vendor_pool =
11097                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
11098
11099                // Generate customers (offset counter so IDs are globally unique across companies)
11100                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
11101                customer_gen.set_country_pack(pack.clone());
11102                customer_gen.set_coa_framework(coa_framework);
11103                customer_gen.set_counter_offset(i * customers_per_company);
11104                // v3.2.0+: user-supplied customer names flow through the shared provider.
11105                customer_gen.set_template_provider(self.template_provider.clone());
11106                // Wire customer segmentation config when enabled
11107                if self.config.customer_segmentation.enabled {
11108                    let cs = &self.config.customer_segmentation;
11109                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
11110                        enabled: true,
11111                        segment_distribution: datasynth_generators::SegmentDistribution {
11112                            enterprise: cs.value_segments.enterprise.customer_share,
11113                            mid_market: cs.value_segments.mid_market.customer_share,
11114                            smb: cs.value_segments.smb.customer_share,
11115                            consumer: cs.value_segments.consumer.customer_share,
11116                        },
11117                        referral_config: datasynth_generators::ReferralConfig {
11118                            enabled: cs.networks.referrals.enabled,
11119                            referral_rate: cs.networks.referrals.referral_rate,
11120                            ..Default::default()
11121                        },
11122                        hierarchy_config: datasynth_generators::HierarchyConfig {
11123                            enabled: cs.networks.corporate_hierarchies.enabled,
11124                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
11125                            ..Default::default()
11126                        },
11127                        ..Default::default()
11128                    };
11129                    customer_gen.set_segmentation_config(seg_cfg);
11130                }
11131                let customer_pool = customer_gen.generate_customer_pool(
11132                    customers_per_company,
11133                    &company.code,
11134                    start_date,
11135                );
11136
11137                // Generate materials (offset counter so IDs are globally unique across companies)
11138                let mut material_gen = MaterialGenerator::new(company_seed + 200);
11139                material_gen.set_country_pack(pack.clone());
11140                material_gen.set_counter_offset(i * materials_per_company);
11141                // v3.2.1+: user-supplied material descriptions flow through shared provider
11142                material_gen.set_template_provider(self.template_provider.clone());
11143                let material_pool = material_gen.generate_material_pool(
11144                    materials_per_company,
11145                    &company.code,
11146                    start_date,
11147                );
11148
11149                // Generate fixed assets
11150                let mut asset_gen = AssetGenerator::new(company_seed + 300);
11151                // v3.2.1+: user-supplied asset descriptions flow through shared provider
11152                asset_gen.set_template_provider(self.template_provider.clone());
11153                let asset_pool = asset_gen.generate_asset_pool(
11154                    assets_per_company,
11155                    &company.code,
11156                    (start_date, end_date),
11157                );
11158
11159                // Generate employees
11160                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
11161                employee_gen.set_country_pack(pack);
11162                // v3.2.1+: user-supplied department names flow through shared provider
11163                employee_gen.set_template_provider(self.template_provider.clone());
11164                let employee_pool =
11165                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
11166
11167                // Generate employee change history (2-5 events per employee)
11168                let employee_change_history =
11169                    employee_gen.generate_all_change_history(&employee_pool, end_date);
11170
11171                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
11172                let employee_ids: Vec<String> = employee_pool
11173                    .employees
11174                    .iter()
11175                    .map(|e| e.employee_id.clone())
11176                    .collect();
11177                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
11178                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
11179
11180                // v5.1: profit centre hierarchy (two-level: top-level
11181                // segment / region / product-group nodes + sub-units).
11182                let mut pc_gen =
11183                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
11184                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
11185
11186                (
11187                    vendor_pool.vendors,
11188                    customer_pool.customers,
11189                    material_pool.materials,
11190                    asset_pool.assets,
11191                    employee_pool.employees,
11192                    employee_change_history,
11193                    cost_centers,
11194                    profit_centers,
11195                )
11196            })
11197            .collect();
11198
11199        // Aggregate results from all companies
11200        for (
11201            vendors,
11202            customers,
11203            materials,
11204            assets,
11205            employees,
11206            change_history,
11207            cost_centers,
11208            profit_centers,
11209        ) in per_company_results
11210        {
11211            self.master_data.vendors.extend(vendors);
11212            self.master_data.customers.extend(customers);
11213            self.master_data.materials.extend(materials);
11214            self.master_data.assets.extend(assets);
11215            self.master_data.employees.extend(employees);
11216            self.master_data.cost_centers.extend(cost_centers);
11217            self.master_data.profit_centers.extend(profit_centers);
11218            self.master_data
11219                .employee_change_history
11220                .extend(change_history);
11221        }
11222
11223        // v3.3.0: one OrganizationalProfile per company. Cheap to
11224        // generate (derived from industry + company_code) so we
11225        // always emit when master data runs; no separate config flag.
11226        {
11227            use datasynth_core::models::IndustrySector;
11228            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
11229            let industry = match self.config.global.industry {
11230                IndustrySector::Manufacturing => "manufacturing",
11231                IndustrySector::Retail => "retail",
11232                IndustrySector::FinancialServices => "financial_services",
11233                IndustrySector::Technology => "technology",
11234                IndustrySector::Healthcare => "healthcare",
11235                _ => "other",
11236            };
11237            for (i, company) in self.config.companies.iter().enumerate() {
11238                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
11239                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
11240                let profile = profile_gen.generate(&company.code, industry);
11241                self.master_data.organizational_profiles.push(profile);
11242            }
11243        }
11244
11245        if let Some(pb) = &pb {
11246            pb.inc(total);
11247        }
11248        if let Some(pb) = pb {
11249            pb.finish_with_message("Master data generation complete");
11250        }
11251
11252        Ok(())
11253    }
11254
11255    /// Generate document flows (P2P and O2C).
11256    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
11257        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11258            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11259
11260        // Generate P2P chains
11261        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
11262        let months = (self.config.global.period_months as usize).max(1);
11263        let p2p_count = self
11264            .phase_config
11265            .p2p_chains
11266            .min(self.master_data.vendors.len() * 2 * months);
11267        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
11268
11269        // Convert P2P config from schema to generator config
11270        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
11271        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
11272        p2p_gen.set_country_pack(self.primary_pack().clone());
11273        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
11274        // to business days. No-op when `temporal_patterns.business_days.
11275        // enabled = false`.
11276        if let Some(ctx) = &self.temporal_context {
11277            p2p_gen.set_temporal_context(Arc::clone(ctx));
11278        }
11279
11280        for i in 0..p2p_count {
11281            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
11282            let materials: Vec<&Material> = self
11283                .master_data
11284                .materials
11285                .iter()
11286                .skip(i % self.master_data.materials.len().max(1))
11287                .take(2.min(self.master_data.materials.len()))
11288                .collect();
11289
11290            if materials.is_empty() {
11291                continue;
11292            }
11293
11294            let company = &self.config.companies[i % self.config.companies.len()];
11295            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
11296            let fiscal_period = po_date.month() as u8;
11297            let created_by = if self.master_data.employees.is_empty() {
11298                "SYSTEM"
11299            } else {
11300                self.master_data.employees[i % self.master_data.employees.len()]
11301                    .user_id
11302                    .as_str()
11303            };
11304
11305            let chain = p2p_gen.generate_chain(
11306                &company.code,
11307                vendor,
11308                &materials,
11309                po_date,
11310                start_date.year() as u16,
11311                fiscal_period,
11312                created_by,
11313            );
11314
11315            // Flatten documents
11316            flows.purchase_orders.push(chain.purchase_order.clone());
11317            flows.goods_receipts.extend(chain.goods_receipts.clone());
11318            if let Some(vi) = &chain.vendor_invoice {
11319                flows.vendor_invoices.push(vi.clone());
11320            }
11321            if let Some(payment) = &chain.payment {
11322                flows.payments.push(payment.clone());
11323            }
11324            for remainder in &chain.remainder_payments {
11325                flows.payments.push(remainder.clone());
11326            }
11327            flows.p2p_chains.push(chain);
11328
11329            if let Some(pb) = &pb {
11330                pb.inc(1);
11331            }
11332        }
11333
11334        if let Some(pb) = pb {
11335            pb.finish_with_message("P2P document flows complete");
11336        }
11337
11338        // Generate O2C chains
11339        // Cap at ~2 SOs per customer per month to keep order volume realistic
11340        let o2c_count = self
11341            .phase_config
11342            .o2c_chains
11343            .min(self.master_data.customers.len() * 2 * months);
11344        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11345
11346        // Convert O2C config from schema to generator config
11347        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11348        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11349        o2c_gen.set_country_pack(self.primary_pack().clone());
11350        // v3.4.1: wire temporal context (no-op when business_days disabled).
11351        if let Some(ctx) = &self.temporal_context {
11352            o2c_gen.set_temporal_context(Arc::clone(ctx));
11353        }
11354
11355        for i in 0..o2c_count {
11356            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11357            let materials: Vec<&Material> = self
11358                .master_data
11359                .materials
11360                .iter()
11361                .skip(i % self.master_data.materials.len().max(1))
11362                .take(2.min(self.master_data.materials.len()))
11363                .collect();
11364
11365            if materials.is_empty() {
11366                continue;
11367            }
11368
11369            let company = &self.config.companies[i % self.config.companies.len()];
11370            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11371            let fiscal_period = so_date.month() as u8;
11372            let created_by = if self.master_data.employees.is_empty() {
11373                "SYSTEM"
11374            } else {
11375                self.master_data.employees[i % self.master_data.employees.len()]
11376                    .user_id
11377                    .as_str()
11378            };
11379
11380            let chain = o2c_gen.generate_chain(
11381                &company.code,
11382                customer,
11383                &materials,
11384                so_date,
11385                start_date.year() as u16,
11386                fiscal_period,
11387                created_by,
11388            );
11389
11390            // Flatten documents
11391            flows.sales_orders.push(chain.sales_order.clone());
11392            flows.deliveries.extend(chain.deliveries.clone());
11393            if let Some(ci) = &chain.customer_invoice {
11394                flows.customer_invoices.push(ci.clone());
11395            }
11396            if let Some(receipt) = &chain.customer_receipt {
11397                flows.payments.push(receipt.clone());
11398            }
11399            // Extract remainder receipts (follow-up to partial payments)
11400            for receipt in &chain.remainder_receipts {
11401                flows.payments.push(receipt.clone());
11402            }
11403            flows.o2c_chains.push(chain);
11404
11405            if let Some(pb) = &pb {
11406                pb.inc(1);
11407            }
11408        }
11409
11410        if let Some(pb) = pb {
11411            pb.finish_with_message("O2C document flows complete");
11412        }
11413
11414        // Collect all document cross-references from document headers.
11415        // Each document embeds references to its predecessor(s) via add_reference(); here we
11416        // denormalise them into a flat list for the document_references.json output file.
11417        {
11418            let mut refs = Vec::new();
11419            for doc in &flows.purchase_orders {
11420                refs.extend(doc.header.document_references.iter().cloned());
11421            }
11422            for doc in &flows.goods_receipts {
11423                refs.extend(doc.header.document_references.iter().cloned());
11424            }
11425            for doc in &flows.vendor_invoices {
11426                refs.extend(doc.header.document_references.iter().cloned());
11427            }
11428            for doc in &flows.sales_orders {
11429                refs.extend(doc.header.document_references.iter().cloned());
11430            }
11431            for doc in &flows.deliveries {
11432                refs.extend(doc.header.document_references.iter().cloned());
11433            }
11434            for doc in &flows.customer_invoices {
11435                refs.extend(doc.header.document_references.iter().cloned());
11436            }
11437            for doc in &flows.payments {
11438                refs.extend(doc.header.document_references.iter().cloned());
11439            }
11440            debug!(
11441                "Collected {} document cross-references from document headers",
11442                refs.len()
11443            );
11444            flows.document_references = refs;
11445        }
11446
11447        Ok(())
11448    }
11449
11450    /// Generate journal entries using parallel generation across multiple cores.
11451    fn generate_journal_entries(
11452        &mut self,
11453        coa: &Arc<ChartOfAccounts>,
11454    ) -> SynthResult<Vec<JournalEntry>> {
11455        use datasynth_core::traits::ParallelGenerator;
11456
11457        let total = self.calculate_total_transactions();
11458        let pb = self.create_progress_bar(total, "Generating Journal Entries");
11459
11460        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11461            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11462        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11463
11464        let company_codes: Vec<String> = self
11465            .config
11466            .companies
11467            .iter()
11468            .map(|c| c.code.clone())
11469            .collect();
11470
11471        let mut generator = JournalEntryGenerator::new_with_params(
11472            self.config.transactions.clone(),
11473            Arc::clone(coa),
11474            company_codes,
11475            start_date,
11476            end_date,
11477            self.seed,
11478        );
11479        // P2 (multi-currency): each entity's JE document currency defaults to its
11480        // functional currency (falling back to its config currency) so the flat
11481        // export reflects per-entity currency rather than always the group ccy.
11482        let company_currencies: std::collections::HashMap<String, String> = self
11483            .config
11484            .companies
11485            .iter()
11486            .map(|c| {
11487                (
11488                    c.code.clone(),
11489                    c.functional_currency
11490                        .clone()
11491                        .unwrap_or_else(|| c.currency.clone()),
11492                )
11493            })
11494            .collect();
11495        generator = generator.with_company_currencies(company_currencies);
11496        // Wire the `business_processes.*_weight` config through (phantom knob
11497        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
11498        let bp = &self.config.business_processes;
11499        generator.set_business_process_weights(
11500            bp.o2c_weight,
11501            bp.p2p_weight,
11502            bp.r2r_weight,
11503            bp.h2r_weight,
11504            bp.a2r_weight,
11505        );
11506        // v3.4.0: wire advanced distributions (mixture models + industry
11507        // profiles). No-op when `distributions.enabled = false` or
11508        // `distributions.amounts.enabled = false`, preserving v3.3.2
11509        // byte-identical output on default configs.
11510        generator
11511            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11512            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11513
11514        // SP3: load and wire industry priors when the config opts in via
11515        //   distributions.industry_profile.priors.enabled = true
11516        // When disabled (or when using the legacy bare-name form), this block
11517        // is a no-op and generation behavior is identical to v5.11.
11518        if let Some(profile) = &self.config.distributions.industry_profile {
11519            if let Some(priors_cfg) = profile.priors() {
11520                if priors_cfg.enabled {
11521                    use datasynth_config::schema::PriorsSource;
11522                    use datasynth_generators::priors_loader::LoadedPriors;
11523
11524                    let mut priors_rng =
11525                        rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11526                    let period_days = i64::from(self.config.global.period_months) * 30;
11527                    let industry_slug = profile.profile_type().slug();
11528
11529                    let loaded = match priors_cfg.source {
11530                        PriorsSource::Bundled => {
11531                            LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11532                                .map_err(|e| {
11533                                    SynthError::config(format!(
11534                                "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11535                            ))
11536                                })?
11537                        }
11538                        PriorsSource::File => {
11539                            let path = priors_cfg.path.as_ref().ok_or_else(|| {
11540                                SynthError::config(
11541                                    "SP3: industry_profile.priors.path required when source = file"
11542                                        .to_string(),
11543                                )
11544                            })?;
11545                            LoadedPriors::load_from_path(
11546                                path,
11547                                &mut priors_rng,
11548                                period_days,
11549                                Some(industry_slug),
11550                            )
11551                            .map_err(|e| {
11552                                SynthError::config(format!(
11553                                    "SP3: failed to load priors from '{}': {e}",
11554                                    path.display()
11555                                ))
11556                            })?
11557                        }
11558                    };
11559
11560                    // SP3.12 — cache priors in Arc so document-flow generator
11561                    // can also apply lines-per-JE padding without re-loading.
11562                    let loaded = std::sync::Arc::new(loaded);
11563                    self.cached_priors = Some(loaded.clone());
11564                    generator.loaded_priors = Some((*loaded).clone());
11565
11566                    // SP3.4 — instantiate VelocityCalibrator when the config
11567                    // opts in.  Default target rates (R7/R9) are a sensible
11568                    // baseline; they can be derived from the loaded priors in
11569                    // a future hardening pass.
11570                    if priors_cfg.velocity_calibration {
11571                        use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11572                        let mut targets = std::collections::HashMap::new();
11573                        targets.insert("R7".to_string(), 0.10);
11574                        targets.insert("R9".to_string(), 0.10);
11575                        let calibrator = VelocityCalibrator::new(targets, 10_000);
11576                        generator.velocity_calibrator = Some(calibrator);
11577                    }
11578                }
11579            }
11580        }
11581
11582        let generator = generator;
11583
11584        // Connect generated master data to ensure JEs reference real entities
11585        // Enable persona-based error injection for realistic human behavior
11586        // Pass fraud configuration for fraud injection
11587        let je_pack = self.primary_pack();
11588
11589        // Master-data CC / PC pools so JE.cost_center and
11590        // JE.profit_center join back to `cost_centers.id` and
11591        // `profit_centers.id` (closes the v5.9.0 linkage gap that
11592        // had `JE.cost_center = "CC1000"` while master used
11593        // `CC-1000-FIN` etc.).  Empty when no master is present —
11594        // the generator falls back to its hardcoded constants.
11595        let cc_pool: Vec<String> = self
11596            .master_data
11597            .cost_centers
11598            .iter()
11599            .map(|c| c.id.clone())
11600            .collect();
11601        let pc_pool: Vec<String> = self
11602            .master_data
11603            .profit_centers
11604            .iter()
11605            .map(|p| p.id.clone())
11606            .collect();
11607
11608        // Build a UserPool from the generated employee master so
11609        // JE.created_by lines join back to `employees.user_id`.  v5.9.0:
11610        // closes the third linkage gap (the previous behaviour had
11611        // JeGenerator generate its own UserPool internally with
11612        // ids disjoint from the employee master).
11613        let user_pool_from_employees =
11614            datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11615
11616        let mut generator = generator
11617            .with_master_data(
11618                &self.master_data.vendors,
11619                &self.master_data.customers,
11620                &self.master_data.materials,
11621            )
11622            .with_cost_center_pool(cc_pool)
11623            .with_profit_center_pool(pc_pool)
11624            .with_country_pack_names(je_pack)
11625            .with_user_pool(user_pool_from_employees)
11626            .with_country_pack_temporal(
11627                self.config.temporal_patterns.clone(),
11628                self.seed + 200,
11629                je_pack,
11630            )
11631            .with_persona_errors(true)
11632            .with_fraud_config(self.config.fraud.clone());
11633
11634        // Apply temporal drift if configured. v3.5.2+: also merge
11635        // `distributions.regime_changes` (regime events, economic
11636        // cycles, parameter drifts) into the same DriftConfig so both
11637        // knobs flow through the shared DriftController.
11638        let temporal_enabled = self.config.temporal.enabled;
11639        let regimes_enabled = self.config.distributions.regime_changes.enabled;
11640        if temporal_enabled || regimes_enabled {
11641            let mut drift_config = if temporal_enabled {
11642                self.config.temporal.to_core_config()
11643            } else {
11644                // regime-changes only: start from default (drift OFF),
11645                // apply_to flips `enabled = true`.
11646                datasynth_core::distributions::DriftConfig::default()
11647            };
11648            if regimes_enabled {
11649                self.config
11650                    .distributions
11651                    .regime_changes
11652                    .apply_to(&mut drift_config, start_date);
11653            }
11654            generator = generator.with_drift_config(drift_config, self.seed + 100);
11655        }
11656
11657        // Check memory limit at start
11658        self.check_memory_limit()?;
11659
11660        // Determine parallelism: use available cores, but cap at total entries
11661        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11662
11663        // Use parallel generation for datasets with 10K+ entries.
11664        // Below this threshold, the statistical properties of a single-seeded
11665        // generator (e.g. Benford compliance) are better preserved.
11666        let entries = if total >= 10_000 && num_threads > 1 {
11667            // Parallel path: split the generator across cores and generate in parallel.
11668            // Each sub-generator gets a unique seed for deterministic, independent generation.
11669            let sub_generators = generator.split(num_threads);
11670            let entries_per_thread = total as usize / num_threads;
11671            let remainder = total as usize % num_threads;
11672
11673            let batches: Vec<Vec<JournalEntry>> = sub_generators
11674                .into_par_iter()
11675                .enumerate()
11676                .map(|(i, mut gen)| {
11677                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11678                    gen.generate_batch(count)
11679                })
11680                .collect();
11681
11682            // Merge all batches into a single Vec
11683            let entries = JournalEntryGenerator::merge_results(batches);
11684
11685            if let Some(pb) = &pb {
11686                pb.inc(total);
11687            }
11688            entries
11689        } else {
11690            // Sequential path for small datasets (< 1000 entries)
11691            let mut entries = Vec::with_capacity(total as usize);
11692            for _ in 0..total {
11693                let entry = generator.generate();
11694                entries.push(entry);
11695                if let Some(pb) = &pb {
11696                    pb.inc(1);
11697                }
11698            }
11699            entries
11700        };
11701
11702        if let Some(pb) = pb {
11703            pb.finish_with_message("Journal entries complete");
11704        }
11705
11706        Ok(entries)
11707    }
11708
11709    /// Generate journal entries from document flows.
11710    ///
11711    /// This creates proper GL entries for each document in the P2P and O2C flows,
11712    /// ensuring that document activity is reflected in the general ledger.
11713    fn generate_jes_from_document_flows(
11714        &mut self,
11715        flows: &DocumentFlowSnapshot,
11716    ) -> SynthResult<Vec<JournalEntry>> {
11717        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11718        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11719
11720        let je_config = match self.resolve_coa_framework() {
11721            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11722            CoAFramework::GermanSkr04 => {
11723                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11724                DocumentFlowJeConfig::from(&fa)
11725            }
11726            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11727        };
11728
11729        let populate_fec = je_config.populate_fec_fields;
11730        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11731
11732        // SP3.12 — propagate cached priors so document-flow JEs receive
11733        // the same lines-per-JE padding as standalone JEs.
11734        if let Some(ref priors) = self.cached_priors {
11735            generator.set_loaded_priors(priors.clone());
11736        }
11737
11738        // Master-data CC / PC pools so document-flow-derived JEs
11739        // (P2P / O2C postings) reference IDs that join back to the
11740        // cost-centers / profit-centers masters.  Same plumbing as
11741        // for `JeGenerator` above; falls back to hardcoded const
11742        // pools when masters are absent.
11743        let cc_pool: Vec<String> = self
11744            .master_data
11745            .cost_centers
11746            .iter()
11747            .map(|c| c.id.clone())
11748            .collect();
11749        let pc_pool: Vec<String> = self
11750            .master_data
11751            .profit_centers
11752            .iter()
11753            .map(|p| p.id.clone())
11754            .collect();
11755        if !cc_pool.is_empty() {
11756            generator.set_cost_center_pool(cc_pool);
11757        }
11758        if !pc_pool.is_empty() {
11759            generator.set_profit_center_pool(pc_pool);
11760        }
11761
11762        // Build auxiliary account lookup from vendor/customer master data so that
11763        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11764        // PCG "4010001") instead of raw partner IDs.
11765        if populate_fec {
11766            let mut aux_lookup = std::collections::HashMap::new();
11767            for vendor in &self.master_data.vendors {
11768                if let Some(ref aux) = vendor.auxiliary_gl_account {
11769                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11770                }
11771            }
11772            for customer in &self.master_data.customers {
11773                if let Some(ref aux) = customer.auxiliary_gl_account {
11774                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11775                }
11776            }
11777            if !aux_lookup.is_empty() {
11778                generator.set_auxiliary_account_lookup(aux_lookup);
11779            }
11780        }
11781
11782        let mut entries = Vec::new();
11783
11784        // Generate JEs from P2P chains
11785        for chain in &flows.p2p_chains {
11786            let chain_entries = generator.generate_from_p2p_chain(chain);
11787            entries.extend(chain_entries);
11788            if let Some(pb) = &pb {
11789                pb.inc(1);
11790            }
11791        }
11792
11793        // Generate JEs from O2C chains
11794        for chain in &flows.o2c_chains {
11795            let chain_entries = generator.generate_from_o2c_chain(chain);
11796            entries.extend(chain_entries);
11797            if let Some(pb) = &pb {
11798                pb.inc(1);
11799            }
11800        }
11801
11802        if let Some(pb) = pb {
11803            pb.finish_with_message(format!(
11804                "Generated {} JEs from document flows",
11805                entries.len()
11806            ));
11807        }
11808
11809        Ok(entries)
11810    }
11811
11812    /// Generate journal entries from payroll runs.
11813    ///
11814    /// Creates one JE per payroll run:
11815    /// - DR Salaries & Wages (6100) for gross pay
11816    /// - CR Payroll Clearing (9100) for gross pay
11817    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11818        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11819
11820        let mut jes = Vec::with_capacity(payroll_runs.len());
11821
11822        for run in payroll_runs {
11823            let mut je = JournalEntry::new_simple(
11824                format!("JE-PAYROLL-{}", run.payroll_id),
11825                run.company_code.clone(),
11826                run.run_date,
11827                format!("Payroll {}", run.payroll_id),
11828            );
11829
11830            // Debit Salaries & Wages for gross pay
11831            je.add_line(JournalEntryLine {
11832                line_number: 1,
11833                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11834                debit_amount: run.total_gross,
11835                reference: Some(run.payroll_id.clone()),
11836                text: Some(format!(
11837                    "Payroll {} ({} employees)",
11838                    run.payroll_id, run.employee_count
11839                )),
11840                ..Default::default()
11841            });
11842
11843            // Credit Payroll Clearing for gross pay
11844            je.add_line(JournalEntryLine {
11845                line_number: 2,
11846                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11847                credit_amount: run.total_gross,
11848                reference: Some(run.payroll_id.clone()),
11849                ..Default::default()
11850            });
11851
11852            jes.push(je);
11853        }
11854
11855        jes
11856    }
11857
11858    /// Link document flows to subledger records.
11859    ///
11860    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11861    /// ensuring subledger data is coherent with document flow data.
11862    fn link_document_flows_to_subledgers(
11863        &mut self,
11864        flows: &DocumentFlowSnapshot,
11865    ) -> SynthResult<SubledgerSnapshot> {
11866        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11867        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11868
11869        // Build vendor/customer name maps from master data for realistic subledger names
11870        let vendor_names: std::collections::HashMap<String, String> = self
11871            .master_data
11872            .vendors
11873            .iter()
11874            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11875            .collect();
11876        let customer_names: std::collections::HashMap<String, String> = self
11877            .master_data
11878            .customers
11879            .iter()
11880            .map(|c| (c.customer_id.clone(), c.name.clone()))
11881            .collect();
11882
11883        let mut linker = DocumentFlowLinker::new()
11884            .with_vendor_names(vendor_names)
11885            .with_customer_names(customer_names);
11886
11887        // Convert vendor invoices to AP invoices
11888        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11889        if let Some(pb) = &pb {
11890            pb.inc(flows.vendor_invoices.len() as u64);
11891        }
11892
11893        // Convert customer invoices to AR invoices
11894        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11895        if let Some(pb) = &pb {
11896            pb.inc(flows.customer_invoices.len() as u64);
11897        }
11898
11899        if let Some(pb) = pb {
11900            pb.finish_with_message(format!(
11901                "Linked {} AP and {} AR invoices",
11902                ap_invoices.len(),
11903                ar_invoices.len()
11904            ));
11905        }
11906
11907        Ok(SubledgerSnapshot {
11908            ap_invoices,
11909            ar_invoices,
11910            fa_records: Vec::new(),
11911            inventory_positions: Vec::new(),
11912            inventory_movements: Vec::new(),
11913            // Aging reports are computed after payment settlement in phase_document_flows.
11914            ar_aging_reports: Vec::new(),
11915            ap_aging_reports: Vec::new(),
11916            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11917            depreciation_runs: Vec::new(),
11918            inventory_valuations: Vec::new(),
11919            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11920            dunning_runs: Vec::new(),
11921            dunning_letters: Vec::new(),
11922        })
11923    }
11924
11925    /// Generate OCPM events from document flows.
11926    ///
11927    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11928    /// capturing the object-centric process perspective.
11929    #[allow(clippy::too_many_arguments)]
11930    fn generate_ocpm_events(
11931        &mut self,
11932        flows: &DocumentFlowSnapshot,
11933        sourcing: &SourcingSnapshot,
11934        hr: &HrSnapshot,
11935        manufacturing: &ManufacturingSnapshot,
11936        banking: &BankingSnapshot,
11937        audit: &AuditSnapshot,
11938        financial_reporting: &FinancialReportingSnapshot,
11939    ) -> SynthResult<OcpmSnapshot> {
11940        let total_chains = flows.p2p_chains.len()
11941            + flows.o2c_chains.len()
11942            + sourcing.sourcing_projects.len()
11943            + hr.payroll_runs.len()
11944            + manufacturing.production_orders.len()
11945            + banking.customers.len()
11946            + audit.engagements.len()
11947            + financial_reporting.bank_reconciliations.len();
11948        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11949
11950        // Create OCPM event log with standard types
11951        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11952        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11953
11954        // Configure the OCPM generator
11955        let ocpm_config = OcpmGeneratorConfig {
11956            generate_p2p: true,
11957            generate_o2c: true,
11958            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11959            generate_h2r: !hr.payroll_runs.is_empty(),
11960            generate_mfg: !manufacturing.production_orders.is_empty(),
11961            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11962            generate_bank: !banking.customers.is_empty(),
11963            generate_audit: !audit.engagements.is_empty(),
11964            happy_path_rate: 0.75,
11965            exception_path_rate: 0.20,
11966            error_path_rate: 0.05,
11967            add_duration_variability: true,
11968            duration_std_dev_factor: 0.3,
11969        };
11970        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11971        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11972
11973        // Get available users for resource assignment
11974        let available_users: Vec<String> = self
11975            .master_data
11976            .employees
11977            .iter()
11978            .take(20)
11979            .map(|e| e.user_id.clone())
11980            .collect();
11981
11982        // Deterministic base date from config (avoids Utc::now() non-determinism)
11983        let fallback_date =
11984            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11985        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11986            .unwrap_or(fallback_date);
11987        let base_midnight = base_date
11988            .and_hms_opt(0, 0, 0)
11989            .expect("midnight is always valid");
11990        let base_datetime =
11991            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11992
11993        // Helper closure to add case results to event log
11994        let add_result = |event_log: &mut OcpmEventLog,
11995                          result: datasynth_ocpm::CaseGenerationResult| {
11996            for event in result.events {
11997                event_log.add_event(event);
11998            }
11999            for object in result.objects {
12000                event_log.add_object(object);
12001            }
12002            for relationship in result.relationships {
12003                event_log.add_relationship(relationship);
12004            }
12005            for corr in result.correlation_events {
12006                event_log.add_correlation_event(corr);
12007            }
12008            event_log.add_case(result.case_trace);
12009        };
12010
12011        // Generate events from P2P chains
12012        for chain in &flows.p2p_chains {
12013            let po = &chain.purchase_order;
12014            let documents = P2pDocuments::new(
12015                &po.header.document_id,
12016                &po.vendor_id,
12017                &po.header.company_code,
12018                po.total_net_amount,
12019                &po.header.currency,
12020                &ocpm_uuid_factory,
12021            )
12022            .with_goods_receipt(
12023                chain
12024                    .goods_receipts
12025                    .first()
12026                    .map(|gr| gr.header.document_id.as_str())
12027                    .unwrap_or(""),
12028                &ocpm_uuid_factory,
12029            )
12030            .with_invoice(
12031                chain
12032                    .vendor_invoice
12033                    .as_ref()
12034                    .map(|vi| vi.header.document_id.as_str())
12035                    .unwrap_or(""),
12036                &ocpm_uuid_factory,
12037            )
12038            .with_payment(
12039                chain
12040                    .payment
12041                    .as_ref()
12042                    .map(|p| p.header.document_id.as_str())
12043                    .unwrap_or(""),
12044                &ocpm_uuid_factory,
12045            );
12046
12047            let start_time =
12048                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
12049            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
12050            add_result(&mut event_log, result);
12051
12052            if let Some(pb) = &pb {
12053                pb.inc(1);
12054            }
12055        }
12056
12057        // Generate events from O2C chains
12058        for chain in &flows.o2c_chains {
12059            let so = &chain.sales_order;
12060            let documents = O2cDocuments::new(
12061                &so.header.document_id,
12062                &so.customer_id,
12063                &so.header.company_code,
12064                so.total_net_amount,
12065                &so.header.currency,
12066                &ocpm_uuid_factory,
12067            )
12068            .with_delivery(
12069                chain
12070                    .deliveries
12071                    .first()
12072                    .map(|d| d.header.document_id.as_str())
12073                    .unwrap_or(""),
12074                &ocpm_uuid_factory,
12075            )
12076            .with_invoice(
12077                chain
12078                    .customer_invoice
12079                    .as_ref()
12080                    .map(|ci| ci.header.document_id.as_str())
12081                    .unwrap_or(""),
12082                &ocpm_uuid_factory,
12083            )
12084            .with_receipt(
12085                chain
12086                    .customer_receipt
12087                    .as_ref()
12088                    .map(|r| r.header.document_id.as_str())
12089                    .unwrap_or(""),
12090                &ocpm_uuid_factory,
12091            );
12092
12093            let start_time =
12094                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
12095            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
12096            add_result(&mut event_log, result);
12097
12098            if let Some(pb) = &pb {
12099                pb.inc(1);
12100            }
12101        }
12102
12103        // Generate events from S2C sourcing projects
12104        for project in &sourcing.sourcing_projects {
12105            // Find vendor from contracts or qualifications
12106            let vendor_id = sourcing
12107                .contracts
12108                .iter()
12109                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12110                .map(|c| c.vendor_id.clone())
12111                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
12112                .or_else(|| {
12113                    self.master_data
12114                        .vendors
12115                        .first()
12116                        .map(|v| v.vendor_id.clone())
12117                })
12118                .unwrap_or_else(|| "V000".to_string());
12119            let mut docs = S2cDocuments::new(
12120                &project.project_id,
12121                &vendor_id,
12122                &project.company_code,
12123                project.estimated_annual_spend,
12124                &ocpm_uuid_factory,
12125            );
12126            // Link RFx if available
12127            if let Some(rfx) = sourcing
12128                .rfx_events
12129                .iter()
12130                .find(|r| r.sourcing_project_id == project.project_id)
12131            {
12132                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
12133                // Link winning bid (status == Accepted)
12134                if let Some(bid) = sourcing.bids.iter().find(|b| {
12135                    b.rfx_id == rfx.rfx_id
12136                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
12137                }) {
12138                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
12139                }
12140            }
12141            // Link contract
12142            if let Some(contract) = sourcing
12143                .contracts
12144                .iter()
12145                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12146            {
12147                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
12148            }
12149            let start_time = base_datetime - chrono::Duration::days(90);
12150            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
12151            add_result(&mut event_log, result);
12152
12153            if let Some(pb) = &pb {
12154                pb.inc(1);
12155            }
12156        }
12157
12158        // Generate events from H2R payroll runs
12159        for run in &hr.payroll_runs {
12160            // Use first matching payroll line item's employee, or fallback
12161            let employee_id = hr
12162                .payroll_line_items
12163                .iter()
12164                .find(|li| li.payroll_id == run.payroll_id)
12165                .map(|li| li.employee_id.as_str())
12166                .unwrap_or("EMP000");
12167            let docs = H2rDocuments::new(
12168                &run.payroll_id,
12169                employee_id,
12170                &run.company_code,
12171                run.total_gross,
12172                &ocpm_uuid_factory,
12173            )
12174            .with_time_entries(
12175                hr.time_entries
12176                    .iter()
12177                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
12178                    .take(5)
12179                    .map(|t| t.entry_id.as_str())
12180                    .collect(),
12181            );
12182            let start_time = base_datetime - chrono::Duration::days(30);
12183            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
12184            add_result(&mut event_log, result);
12185
12186            if let Some(pb) = &pb {
12187                pb.inc(1);
12188            }
12189        }
12190
12191        // Generate events from MFG production orders
12192        for order in &manufacturing.production_orders {
12193            let mut docs = MfgDocuments::new(
12194                &order.order_id,
12195                &order.material_id,
12196                &order.company_code,
12197                order.planned_quantity,
12198                &ocpm_uuid_factory,
12199            )
12200            .with_operations(
12201                order
12202                    .operations
12203                    .iter()
12204                    .map(|o| format!("OP-{:04}", o.operation_number))
12205                    .collect::<Vec<_>>()
12206                    .iter()
12207                    .map(std::string::String::as_str)
12208                    .collect(),
12209            );
12210            // Link quality inspection if available (via reference_id matching order_id)
12211            if let Some(insp) = manufacturing
12212                .quality_inspections
12213                .iter()
12214                .find(|i| i.reference_id == order.order_id)
12215            {
12216                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
12217            }
12218            // Link cycle count if available (match by material_id in items)
12219            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
12220                cc.items
12221                    .iter()
12222                    .any(|item| item.material_id == order.material_id)
12223            }) {
12224                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
12225            }
12226            let start_time = base_datetime - chrono::Duration::days(60);
12227            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
12228            add_result(&mut event_log, result);
12229
12230            if let Some(pb) = &pb {
12231                pb.inc(1);
12232            }
12233        }
12234
12235        // Generate events from Banking customers
12236        for customer in &banking.customers {
12237            let customer_id_str = customer.customer_id.to_string();
12238            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
12239            // Link accounts (primary_owner_id matches customer_id)
12240            if let Some(account) = banking
12241                .accounts
12242                .iter()
12243                .find(|a| a.primary_owner_id == customer.customer_id)
12244            {
12245                let account_id_str = account.account_id.to_string();
12246                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
12247                // Link transactions for this account
12248                let txn_strs: Vec<String> = banking
12249                    .transactions
12250                    .iter()
12251                    .filter(|t| t.account_id == account.account_id)
12252                    .take(10)
12253                    .map(|t| t.transaction_id.to_string())
12254                    .collect();
12255                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
12256                let txn_amounts: Vec<rust_decimal::Decimal> = banking
12257                    .transactions
12258                    .iter()
12259                    .filter(|t| t.account_id == account.account_id)
12260                    .take(10)
12261                    .map(|t| t.amount)
12262                    .collect();
12263                if !txn_ids.is_empty() {
12264                    docs = docs.with_transactions(txn_ids, txn_amounts);
12265                }
12266            }
12267            let start_time = base_datetime - chrono::Duration::days(180);
12268            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
12269            add_result(&mut event_log, result);
12270
12271            if let Some(pb) = &pb {
12272                pb.inc(1);
12273            }
12274        }
12275
12276        // Generate events from Audit engagements
12277        for engagement in &audit.engagements {
12278            let engagement_id_str = engagement.engagement_id.to_string();
12279            let docs = AuditDocuments::new(
12280                &engagement_id_str,
12281                &engagement.client_entity_id,
12282                &ocpm_uuid_factory,
12283            )
12284            .with_workpapers(
12285                audit
12286                    .workpapers
12287                    .iter()
12288                    .filter(|w| w.engagement_id == engagement.engagement_id)
12289                    .take(10)
12290                    .map(|w| w.workpaper_id.to_string())
12291                    .collect::<Vec<_>>()
12292                    .iter()
12293                    .map(std::string::String::as_str)
12294                    .collect(),
12295            )
12296            .with_evidence(
12297                audit
12298                    .evidence
12299                    .iter()
12300                    .filter(|e| e.engagement_id == engagement.engagement_id)
12301                    .take(10)
12302                    .map(|e| e.evidence_id.to_string())
12303                    .collect::<Vec<_>>()
12304                    .iter()
12305                    .map(std::string::String::as_str)
12306                    .collect(),
12307            )
12308            .with_risks(
12309                audit
12310                    .risk_assessments
12311                    .iter()
12312                    .filter(|r| r.engagement_id == engagement.engagement_id)
12313                    .take(5)
12314                    .map(|r| r.risk_id.to_string())
12315                    .collect::<Vec<_>>()
12316                    .iter()
12317                    .map(std::string::String::as_str)
12318                    .collect(),
12319            )
12320            .with_findings(
12321                audit
12322                    .findings
12323                    .iter()
12324                    .filter(|f| f.engagement_id == engagement.engagement_id)
12325                    .take(5)
12326                    .map(|f| f.finding_id.to_string())
12327                    .collect::<Vec<_>>()
12328                    .iter()
12329                    .map(std::string::String::as_str)
12330                    .collect(),
12331            )
12332            .with_judgments(
12333                audit
12334                    .judgments
12335                    .iter()
12336                    .filter(|j| j.engagement_id == engagement.engagement_id)
12337                    .take(5)
12338                    .map(|j| j.judgment_id.to_string())
12339                    .collect::<Vec<_>>()
12340                    .iter()
12341                    .map(std::string::String::as_str)
12342                    .collect(),
12343            );
12344            let start_time = base_datetime - chrono::Duration::days(120);
12345            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12346            add_result(&mut event_log, result);
12347
12348            if let Some(pb) = &pb {
12349                pb.inc(1);
12350            }
12351        }
12352
12353        // Generate events from Bank Reconciliations
12354        for recon in &financial_reporting.bank_reconciliations {
12355            let docs = BankReconDocuments::new(
12356                &recon.reconciliation_id,
12357                &recon.bank_account_id,
12358                &recon.company_code,
12359                recon.bank_ending_balance,
12360                &ocpm_uuid_factory,
12361            )
12362            .with_statement_lines(
12363                recon
12364                    .statement_lines
12365                    .iter()
12366                    .take(20)
12367                    .map(|l| l.line_id.as_str())
12368                    .collect(),
12369            )
12370            .with_reconciling_items(
12371                recon
12372                    .reconciling_items
12373                    .iter()
12374                    .take(10)
12375                    .map(|i| i.item_id.as_str())
12376                    .collect(),
12377            );
12378            let start_time = base_datetime - chrono::Duration::days(30);
12379            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12380            add_result(&mut event_log, result);
12381
12382            if let Some(pb) = &pb {
12383                pb.inc(1);
12384            }
12385        }
12386
12387        // Compute process variants
12388        event_log.compute_variants();
12389
12390        let summary = event_log.summary();
12391
12392        if let Some(pb) = pb {
12393            pb.finish_with_message(format!(
12394                "Generated {} OCPM events, {} objects",
12395                summary.event_count, summary.object_count
12396            ));
12397        }
12398
12399        Ok(OcpmSnapshot {
12400            event_count: summary.event_count,
12401            object_count: summary.object_count,
12402            case_count: summary.case_count,
12403            event_log: Some(event_log),
12404        })
12405    }
12406
12407    /// Inject anomalies into journal entries.
12408    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12409        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12410
12411        // Read anomaly rates from config instead of using hardcoded values.
12412        // Priority: anomaly_injection config > fraud config > default 0.02
12413        let total_rate = if self.config.anomaly_injection.enabled {
12414            self.config.anomaly_injection.rates.total_rate
12415        } else if self.config.fraud.enabled {
12416            self.config.fraud.fraud_rate
12417        } else {
12418            0.02
12419        };
12420
12421        let fraud_rate = if self.config.anomaly_injection.enabled {
12422            self.config.anomaly_injection.rates.fraud_rate
12423        } else {
12424            AnomalyRateConfig::default().fraud_rate
12425        };
12426
12427        let error_rate = if self.config.anomaly_injection.enabled {
12428            self.config.anomaly_injection.rates.error_rate
12429        } else {
12430            AnomalyRateConfig::default().error_rate
12431        };
12432
12433        let process_issue_rate = if self.config.anomaly_injection.enabled {
12434            self.config.anomaly_injection.rates.process_rate
12435        } else {
12436            AnomalyRateConfig::default().process_issue_rate
12437        };
12438
12439        let anomaly_config = AnomalyInjectorConfig {
12440            rates: AnomalyRateConfig {
12441                total_rate,
12442                fraud_rate,
12443                error_rate,
12444                process_issue_rate,
12445                ..Default::default()
12446            },
12447            // Fraud behavioral-bias signatures now flow from config (the subtlety lever); defaults
12448            // match the engine's historical hardcoded values, so output is unchanged unless overridden.
12449            enhanced: EnhancedInjectionConfig {
12450                fraud_behavioral_bias: self.config.fraud.effective_bias().to_core(),
12451                // Persistent fraud campaigns (A1) — off unless config opts in.
12452                fraud_campaign: self.config.fraud.campaigns.clone(),
12453                ..Default::default()
12454            },
12455            seed: self.seed + 5000,
12456            ..Default::default()
12457        };
12458
12459        let mut injector = AnomalyInjector::new(anomaly_config);
12460        let result = injector.process_entries(entries);
12461
12462        // Central concentration abstraction (#143, Phase 1): run the post-process
12463        // pipeline AFTER per-entry strategies. The pipeline merges the SOTA-12
12464        // tagger + new passes (trading-partner pool, Phase-2 account substitution)
12465        // through a single integration point — see
12466        // docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md.
12467        //
12468        // Back-compat: the legacy `anomaly_injection.source_conditional_rarity_rate`
12469        // key remains honored. If `concentration.source_conditional_rarity` is also
12470        // set in the same config, the unified DSL field wins.
12471        let (sota12_tagged, consolidation_outlier_expanded): (usize, usize) = {
12472            use datasynth_config::schema::{
12473                ConcentrationConfig, ConsolidationOutlierPassConfig,
12474                SourceConditionalRarityPassConfig,
12475            };
12476            use datasynth_generators::concentration::ConcentrationPipeline;
12477
12478            // Decide effective ConcentrationConfig: start from user config, then
12479            // back-fill from the legacy SOTA-12 key if the unified DSL didn't set it.
12480            let mut effective: ConcentrationConfig = self.config.concentration.clone();
12481            if effective.source_conditional_rarity.is_none() {
12482                if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12483                    effective.enabled = true;
12484                    effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12485                        rate,
12486                        min_surprise: None,
12487                        min_per_source_lines: None,
12488                    });
12489                }
12490            }
12491            // v5.30 B2 (#154) — back-compat: surface
12492            // `anomaly_injection.rates.consolidation_outlier_rate` as a
12493            // `ConsolidationOutlierPassConfig` if the unified DSL didn't
12494            // set one. Default 0.001 baseline shipped via the schema's
12495            // `default_consolidation_outlier_rate` — only synthesise the
12496            // pass when the rate is > 0, otherwise it's a no-op anyway.
12497            if effective.consolidation_outlier.is_none() {
12498                let rate = self
12499                    .config
12500                    .anomaly_injection
12501                    .rates
12502                    .consolidation_outlier_rate;
12503                if rate > 0.0 {
12504                    effective.enabled = true;
12505                    effective.consolidation_outlier = Some(ConsolidationOutlierPassConfig {
12506                        rate,
12507                        ..Default::default()
12508                    });
12509                }
12510            }
12511
12512            if !effective.enabled {
12513                (0, 0)
12514            } else {
12515                let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12516                    SynthError::generation(format!(
12517                        "ConcentrationPipeline construction failed: {e}"
12518                    ))
12519                })?;
12520                if !pipeline.is_active() {
12521                    (0, 0)
12522                } else {
12523                    // Per-pipeline seed disjoint from every other generator stream.
12524                    const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12525                    let stats =
12526                        pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12527                    let sota12: usize = stats
12528                        .iter()
12529                        .filter(|s| s.pass == "source_conditional_rarity")
12530                        .map(|s| s.entries_modified)
12531                        .sum();
12532                    let consol: usize = stats
12533                        .iter()
12534                        .filter(|s| s.pass == "consolidation_outlier")
12535                        .map(|s| s.entries_modified)
12536                        .sum();
12537                    (sota12, consol)
12538                }
12539            }
12540        };
12541
12542        if let Some(pb) = &pb {
12543            pb.inc(entries.len() as u64);
12544            pb.finish_with_message("Anomaly injection complete");
12545        }
12546
12547        let mut by_type = HashMap::new();
12548        for label in &result.labels {
12549            *by_type
12550                .entry(format!("{:?}", label.anomaly_type))
12551                .or_insert(0) += 1;
12552        }
12553        if sota12_tagged > 0 {
12554            *by_type
12555                .entry("SourceConditionalRarity".to_string())
12556                .or_insert(0) += sota12_tagged;
12557        }
12558        // v5.30 B2 (#154): record the consolidation-outlier expansion
12559        // count under a stable label key so the orchestrator's run
12560        // report surfaces the heavy-tail emission rate alongside the
12561        // other anomaly buckets.
12562        if consolidation_outlier_expanded > 0 {
12563            *by_type
12564                .entry("ConsolidationOutlier".to_string())
12565                .or_insert(0) += consolidation_outlier_expanded;
12566        }
12567
12568        Ok(AnomalyLabels {
12569            labels: result.labels,
12570            summary: Some(result.summary),
12571            by_type,
12572            carry_forward: result.carry_forward,
12573        })
12574    }
12575
12576    /// Validate journal entries using running balance tracker.
12577    ///
12578    /// Applies all entries to the balance tracker and validates:
12579    /// - Each entry is internally balanced (debits = credits)
12580    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
12581    ///
12582    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
12583    /// excluded from balance validation as they may be intentionally unbalanced.
12584    fn validate_journal_entries(
12585        &mut self,
12586        entries: &[JournalEntry],
12587    ) -> SynthResult<BalanceValidationResult> {
12588        // Filter out entries with human errors as they may be intentionally unbalanced
12589        let clean_entries: Vec<&JournalEntry> = entries
12590            .iter()
12591            .filter(|e| {
12592                e.header
12593                    .header_text
12594                    .as_ref()
12595                    .map(|t| !t.contains("[HUMAN_ERROR:"))
12596                    .unwrap_or(true)
12597            })
12598            .collect();
12599
12600        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12601
12602        // Configure tracker to not fail on errors (collect them instead)
12603        let config = BalanceTrackerConfig {
12604            validate_on_each_entry: false,   // We'll validate at the end
12605            track_history: false,            // Skip history for performance
12606            fail_on_validation_error: false, // Collect errors, don't fail
12607            ..Default::default()
12608        };
12609        let validation_currency = self
12610            .config
12611            .companies
12612            .first()
12613            .map(|c| c.currency.clone())
12614            .unwrap_or_else(|| "USD".to_string());
12615
12616        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12617
12618        // Apply clean entries (without human errors)
12619        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12620        let errors = tracker.apply_entries(&clean_refs);
12621
12622        if let Some(pb) = &pb {
12623            pb.inc(entries.len() as u64);
12624        }
12625
12626        // Check if any entries were unbalanced
12627        // Note: When fail_on_validation_error is false, errors are stored in tracker
12628        let has_unbalanced = tracker
12629            .get_validation_errors()
12630            .iter()
12631            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12632
12633        // Validate balance sheet for each company
12634        // Include both returned errors and collected validation errors
12635        let mut all_errors = errors;
12636        all_errors.extend(tracker.get_validation_errors().iter().cloned());
12637        let company_codes: Vec<String> = self
12638            .config
12639            .companies
12640            .iter()
12641            .map(|c| c.code.clone())
12642            .collect();
12643
12644        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12645            .map(|d| d + chrono::Months::new(self.config.global.period_months))
12646            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12647
12648        for company_code in &company_codes {
12649            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12650                all_errors.push(e);
12651            }
12652        }
12653
12654        // Get statistics after all mutable operations are done
12655        let stats = tracker.get_statistics();
12656
12657        // Determine if balanced overall
12658        let is_balanced = all_errors.is_empty();
12659
12660        if let Some(pb) = pb {
12661            let msg = if is_balanced {
12662                "Balance validation passed"
12663            } else {
12664                "Balance validation completed with errors"
12665            };
12666            pb.finish_with_message(msg);
12667        }
12668
12669        Ok(BalanceValidationResult {
12670            validated: true,
12671            is_balanced,
12672            entries_processed: stats.entries_processed,
12673            total_debits: stats.total_debits,
12674            total_credits: stats.total_credits,
12675            accounts_tracked: stats.accounts_tracked,
12676            companies_tracked: stats.companies_tracked,
12677            validation_errors: all_errors,
12678            has_unbalanced_entries: has_unbalanced,
12679        })
12680    }
12681
12682    /// Inject data quality variations into journal entries.
12683    ///
12684    /// Applies typos, missing values, and format variations to make
12685    /// the synthetic data more realistic for testing data cleaning pipelines.
12686    fn inject_data_quality(
12687        &mut self,
12688        entries: &mut [JournalEntry],
12689    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12690        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12691
12692        // Build config from user-specified schema settings when data_quality is enabled;
12693        // otherwise fall back to the low-rate minimal() preset.
12694        let config = if self.config.data_quality.enabled {
12695            let dq = &self.config.data_quality;
12696            // Propagate per-field rates and protected fields from the schema
12697            // so users can dial in real-production NULL profiles per field
12698            // (e.g. CostCenter 96.5% NULL, Invoice_Reference 100% NULL).
12699            let field_rates = dq.missing_values.field_rates.clone();
12700            let mut required_fields: std::collections::HashSet<String> =
12701                dq.missing_values.protected_fields.iter().cloned().collect();
12702            // Always preserve audit-critical identifiers regardless of
12703            // user config — losing these breaks downstream joins.
12704            for f in [
12705                "document_id",
12706                "company_code",
12707                "posting_date",
12708                "fiscal_year",
12709                "fiscal_period",
12710                "gl_account",
12711                "line_number",
12712                "transaction_id",
12713            ] {
12714                required_fields.insert(f.to_string());
12715            }
12716            DataQualityConfig {
12717                enable_missing_values: dq.missing_values.enabled,
12718                missing_values: datasynth_generators::MissingValueConfig {
12719                    global_rate: dq.effective_missing_rate(),
12720                    field_rates,
12721                    required_fields,
12722                    ..Default::default()
12723                },
12724                enable_format_variations: dq.format_variations.enabled,
12725                format_variations: datasynth_generators::FormatVariationConfig {
12726                    date_variation_rate: dq.format_variations.dates.rate,
12727                    amount_variation_rate: dq.format_variations.amounts.rate,
12728                    identifier_variation_rate: dq.format_variations.identifiers.rate,
12729                    ..Default::default()
12730                },
12731                enable_duplicates: dq.duplicates.enabled,
12732                duplicates: datasynth_generators::DuplicateConfig {
12733                    duplicate_rate: dq.effective_duplicate_rate(),
12734                    ..Default::default()
12735                },
12736                enable_typos: dq.typos.enabled,
12737                typos: datasynth_generators::TypoConfig {
12738                    char_error_rate: dq.effective_typo_rate(),
12739                    ..Default::default()
12740                },
12741                enable_encoding_issues: dq.encoding_issues.enabled,
12742                encoding_issue_rate: dq.encoding_issues.rate,
12743                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
12744                track_statistics: true,
12745            }
12746        } else {
12747            DataQualityConfig::minimal()
12748        };
12749        let mut injector = DataQualityInjector::new(config);
12750
12751        // Wire country pack for locale-aware format baselines
12752        injector.set_country_pack(self.primary_pack().clone());
12753
12754        // Build context for missing value decisions
12755        let context = HashMap::new();
12756
12757        for entry in entries.iter_mut() {
12758            // Process header_text field (common target for typos)
12759            if let Some(text) = &entry.header.header_text {
12760                let processed = injector.process_text_field(
12761                    "header_text",
12762                    text,
12763                    &entry.header.document_id.to_string(),
12764                    &context,
12765                );
12766                match processed {
12767                    Some(new_text) if new_text != *text => {
12768                        entry.header.header_text = Some(new_text);
12769                    }
12770                    None => {
12771                        entry.header.header_text = None; // Missing value
12772                    }
12773                    _ => {}
12774                }
12775            }
12776
12777            // Process reference field
12778            if let Some(ref_text) = &entry.header.reference {
12779                let processed = injector.process_text_field(
12780                    "reference",
12781                    ref_text,
12782                    &entry.header.document_id.to_string(),
12783                    &context,
12784                );
12785                match processed {
12786                    Some(new_text) if new_text != *ref_text => {
12787                        entry.header.reference = Some(new_text);
12788                    }
12789                    None => {
12790                        entry.header.reference = None;
12791                    }
12792                    _ => {}
12793                }
12794            }
12795
12796            // Process user_persona field (potential for typos in user IDs)
12797            let user_persona = entry.header.user_persona.clone();
12798            if let Some(processed) = injector.process_text_field(
12799                "user_persona",
12800                &user_persona,
12801                &entry.header.document_id.to_string(),
12802                &context,
12803            ) {
12804                if processed != user_persona {
12805                    entry.header.user_persona = processed;
12806                }
12807            }
12808
12809            // Process line items
12810            for line in &mut entry.lines {
12811                // Process line description if present
12812                if let Some(ref text) = line.line_text {
12813                    let processed = injector.process_text_field(
12814                        "line_text",
12815                        text,
12816                        &entry.header.document_id.to_string(),
12817                        &context,
12818                    );
12819                    match processed {
12820                        Some(new_text) if new_text != *text => {
12821                            line.line_text = Some(new_text);
12822                        }
12823                        None => {
12824                            line.line_text = None;
12825                        }
12826                        _ => {}
12827                    }
12828                }
12829
12830                // Process cost_center if present
12831                if let Some(cc) = &line.cost_center {
12832                    let processed = injector.process_text_field(
12833                        "cost_center",
12834                        cc,
12835                        &entry.header.document_id.to_string(),
12836                        &context,
12837                    );
12838                    match processed {
12839                        Some(new_cc) if new_cc != *cc => {
12840                            line.cost_center = Some(new_cc);
12841                        }
12842                        None => {
12843                            line.cost_center = None;
12844                        }
12845                        _ => {}
12846                    }
12847                }
12848
12849                // Extended field coverage (v5.6+): apply NULL injection to
12850                // every Option<String> on the line so users can match
12851                // arbitrary real-production NULL profiles via
12852                // `data_quality.missing_values.field_rates`.
12853                //
12854                // Local macro helper: runs `process_text_field` on the field and
12855                // writes the result back (changed `Some`, `None`, or unchanged).
12856                macro_rules! process_opt_field {
12857                    ($field_name:expr, $opt:expr) => {
12858                        if let Some(val) = $opt.as_ref() {
12859                            match injector.process_text_field(
12860                                $field_name,
12861                                val,
12862                                &entry.header.document_id.to_string(),
12863                                &context,
12864                            ) {
12865                                Some(new_val) if new_val != *val => {
12866                                    *$opt = Some(new_val);
12867                                }
12868                                None => {
12869                                    *$opt = None;
12870                                }
12871                                _ => {}
12872                            }
12873                        }
12874                    };
12875                }
12876
12877                process_opt_field!("profit_center", &mut line.profit_center);
12878                process_opt_field!("assignment", &mut line.assignment);
12879                process_opt_field!("tax_code", &mut line.tax_code);
12880                process_opt_field!("account_description", &mut line.account_description);
12881                process_opt_field!(
12882                    "auxiliary_account_number",
12883                    &mut line.auxiliary_account_number
12884                );
12885                process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12886                process_opt_field!("lettrage", &mut line.lettrage);
12887            }
12888
12889            if let Some(pb) = &pb {
12890                pb.inc(1);
12891            }
12892        }
12893
12894        if let Some(pb) = pb {
12895            pb.finish_with_message("Data quality injection complete");
12896        }
12897
12898        let quality_issues = injector.issues().to_vec();
12899        Ok((injector.stats().clone(), quality_issues))
12900    }
12901
12902    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12903    ///
12904    /// Creates complete audit documentation for each company in the configuration,
12905    /// following ISA standards:
12906    /// - ISA 210/220: Engagement acceptance and terms
12907    /// - ISA 230: Audit documentation (workpapers)
12908    /// - ISA 265: Control deficiencies (findings)
12909    /// - ISA 315/330: Risk assessment and response
12910    /// - ISA 500: Audit evidence
12911    /// - ISA 200: Professional judgment
12912    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12913        // Check if FSM-driven audit generation is enabled
12914        let use_fsm = self
12915            .config
12916            .audit
12917            .fsm
12918            .as_ref()
12919            .map(|f| f.enabled)
12920            .unwrap_or(false);
12921
12922        if use_fsm {
12923            return self.generate_audit_data_with_fsm(entries);
12924        }
12925
12926        // --- Legacy (non-FSM) audit generation follows ---
12927        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12928            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12929        let fiscal_year = start_date.year() as u16;
12930        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12931
12932        // Calculate rough total revenue from entries for materiality
12933        let total_revenue: rust_decimal::Decimal = entries
12934            .iter()
12935            .flat_map(|e| e.lines.iter())
12936            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12937            .map(|l| l.credit_amount)
12938            .sum();
12939
12940        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12941        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12942
12943        let mut snapshot = AuditSnapshot::default();
12944
12945        // Initialize generators
12946        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12947        // v3.3.2: thread the user-facing audit schema config into the
12948        // engagement generator (team size range).
12949        engagement_gen.set_team_config(&self.config.audit.team);
12950
12951        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12952        // v3.3.2: thread workpaper + review workflow schema config into
12953        // the workpaper generator (per-section count range + review
12954        // delay ranges).
12955        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12956        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12957        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12958        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12959        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12960        finding_gen.set_template_provider(self.template_provider.clone());
12961        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12962        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12963        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12964        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12965        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12966        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12967        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12968
12969        // Get list of accounts from CoA for risk assessment
12970        let accounts: Vec<String> = self
12971            .coa
12972            .as_ref()
12973            .map(|coa| {
12974                coa.get_postable_accounts()
12975                    .iter()
12976                    .map(|acc| acc.account_code().to_string())
12977                    .collect()
12978            })
12979            .unwrap_or_default();
12980
12981        // Generate engagements for each company
12982        for (i, company) in self.config.companies.iter().enumerate() {
12983            // Calculate company-specific revenue (proportional to volume weight)
12984            let company_revenue = total_revenue
12985                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12986
12987            // Generate engagements for this company
12988            let engagements_for_company =
12989                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12990            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12991                1
12992            } else {
12993                0
12994            };
12995
12996            for _eng_idx in 0..(engagements_for_company + extra) {
12997                // v3.3.2: draw engagement type from the user-configured
12998                // distribution instead of always using the default
12999                // (AnnualAudit). Falls back to the default when all
13000                // probabilities are zero.
13001                let eng_type =
13002                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
13003
13004                // Generate the engagement
13005                let mut engagement = engagement_gen.generate_engagement(
13006                    &company.code,
13007                    &company.name,
13008                    fiscal_year,
13009                    period_end,
13010                    company_revenue,
13011                    Some(eng_type),
13012                );
13013
13014                // Replace synthetic team IDs with real employee IDs from master data
13015                if !self.master_data.employees.is_empty() {
13016                    let emp_count = self.master_data.employees.len();
13017                    // Use employee IDs deterministically based on engagement index
13018                    let base = (i * 10 + _eng_idx) % emp_count;
13019                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
13020                        .employee_id
13021                        .clone();
13022                    engagement.engagement_manager_id = self.master_data.employees
13023                        [(base + 1) % emp_count]
13024                        .employee_id
13025                        .clone();
13026                    let real_team: Vec<String> = engagement
13027                        .team_member_ids
13028                        .iter()
13029                        .enumerate()
13030                        .map(|(j, _)| {
13031                            self.master_data.employees[(base + 2 + j) % emp_count]
13032                                .employee_id
13033                                .clone()
13034                        })
13035                        .collect();
13036                    engagement.team_member_ids = real_team;
13037                }
13038
13039                if let Some(pb) = &pb {
13040                    pb.inc(1);
13041                }
13042
13043                // Get team members from the engagement
13044                let team_members: Vec<String> = engagement.team_member_ids.clone();
13045
13046                // Generate workpapers for the engagement.
13047                // v3.3.2: honor `audit.generate_workpapers` — when false,
13048                // workpapers (and dependent evidence) are skipped while
13049                // the engagement itself, risk assessments, findings, etc.
13050                // still generate normally.
13051                let workpapers = if self.config.audit.generate_workpapers {
13052                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
13053                } else {
13054                    Vec::new()
13055                };
13056
13057                for wp in &workpapers {
13058                    if let Some(pb) = &pb {
13059                        pb.inc(1);
13060                    }
13061
13062                    // Generate evidence for each workpaper
13063                    let evidence = evidence_gen.generate_evidence_for_workpaper(
13064                        wp,
13065                        &team_members,
13066                        wp.preparer_date,
13067                    );
13068
13069                    for _ in &evidence {
13070                        if let Some(pb) = &pb {
13071                            pb.inc(1);
13072                        }
13073                    }
13074
13075                    snapshot.evidence.extend(evidence);
13076                }
13077
13078                // Generate risk assessments for the engagement
13079                let risks =
13080                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
13081
13082                for _ in &risks {
13083                    if let Some(pb) = &pb {
13084                        pb.inc(1);
13085                    }
13086                }
13087                snapshot.risk_assessments.extend(risks);
13088
13089                // Generate findings for the engagement
13090                let findings = finding_gen.generate_findings_for_engagement(
13091                    &engagement,
13092                    &workpapers,
13093                    &team_members,
13094                );
13095
13096                for _ in &findings {
13097                    if let Some(pb) = &pb {
13098                        pb.inc(1);
13099                    }
13100                }
13101                snapshot.findings.extend(findings);
13102
13103                // Generate professional judgments for the engagement
13104                let judgments =
13105                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
13106
13107                for _ in &judgments {
13108                    if let Some(pb) = &pb {
13109                        pb.inc(1);
13110                    }
13111                }
13112                snapshot.judgments.extend(judgments);
13113
13114                // ISA 505: External confirmations and responses
13115                let (confs, resps) =
13116                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
13117                snapshot.confirmations.extend(confs);
13118                snapshot.confirmation_responses.extend(resps);
13119
13120                // ISA 330: Procedure steps per workpaper
13121                let team_pairs: Vec<(String, String)> = team_members
13122                    .iter()
13123                    .map(|id| {
13124                        let name = self
13125                            .master_data
13126                            .employees
13127                            .iter()
13128                            .find(|e| e.employee_id == *id)
13129                            .map(|e| e.display_name.clone())
13130                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
13131                        (id.clone(), name)
13132                    })
13133                    .collect();
13134                for wp in &workpapers {
13135                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
13136                    snapshot.procedure_steps.extend(steps);
13137                }
13138
13139                // ISA 530: Samples per workpaper
13140                for wp in &workpapers {
13141                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
13142                        snapshot.samples.push(sample);
13143                    }
13144                }
13145
13146                // ISA 520: Analytical procedures
13147                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
13148                snapshot.analytical_results.extend(analytical);
13149
13150                // ISA 610: Internal audit function and reports
13151                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
13152                snapshot.ia_functions.push(ia_func);
13153                snapshot.ia_reports.extend(ia_reports);
13154
13155                // ISA 550: Related parties and transactions
13156                let vendor_names: Vec<String> = self
13157                    .master_data
13158                    .vendors
13159                    .iter()
13160                    .map(|v| v.name.clone())
13161                    .collect();
13162                let customer_names: Vec<String> = self
13163                    .master_data
13164                    .customers
13165                    .iter()
13166                    .map(|c| c.name.clone())
13167                    .collect();
13168                let (parties, rp_txns) =
13169                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
13170                snapshot.related_parties.extend(parties);
13171                snapshot.related_party_transactions.extend(rp_txns);
13172
13173                // Add workpapers after findings since findings need them
13174                snapshot.workpapers.extend(workpapers);
13175
13176                // Generate audit scope record for this engagement (one per engagement)
13177                {
13178                    let scope_id = format!(
13179                        "SCOPE-{}-{}",
13180                        engagement.engagement_id.simple(),
13181                        &engagement.client_entity_id
13182                    );
13183                    let scope = datasynth_core::models::audit::AuditScope::new(
13184                        scope_id.clone(),
13185                        engagement.engagement_id.to_string(),
13186                        engagement.client_entity_id.clone(),
13187                        engagement.materiality,
13188                    );
13189                    // Wire scope_id back to engagement
13190                    let mut eng = engagement;
13191                    eng.scope_id = Some(scope_id);
13192                    snapshot.audit_scopes.push(scope);
13193                    snapshot.engagements.push(eng);
13194                }
13195            }
13196        }
13197
13198        // ----------------------------------------------------------------
13199        // ISA 600: Group audit — component auditors, plan, instructions, reports
13200        // ----------------------------------------------------------------
13201        if self.config.companies.len() > 1 {
13202            // Use materiality from the first engagement if available, otherwise
13203            // derive a reasonable figure from total revenue.
13204            let group_materiality = snapshot
13205                .engagements
13206                .first()
13207                .map(|e| e.materiality)
13208                .unwrap_or_else(|| {
13209                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
13210                    total_revenue * pct
13211                });
13212
13213            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
13214            let group_engagement_id = snapshot
13215                .engagements
13216                .first()
13217                .map(|e| e.engagement_id.to_string())
13218                .unwrap_or_else(|| "GROUP-ENG".to_string());
13219
13220            let component_snapshot = component_gen.generate(
13221                &self.config.companies,
13222                group_materiality,
13223                &group_engagement_id,
13224                period_end,
13225            );
13226
13227            snapshot.component_auditors = component_snapshot.component_auditors;
13228            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
13229            snapshot.component_instructions = component_snapshot.component_instructions;
13230            snapshot.component_reports = component_snapshot.component_reports;
13231
13232            info!(
13233                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
13234                snapshot.component_auditors.len(),
13235                snapshot.component_instructions.len(),
13236                snapshot.component_reports.len(),
13237            );
13238        }
13239
13240        // ----------------------------------------------------------------
13241        // ISA 210: Engagement letters — one per engagement
13242        // ----------------------------------------------------------------
13243        {
13244            let applicable_framework = self
13245                .config
13246                .accounting_standards
13247                .framework
13248                .as_ref()
13249                .map(|f| format!("{f:?}"))
13250                .unwrap_or_else(|| "IFRS".to_string());
13251
13252            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
13253            let entity_count = self.config.companies.len();
13254
13255            for engagement in &snapshot.engagements {
13256                let company = self
13257                    .config
13258                    .companies
13259                    .iter()
13260                    .find(|c| c.code == engagement.client_entity_id);
13261                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
13262                let letter_date = engagement.planning_start;
13263                let letter = letter_gen.generate(
13264                    &engagement.engagement_id.to_string(),
13265                    &engagement.client_name,
13266                    entity_count,
13267                    engagement.period_end_date,
13268                    currency,
13269                    &applicable_framework,
13270                    letter_date,
13271                );
13272                snapshot.engagement_letters.push(letter);
13273            }
13274
13275            info!(
13276                "ISA 210 engagement letters: {} generated",
13277                snapshot.engagement_letters.len()
13278            );
13279        }
13280
13281        // ----------------------------------------------------------------
13282        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
13283        // ----------------------------------------------------------------
13284        if self.phase_config.generate_legal_documents {
13285            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
13286            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
13287            for engagement in &snapshot.engagements {
13288                // Build an employee name list for signatory drawing —
13289                // prefer employees from the engaged entity, fall back to
13290                // all employees.
13291                let employee_names: Vec<String> = self
13292                    .master_data
13293                    .employees
13294                    .iter()
13295                    .filter(|e| e.company_code == engagement.client_entity_id)
13296                    .map(|e| e.display_name.clone())
13297                    .collect();
13298                let names_to_use = if !employee_names.is_empty() {
13299                    employee_names
13300                } else {
13301                    self.master_data
13302                        .employees
13303                        .iter()
13304                        .take(10)
13305                        .map(|e| e.display_name.clone())
13306                        .collect()
13307                };
13308                let docs = legal_gen.generate(
13309                    &engagement.client_entity_id,
13310                    engagement.fiscal_year as i32,
13311                    &names_to_use,
13312                );
13313                snapshot.legal_documents.extend(docs);
13314            }
13315            info!(
13316                "v3.3.0 legal documents: {} emitted across {} engagements",
13317                snapshot.legal_documents.len(),
13318                snapshot.engagements.len()
13319            );
13320        }
13321
13322        // ----------------------------------------------------------------
13323        // v3.3.0: IT general controls — access logs + change records
13324        //
13325        // `ItControlsGenerator` runs one pass per company (not per
13326        // engagement) so employee sets and system catalogs stay
13327        // coherent. We derive the period from the earliest engagement's
13328        // planning_start through the latest engagement's period_end_date
13329        // for each company.
13330        // ----------------------------------------------------------------
13331        if self.phase_config.generate_it_controls {
13332            use datasynth_generators::it_controls_generator::ItControlsGenerator;
13333            use std::collections::HashMap;
13334            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
13335
13336            // Group engagements by company to produce one IT-controls
13337            // window per entity.
13338            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
13339                HashMap::new();
13340            for engagement in &snapshot.engagements {
13341                let entry = by_company
13342                    .entry(engagement.client_entity_id.clone())
13343                    .or_insert((engagement.planning_start, engagement.period_end_date));
13344                if engagement.planning_start < entry.0 {
13345                    entry.0 = engagement.planning_start;
13346                }
13347                if engagement.period_end_date > entry.1 {
13348                    entry.1 = engagement.period_end_date;
13349                }
13350            }
13351
13352            // Standard system catalog — populated from known ERP / app
13353            // names. Keeps the generator's data shape stable when the
13354            // user hasn't configured IT-system naming separately.
13355            let systems: Vec<String> = vec![
13356                "SAP ECC",
13357                "SAP S/4 HANA",
13358                "Oracle EBS",
13359                "Workday",
13360                "NetSuite",
13361                "Active Directory",
13362                "SharePoint",
13363                "Salesforce",
13364                "ServiceNow",
13365                "Jira",
13366                "GitHub Enterprise",
13367                "AWS Console",
13368                "Okta",
13369            ]
13370            .into_iter()
13371            .map(String::from)
13372            .collect();
13373
13374            for (company_code, (start, end)) in by_company {
13375                let emps: Vec<(String, String)> = self
13376                    .master_data
13377                    .employees
13378                    .iter()
13379                    .filter(|e| e.company_code == company_code)
13380                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13381                    .collect();
13382                if emps.is_empty() {
13383                    continue;
13384                }
13385                // Approximate the period in 30-day months, always adding one
13386                // extra month so a trailing partial month is covered (min 1).
13387                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13388                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13389                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13390                snapshot.it_controls_access_logs.extend(access_logs);
13391                snapshot.it_controls_change_records.extend(change_records);
13392            }
13393
13394            info!(
13395                "v3.3.0 IT controls: {} access logs, {} change records",
13396                snapshot.it_controls_access_logs.len(),
13397                snapshot.it_controls_change_records.len()
13398            );
13399        }
13400
13401        // ----------------------------------------------------------------
13402        // ISA 560 / IAS 10: Subsequent events
13403        // ----------------------------------------------------------------
13404        {
13405            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13406            let entity_codes: Vec<String> = self
13407                .config
13408                .companies
13409                .iter()
13410                .map(|c| c.code.clone())
13411                .collect();
13412            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13413            info!(
13414                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13415                subsequent.len(),
13416                subsequent
13417                    .iter()
13418                    .filter(|e| matches!(
13419                        e.classification,
13420                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13421                    ))
13422                    .count(),
13423                subsequent
13424                    .iter()
13425                    .filter(|e| matches!(
13426                        e.classification,
13427                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13428                    ))
13429                    .count(),
13430            );
13431            snapshot.subsequent_events = subsequent;
13432        }
13433
13434        // ----------------------------------------------------------------
13435        // ISA 402: Service organization controls
13436        // ----------------------------------------------------------------
13437        {
13438            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13439            let entity_codes: Vec<String> = self
13440                .config
13441                .companies
13442                .iter()
13443                .map(|c| c.code.clone())
13444                .collect();
13445            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13446            info!(
13447                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13448                soc_snapshot.service_organizations.len(),
13449                soc_snapshot.soc_reports.len(),
13450                soc_snapshot.user_entity_controls.len(),
13451            );
13452            snapshot.service_organizations = soc_snapshot.service_organizations;
13453            snapshot.soc_reports = soc_snapshot.soc_reports;
13454            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13455        }
13456
13457        // ----------------------------------------------------------------
13458        // ISA 570: Going concern assessments
13459        // ----------------------------------------------------------------
13460        {
13461            use datasynth_generators::audit::going_concern_generator::{
13462                GoingConcernGenerator, GoingConcernInput,
13463            };
13464            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13465            let entity_codes: Vec<String> = self
13466                .config
13467                .companies
13468                .iter()
13469                .map(|c| c.code.clone())
13470                .collect();
13471            // Assessment date = period end + 75 days (typical sign-off window).
13472            let assessment_date = period_end + chrono::Duration::days(75);
13473            let period_label = format!("FY{}", period_end.year());
13474
13475            // Build financial inputs from actual journal entries.
13476            //
13477            // We derive approximate P&L, working capital, and operating cash flow
13478            // by aggregating GL account balances from the journal entry population.
13479            // Account ranges used (standard chart):
13480            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
13481            //   Expenses:        6xxx (debit-normal)
13482            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
13483            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
13484            //   Operating CF:    net income used as-is (rough proxy, no D&A adjustment)
13485            let gc_inputs: Vec<GoingConcernInput> = self
13486                .config
13487                .companies
13488                .iter()
13489                .map(|company| {
13490                    let code = &company.code;
13491                    let mut revenue = rust_decimal::Decimal::ZERO;
13492                    let mut expenses = rust_decimal::Decimal::ZERO;
13493                    let mut current_assets = rust_decimal::Decimal::ZERO;
13494                    let mut current_liabs = rust_decimal::Decimal::ZERO;
13495                    let mut total_debt = rust_decimal::Decimal::ZERO;
13496
13497                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
13498                        for line in &je.lines {
13499                            let acct = line.gl_account.as_str();
13500                            let net = line.debit_amount - line.credit_amount;
13501                            if acct.starts_with('4') {
13502                                // Revenue accounts: credit-normal, so negative net = revenue earned
13503                                revenue -= net;
13504                            } else if acct.starts_with('6') {
13505                                // Expense accounts: debit-normal
13506                                expenses += net;
13507                            }
13508                            // Balance sheet accounts for working capital
13509                            if acct.starts_with('1') {
13510                                // Current asset accounts (1000–1499)
13511                                if let Ok(n) = acct.parse::<u32>() {
13512                                    if (1000..=1499).contains(&n) {
13513                                        current_assets += net;
13514                                    }
13515                                }
13516                            } else if acct.starts_with('2') {
13517                                if let Ok(n) = acct.parse::<u32>() {
13518                                    if (2000..=2499).contains(&n) {
13519                                        // Current liabilities
13520                                        current_liabs -= net; // credit-normal
13521                                    } else if (2500..=2999).contains(&n) {
13522                                        // Long-term debt
13523                                        total_debt -= net;
13524                                    }
13525                                }
13526                            }
13527                        }
13528                    }
13529
13530                    let net_income = revenue - expenses;
13531                    let working_capital = current_assets - current_liabs;
13532                    // Rough operating CF proxy: net income (full accrual CF calculation
13533                    // is done separately in the cash flow statement generator)
13534                    let operating_cash_flow = net_income;
13535
13536                    GoingConcernInput {
13537                        entity_code: code.clone(),
13538                        net_income,
13539                        working_capital,
13540                        operating_cash_flow,
13541                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13542                        assessment_date,
13543                    }
13544                })
13545                .collect();
13546
13547            let assessments = if gc_inputs.is_empty() {
13548                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13549            } else {
13550                gc_gen.generate_for_entities_with_inputs(
13551                    &entity_codes,
13552                    &gc_inputs,
13553                    assessment_date,
13554                    &period_label,
13555                )
13556            };
13557            info!(
13558                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13559                assessments.len(),
13560                assessments.iter().filter(|a| matches!(
13561                    a.auditor_conclusion,
13562                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13563                )).count(),
13564                assessments.iter().filter(|a| matches!(
13565                    a.auditor_conclusion,
13566                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13567                )).count(),
13568                assessments.iter().filter(|a| matches!(
13569                    a.auditor_conclusion,
13570                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13571                )).count(),
13572            );
13573            snapshot.going_concern_assessments = assessments;
13574        }
13575
13576        // ----------------------------------------------------------------
13577        // ISA 540: Accounting estimates
13578        // ----------------------------------------------------------------
13579        {
13580            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13581            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13582            let entity_codes: Vec<String> = self
13583                .config
13584                .companies
13585                .iter()
13586                .map(|c| c.code.clone())
13587                .collect();
13588            let estimates = est_gen.generate_for_entities(&entity_codes);
13589            info!(
13590                "ISA 540 accounting estimates: {} estimates across {} entities \
13591                 ({} with retrospective reviews, {} with auditor point estimates)",
13592                estimates.len(),
13593                entity_codes.len(),
13594                estimates
13595                    .iter()
13596                    .filter(|e| e.retrospective_review.is_some())
13597                    .count(),
13598                estimates
13599                    .iter()
13600                    .filter(|e| e.auditor_point_estimate.is_some())
13601                    .count(),
13602            );
13603            snapshot.accounting_estimates = estimates;
13604        }
13605
13606        // ----------------------------------------------------------------
13607        // ISA 700/701/705/706: Audit opinions (one per engagement)
13608        // ----------------------------------------------------------------
13609        {
13610            use datasynth_generators::audit::audit_opinion_generator::{
13611                AuditOpinionGenerator, AuditOpinionInput,
13612            };
13613
13614            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13615
13616            // Build inputs — one per engagement, linking findings and going concern.
13617            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13618                .engagements
13619                .iter()
13620                .map(|eng| {
13621                    // Collect findings for this engagement.
13622                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13623                        .findings
13624                        .iter()
13625                        .filter(|f| f.engagement_id == eng.engagement_id)
13626                        .cloned()
13627                        .collect();
13628
13629                    // Going concern for this entity.
13630                    let gc = snapshot
13631                        .going_concern_assessments
13632                        .iter()
13633                        .find(|g| g.entity_code == eng.client_entity_id)
13634                        .cloned();
13635
13636                    // Component reports: all reports in the snapshot, not filtered per engagement.
13637                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13638                        snapshot.component_reports.clone();
13639
13640                    let auditor = self
13641                        .master_data
13642                        .employees
13643                        .first()
13644                        .map(|e| e.display_name.clone())
13645                        .unwrap_or_else(|| "Global Audit LLP".into());
13646
13647                    let partner = self
13648                        .master_data
13649                        .employees
13650                        .get(1)
13651                        .map(|e| e.display_name.clone())
13652                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
13653
13654                    AuditOpinionInput {
13655                        entity_code: eng.client_entity_id.clone(),
13656                        entity_name: eng.client_name.clone(),
13657                        engagement_id: eng.engagement_id,
13658                        period_end: eng.period_end_date,
13659                        findings: eng_findings,
13660                        going_concern: gc,
13661                        component_reports: comp_reports,
13662                        // Mark as US-listed when the ISA framework setting is "pcaob" or "dual".
13663                        is_us_listed: {
13664                            let fw = &self.config.audit_standards.isa_compliance.framework;
13665                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13666                        },
13667                        auditor_name: auditor,
13668                        engagement_partner: partner,
13669                    }
13670                })
13671                .collect();
13672
13673            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13674
13675            for go in &generated_opinions {
13676                snapshot
13677                    .key_audit_matters
13678                    .extend(go.key_audit_matters.clone());
13679            }
13680            snapshot.audit_opinions = generated_opinions
13681                .into_iter()
13682                .map(|go| go.opinion)
13683                .collect();
13684
13685            info!(
13686                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13687                snapshot.audit_opinions.len(),
13688                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13689                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13690                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13691                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13692            );
13693        }
13694
13695        // ----------------------------------------------------------------
13696        // SOX 302 / 404 assessments
13697        // ----------------------------------------------------------------
13698        {
13699            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13700
13701            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13702
13703            for (i, company) in self.config.companies.iter().enumerate() {
13704                // Collect findings for this company's engagements.
13705                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13706                    .engagements
13707                    .iter()
13708                    .filter(|e| e.client_entity_id == company.code)
13709                    .map(|e| e.engagement_id)
13710                    .collect();
13711
13712                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13713                    .findings
13714                    .iter()
13715                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13716                    .cloned()
13717                    .collect();
13718
13719                // Derive executive names from employee list.
13720                let emp_count = self.master_data.employees.len();
13721                let ceo_name = if emp_count > 0 {
13722                    self.master_data.employees[i % emp_count]
13723                        .display_name
13724                        .clone()
13725                } else {
13726                    format!("CEO of {}", company.name)
13727                };
13728                let cfo_name = if emp_count > 1 {
13729                    self.master_data.employees[(i + 1) % emp_count]
13730                        .display_name
13731                        .clone()
13732                } else {
13733                    format!("CFO of {}", company.name)
13734                };
13735
13736                // Use engagement materiality if available.
13737                let materiality = snapshot
13738                    .engagements
13739                    .iter()
13740                    .find(|e| e.client_entity_id == company.code)
13741                    .map(|e| e.materiality)
13742                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13743
13744                let input = SoxGeneratorInput {
13745                    company_code: company.code.clone(),
13746                    company_name: company.name.clone(),
13747                    fiscal_year,
13748                    period_end,
13749                    findings: company_findings,
13750                    ceo_name,
13751                    cfo_name,
13752                    materiality_threshold: materiality,
13753                    revenue_percent: rust_decimal::Decimal::from(100),
13754                    assets_percent: rust_decimal::Decimal::from(100),
13755                    significant_accounts: vec![
13756                        "Revenue".into(),
13757                        "Accounts Receivable".into(),
13758                        "Inventory".into(),
13759                        "Fixed Assets".into(),
13760                        "Accounts Payable".into(),
13761                    ],
13762                };
13763
13764                let (certs, assessment) = sox_gen.generate(&input);
13765                snapshot.sox_302_certifications.extend(certs);
13766                snapshot.sox_404_assessments.push(assessment);
13767            }
13768
13769            info!(
13770                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13771                snapshot.sox_302_certifications.len(),
13772                snapshot.sox_404_assessments.len(),
13773                snapshot
13774                    .sox_404_assessments
13775                    .iter()
13776                    .filter(|a| a.icfr_effective)
13777                    .count(),
13778                snapshot
13779                    .sox_404_assessments
13780                    .iter()
13781                    .filter(|a| !a.icfr_effective)
13782                    .count(),
13783            );
13784        }
13785
13786        // ----------------------------------------------------------------
13787        // ISA 320: Materiality calculations (one per entity)
13788        // ----------------------------------------------------------------
13789        {
13790            use datasynth_generators::audit::materiality_generator::{
13791                MaterialityGenerator, MaterialityInput,
13792            };
13793
13794            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13795
13796            // Compute per-company financials from JEs.
13797            // Asset accounts start with '1', revenue with '4',
13798            // expense accounts with '5' or '6'.
13799            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13800
13801            for company in &self.config.companies {
13802                let company_code = company.code.clone();
13803
13804                // Revenue: credit-side entries on 4xxx accounts
13805                let company_revenue: rust_decimal::Decimal = entries
13806                    .iter()
13807                    .filter(|e| e.company_code() == company_code)
13808                    .flat_map(|e| e.lines.iter())
13809                    .filter(|l| l.account_code.starts_with('4'))
13810                    .map(|l| l.credit_amount)
13811                    .sum();
13812
13813                // Total assets: sum of debit postings on 1xxx accounts (credits not netted)
13814                let total_assets: rust_decimal::Decimal = entries
13815                    .iter()
13816                    .filter(|e| e.company_code() == company_code)
13817                    .flat_map(|e| e.lines.iter())
13818                    .filter(|l| l.account_code.starts_with('1'))
13819                    .map(|l| l.debit_amount)
13820                    .sum();
13821
13822                // Expenses: debit-side entries on 5xxx/6xxx accounts
13823                let total_expenses: rust_decimal::Decimal = entries
13824                    .iter()
13825                    .filter(|e| e.company_code() == company_code)
13826                    .flat_map(|e| e.lines.iter())
13827                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13828                    .map(|l| l.debit_amount)
13829                    .sum();
13830
13831                // Equity: sum of credit postings on 3xxx accounts (debits not netted)
13832                let equity: rust_decimal::Decimal = entries
13833                    .iter()
13834                    .filter(|e| e.company_code() == company_code)
13835                    .flat_map(|e| e.lines.iter())
13836                    .filter(|l| l.account_code.starts_with('3'))
13837                    .map(|l| l.credit_amount)
13838                    .sum();
13839
13840                let pretax_income = company_revenue - total_expenses;
13841
13842                // If the company has zero 4xxx revenue, fall back to a volume-weighted share of total_revenue
13843                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13844                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
13845                        .unwrap_or(rust_decimal::Decimal::ONE);
13846                    (
13847                        total_revenue * w,
13848                        total_revenue * w * rust_decimal::Decimal::from(3),
13849                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
13850                        total_revenue * w * rust_decimal::Decimal::from(2),
13851                    )
13852                } else {
13853                    (company_revenue, total_assets, pretax_income, equity)
13854                };
13855
13856                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
13857
13858                materiality_inputs.push(MaterialityInput {
13859                    entity_code: company_code,
13860                    period: format!("FY{}", fiscal_year),
13861                    revenue: rev,
13862                    pretax_income: pti,
13863                    total_assets: assets,
13864                    equity: eq,
13865                    gross_profit,
13866                });
13867            }
13868
13869            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13870
13871            info!(
13872                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13873                 {} total assets, {} equity benchmarks)",
13874                snapshot.materiality_calculations.len(),
13875                snapshot
13876                    .materiality_calculations
13877                    .iter()
13878                    .filter(|m| matches!(
13879                        m.benchmark,
13880                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13881                    ))
13882                    .count(),
13883                snapshot
13884                    .materiality_calculations
13885                    .iter()
13886                    .filter(|m| matches!(
13887                        m.benchmark,
13888                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13889                    ))
13890                    .count(),
13891                snapshot
13892                    .materiality_calculations
13893                    .iter()
13894                    .filter(|m| matches!(
13895                        m.benchmark,
13896                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13897                    ))
13898                    .count(),
13899                snapshot
13900                    .materiality_calculations
13901                    .iter()
13902                    .filter(|m| matches!(
13903                        m.benchmark,
13904                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13905                    ))
13906                    .count(),
13907            );
13908        }
13909
13910        // ----------------------------------------------------------------
13911        // ISA 315: Combined Risk Assessments (per entity, per account area)
13912        // ----------------------------------------------------------------
13913        {
13914            use datasynth_generators::audit::cra_generator::CraGenerator;
13915
13916            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13917
13918            // Build entity → scope_id map from already-generated scopes
13919            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13920                .audit_scopes
13921                .iter()
13922                .map(|s| (s.entity_code.clone(), s.id.clone()))
13923                .collect();
13924
13925            for company in &self.config.companies {
13926                let cras = cra_gen.generate_for_entity(&company.code, None);
13927                let scope_id = entity_scope_map.get(&company.code).cloned();
13928                let cras_with_scope: Vec<_> = cras
13929                    .into_iter()
13930                    .map(|mut cra| {
13931                        cra.scope_id = scope_id.clone();
13932                        cra
13933                    })
13934                    .collect();
13935                snapshot.combined_risk_assessments.extend(cras_with_scope);
13936            }
13937
13938            let significant_count = snapshot
13939                .combined_risk_assessments
13940                .iter()
13941                .filter(|c| c.significant_risk)
13942                .count();
13943            let high_cra_count = snapshot
13944                .combined_risk_assessments
13945                .iter()
13946                .filter(|c| {
13947                    matches!(
13948                        c.combined_risk,
13949                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13950                    )
13951                })
13952                .count();
13953
13954            info!(
13955                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13956                snapshot.combined_risk_assessments.len(),
13957                significant_count,
13958                high_cra_count,
13959            );
13960        }
13961
13962        // ----------------------------------------------------------------
13963        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13964        // ----------------------------------------------------------------
13965        {
13966            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13967
13968            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13969
13970            // Group CRAs by entity and use per-entity tolerable error from materiality
13971            for company in &self.config.companies {
13972                let entity_code = company.code.clone();
13973
13974                // Find tolerable error for this entity (= performance materiality)
13975                let tolerable_error = snapshot
13976                    .materiality_calculations
13977                    .iter()
13978                    .find(|m| m.entity_code == entity_code)
13979                    .map(|m| m.tolerable_error);
13980
13981                // Collect CRAs for this entity
13982                let entity_cras: Vec<_> = snapshot
13983                    .combined_risk_assessments
13984                    .iter()
13985                    .filter(|c| c.entity_code == entity_code)
13986                    .cloned()
13987                    .collect();
13988
13989                if !entity_cras.is_empty() {
13990                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13991                    snapshot.sampling_plans.extend(plans);
13992                    snapshot.sampled_items.extend(items);
13993                }
13994            }
13995
13996            let misstatement_count = snapshot
13997                .sampled_items
13998                .iter()
13999                .filter(|i| i.misstatement_found)
14000                .count();
14001
14002            info!(
14003                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
14004                snapshot.sampling_plans.len(),
14005                snapshot.sampled_items.len(),
14006                misstatement_count,
14007            );
14008        }
14009
14010        // ----------------------------------------------------------------
14011        // ISA 315: Significant Classes of Transactions (SCOTS)
14012        // ----------------------------------------------------------------
14013        {
14014            use datasynth_generators::audit::scots_generator::{
14015                ScotsGenerator, ScotsGeneratorConfig,
14016            };
14017
14018            let ic_enabled = self.config.intercompany.enabled;
14019
14020            let config = ScotsGeneratorConfig {
14021                intercompany_enabled: ic_enabled,
14022                ..ScotsGeneratorConfig::default()
14023            };
14024            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
14025
14026            for company in &self.config.companies {
14027                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
14028                snapshot
14029                    .significant_transaction_classes
14030                    .extend(entity_scots);
14031            }
14032
14033            let estimation_count = snapshot
14034                .significant_transaction_classes
14035                .iter()
14036                .filter(|s| {
14037                    matches!(
14038                        s.transaction_type,
14039                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
14040                    )
14041                })
14042                .count();
14043
14044            info!(
14045                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
14046                snapshot.significant_transaction_classes.len(),
14047                estimation_count,
14048            );
14049        }
14050
14051        // ----------------------------------------------------------------
14052        // ISA 520: Unusual Item Markers
14053        // ----------------------------------------------------------------
14054        {
14055            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
14056
14057            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
14058            let entity_codes: Vec<String> = self
14059                .config
14060                .companies
14061                .iter()
14062                .map(|c| c.code.clone())
14063                .collect();
14064            let unusual_flags =
14065                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
14066            info!(
14067                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
14068                unusual_flags.len(),
14069                unusual_flags
14070                    .iter()
14071                    .filter(|f| matches!(
14072                        f.severity,
14073                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
14074                    ))
14075                    .count(),
14076                unusual_flags
14077                    .iter()
14078                    .filter(|f| matches!(
14079                        f.severity,
14080                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
14081                    ))
14082                    .count(),
14083                unusual_flags
14084                    .iter()
14085                    .filter(|f| matches!(
14086                        f.severity,
14087                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
14088                    ))
14089                    .count(),
14090            );
14091            snapshot.unusual_items = unusual_flags;
14092        }
14093
14094        // ----------------------------------------------------------------
14095        // ISA 520: Analytical Relationships
14096        // ----------------------------------------------------------------
14097        {
14098            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
14099
14100            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
14101            let entity_codes: Vec<String> = self
14102                .config
14103                .companies
14104                .iter()
14105                .map(|c| c.code.clone())
14106                .collect();
14107            let current_period_label = format!("FY{fiscal_year}");
14108            let prior_period_label = format!("FY{}", fiscal_year - 1);
14109            let analytical_rels = ar_gen.generate_for_entities(
14110                &entity_codes,
14111                entries,
14112                &current_period_label,
14113                &prior_period_label,
14114            );
14115            let out_of_range = analytical_rels
14116                .iter()
14117                .filter(|r| !r.within_expected_range)
14118                .count();
14119            info!(
14120                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
14121                analytical_rels.len(),
14122                out_of_range,
14123            );
14124            snapshot.analytical_relationships = analytical_rels;
14125        }
14126
14127        if let Some(pb) = pb {
14128            pb.finish_with_message(format!(
14129                "Audit data: {} engagements, {} workpapers, {} evidence, \
14130                 {} confirmations, {} procedure steps, {} samples, \
14131                 {} analytical, {} IA funcs, {} related parties, \
14132                 {} component auditors, {} letters, {} subsequent events, \
14133                 {} service orgs, {} going concern, {} accounting estimates, \
14134                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
14135                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
14136                 {} unusual items, {} analytical relationships",
14137                snapshot.engagements.len(),
14138                snapshot.workpapers.len(),
14139                snapshot.evidence.len(),
14140                snapshot.confirmations.len(),
14141                snapshot.procedure_steps.len(),
14142                snapshot.samples.len(),
14143                snapshot.analytical_results.len(),
14144                snapshot.ia_functions.len(),
14145                snapshot.related_parties.len(),
14146                snapshot.component_auditors.len(),
14147                snapshot.engagement_letters.len(),
14148                snapshot.subsequent_events.len(),
14149                snapshot.service_organizations.len(),
14150                snapshot.going_concern_assessments.len(),
14151                snapshot.accounting_estimates.len(),
14152                snapshot.audit_opinions.len(),
14153                snapshot.key_audit_matters.len(),
14154                snapshot.sox_302_certifications.len(),
14155                snapshot.sox_404_assessments.len(),
14156                snapshot.materiality_calculations.len(),
14157                snapshot.combined_risk_assessments.len(),
14158                snapshot.sampling_plans.len(),
14159                snapshot.significant_transaction_classes.len(),
14160                snapshot.unusual_items.len(),
14161                snapshot.analytical_relationships.len(),
14162            ));
14163        }
14164
14165        // ----------------------------------------------------------------
14166        // PCAOB-ISA cross-reference mappings
14167        // ----------------------------------------------------------------
14168        // Always include the standard PCAOB-ISA mappings when audit generation is
14169        // enabled. These are static reference data (no randomness required) so we
14170        // call standard_mappings() directly.
14171        {
14172            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14173            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14174            debug!(
14175                "PCAOB-ISA mappings generated: {} mappings",
14176                snapshot.isa_pcaob_mappings.len()
14177            );
14178        }
14179
14180        // ----------------------------------------------------------------
14181        // ISA standard reference entries
14182        // ----------------------------------------------------------------
14183        // Emit flat ISA standard reference data (number, title, series) so
14184        // consumers get a machine-readable listing of all 34 ISA standards in
14185        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
14186        {
14187            use datasynth_standards::audit::isa_reference::IsaStandard;
14188            snapshot.isa_mappings = IsaStandard::standard_entries();
14189            debug!(
14190                "ISA standard entries generated: {} standards",
14191                snapshot.isa_mappings.len()
14192            );
14193        }
14194
14195        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
14196        // For each RPT, find the chronologically closest JE for the engagement's entity.
14197        {
14198            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
14199                .engagements
14200                .iter()
14201                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
14202                .collect();
14203
14204            for rpt in &mut snapshot.related_party_transactions {
14205                if rpt.journal_entry_id.is_some() {
14206                    continue; // already set
14207                }
14208                let entity = engagement_by_id
14209                    .get(&rpt.engagement_id.to_string())
14210                    .copied()
14211                    .unwrap_or("");
14212
14213                // Find closest JE by date in the entity's company
14214                let best_je = entries
14215                    .iter()
14216                    .filter(|je| je.header.company_code == entity)
14217                    .min_by_key(|je| {
14218                        (je.header.posting_date - rpt.transaction_date)
14219                            .num_days()
14220                            .abs()
14221                    });
14222
14223                if let Some(je) = best_je {
14224                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
14225                }
14226            }
14227
14228            let linked = snapshot
14229                .related_party_transactions
14230                .iter()
14231                .filter(|t| t.journal_entry_id.is_some())
14232                .count();
14233            debug!(
14234                "Linked {}/{} related party transactions to journal entries",
14235                linked,
14236                snapshot.related_party_transactions.len()
14237            );
14238        }
14239
14240        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
14241        // One opinion per engagement, derived from that engagement's findings,
14242        // going-concern assessment, and any component-auditor reports. Fills
14243        // `audit_opinions` + a flattened `key_audit_matters` for downstream
14244        // export.
14245        if !snapshot.engagements.is_empty() {
14246            use datasynth_generators::audit_opinion_generator::{
14247                AuditOpinionGenerator, AuditOpinionInput,
14248            };
14249
14250            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
14251            let inputs: Vec<AuditOpinionInput> = snapshot
14252                .engagements
14253                .iter()
14254                .map(|eng| {
14255                    let findings = snapshot
14256                        .findings
14257                        .iter()
14258                        .filter(|f| f.engagement_id == eng.engagement_id)
14259                        .cloned()
14260                        .collect();
14261                    let going_concern = snapshot
14262                        .going_concern_assessments
14263                        .iter()
14264                        .find(|gc| gc.entity_code == eng.client_entity_id)
14265                        .cloned();
14266                    // ComponentAuditorReport doesn't carry an engagement id, but
14267                    // component scope is keyed by `entity_code`, so filter on that.
14268                    let component_reports = snapshot
14269                        .component_reports
14270                        .iter()
14271                        .filter(|r| r.entity_code == eng.client_entity_id)
14272                        .cloned()
14273                        .collect();
14274
14275                    AuditOpinionInput {
14276                        entity_code: eng.client_entity_id.clone(),
14277                        entity_name: eng.client_name.clone(),
14278                        engagement_id: eng.engagement_id,
14279                        period_end: eng.period_end_date,
14280                        findings,
14281                        going_concern,
14282                        component_reports,
14283                        is_us_listed: matches!(
14284                            eng.engagement_type,
14285                            datasynth_core::audit::EngagementType::IntegratedAudit
14286                                | datasynth_core::audit::EngagementType::Sox404
14287                        ),
14288                        auditor_name: "DataSynth Audit LLP".to_string(),
14289                        engagement_partner: "Engagement Partner".to_string(),
14290                    }
14291                })
14292                .collect();
14293
14294            let generated = opinion_gen.generate_batch(&inputs);
14295            for g in generated {
14296                snapshot.key_audit_matters.extend(g.key_audit_matters);
14297                snapshot.audit_opinions.push(g.opinion);
14298            }
14299            debug!(
14300                "Generated {} audit opinions with {} key audit matters",
14301                snapshot.audit_opinions.len(),
14302                snapshot.key_audit_matters.len()
14303            );
14304        }
14305
14306        Ok(snapshot)
14307    }
14308
14309    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
14310    ///
14311    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
14312    /// from the current orchestrator state, runs the FSM engine, and maps the
14313    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
14314    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
14315    fn generate_audit_data_with_fsm(
14316        &mut self,
14317        entries: &[JournalEntry],
14318    ) -> SynthResult<AuditSnapshot> {
14319        use datasynth_audit_fsm::{
14320            context::EngagementContext,
14321            engine::AuditFsmEngine,
14322            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
14323        };
14324        use rand::SeedableRng;
14325        use rand_chacha::ChaCha8Rng;
14326
14327        info!("Audit FSM: generating audit data via FSM engine");
14328
14329        let fsm_config = self
14330            .config
14331            .audit
14332            .fsm
14333            .as_ref()
14334            .expect("FSM config must be present when FSM is enabled");
14335
14336        // 1. Load blueprint from config string.
14337        let bwp = match fsm_config.blueprint.as_str() {
14338            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
14339            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
14340            _ => {
14341                warn!(
14342                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
14343                    fsm_config.blueprint
14344                );
14345                BlueprintWithPreconditions::load_builtin_fsa()
14346            }
14347        }
14348        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
14349
14350        // 2. Load overlay from config string.
14351        let overlay = match fsm_config.overlay.as_str() {
14352            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
14353            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
14354            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
14355            _ => {
14356                warn!(
14357                    "Unknown FSM overlay '{}', falling back to builtin:default",
14358                    fsm_config.overlay
14359                );
14360                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
14361            }
14362        }
14363        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
14364
14365        // 3. Build EngagementContext from orchestrator state.
14366        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14367            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
14368        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
14369
14370        // Determine the engagement entity early so we can filter JEs.
14371        let company = self.config.companies.first();
14372        let company_code = company
14373            .map(|c| c.code.clone())
14374            .unwrap_or_else(|| "UNKNOWN".to_string());
14375        let company_name = company
14376            .map(|c| c.name.clone())
14377            .unwrap_or_else(|| "Unknown Company".to_string());
14378        let currency = company
14379            .map(|c| c.currency.clone())
14380            .unwrap_or_else(|| "USD".to_string());
14381
14382        // Filter JEs to the engagement entity for single-company coherence.
14383        let entity_entries: Vec<_> = entries
14384            .iter()
14385            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14386            .cloned()
14387            .collect();
14388        let entries = &entity_entries; // Shadow the parameter for remaining usage
14389
14390        // Financial aggregates from journal entries.
14391        let total_revenue: rust_decimal::Decimal = entries
14392            .iter()
14393            .flat_map(|e| e.lines.iter())
14394            .filter(|l| l.account_code.starts_with('4'))
14395            .map(|l| l.credit_amount - l.debit_amount)
14396            .sum();
14397
14398        let total_assets: rust_decimal::Decimal = entries
14399            .iter()
14400            .flat_map(|e| e.lines.iter())
14401            .filter(|l| l.account_code.starts_with('1'))
14402            .map(|l| l.debit_amount - l.credit_amount)
14403            .sum();
14404
14405        let total_expenses: rust_decimal::Decimal = entries
14406            .iter()
14407            .flat_map(|e| e.lines.iter())
14408            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14409            .map(|l| l.debit_amount)
14410            .sum();
14411
14412        let equity: rust_decimal::Decimal = entries
14413            .iter()
14414            .flat_map(|e| e.lines.iter())
14415            .filter(|l| l.account_code.starts_with('3'))
14416            .map(|l| l.credit_amount - l.debit_amount)
14417            .sum();
14418
14419        let total_debt: rust_decimal::Decimal = entries
14420            .iter()
14421            .flat_map(|e| e.lines.iter())
14422            .filter(|l| l.account_code.starts_with('2'))
14423            .map(|l| l.credit_amount - l.debit_amount)
14424            .sum();
14425
14426        let pretax_income = total_revenue - total_expenses;
14427
14428        let cogs: rust_decimal::Decimal = entries
14429            .iter()
14430            .flat_map(|e| e.lines.iter())
14431            .filter(|l| l.account_code.starts_with('5'))
14432            .map(|l| l.debit_amount)
14433            .sum();
14434        let gross_profit = total_revenue - cogs;
14435
14436        let current_assets: rust_decimal::Decimal = entries
14437            .iter()
14438            .flat_map(|e| e.lines.iter())
14439            .filter(|l| {
14440                l.account_code.starts_with("10")
14441                    || l.account_code.starts_with("11")
14442                    || l.account_code.starts_with("12")
14443                    || l.account_code.starts_with("13")
14444            })
14445            .map(|l| l.debit_amount - l.credit_amount)
14446            .sum();
14447        let current_liabilities: rust_decimal::Decimal = entries
14448            .iter()
14449            .flat_map(|e| e.lines.iter())
14450            .filter(|l| {
14451                l.account_code.starts_with("20")
14452                    || l.account_code.starts_with("21")
14453                    || l.account_code.starts_with("22")
14454            })
14455            .map(|l| l.credit_amount - l.debit_amount)
14456            .sum();
14457        let working_capital = current_assets - current_liabilities;
14458
14459        let depreciation: rust_decimal::Decimal = entries
14460            .iter()
14461            .flat_map(|e| e.lines.iter())
14462            .filter(|l| l.account_code.starts_with("60"))
14463            .map(|l| l.debit_amount)
14464            .sum();
14465        let operating_cash_flow = pretax_income + depreciation;
14466
14467        // GL accounts for reference data.
14468        let accounts: Vec<String> = self
14469            .coa
14470            .as_ref()
14471            .map(|coa| {
14472                coa.get_postable_accounts()
14473                    .iter()
14474                    .map(|acc| acc.account_code().to_string())
14475                    .collect()
14476            })
14477            .unwrap_or_default();
14478
14479        // Team member IDs and display names from master data.
14480        let team_member_ids: Vec<String> = self
14481            .master_data
14482            .employees
14483            .iter()
14484            .take(8) // Cap team size
14485            .map(|e| e.employee_id.clone())
14486            .collect();
14487        let team_member_pairs: Vec<(String, String)> = self
14488            .master_data
14489            .employees
14490            .iter()
14491            .take(8)
14492            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14493            .collect();
14494
14495        let vendor_names: Vec<String> = self
14496            .master_data
14497            .vendors
14498            .iter()
14499            .map(|v| v.name.clone())
14500            .collect();
14501        let customer_names: Vec<String> = self
14502            .master_data
14503            .customers
14504            .iter()
14505            .map(|c| c.name.clone())
14506            .collect();
14507
14508        let entity_codes: Vec<String> = self
14509            .config
14510            .companies
14511            .iter()
14512            .map(|c| c.code.clone())
14513            .collect();
14514
14515        // Journal entry IDs for evidence tracing (sample up to 50).
14516        let journal_entry_ids: Vec<String> = entries
14517            .iter()
14518            .take(50)
14519            .map(|e| e.header.document_id.to_string())
14520            .collect();
14521
14522        // Account balances for risk weighting (aggregate debit - credit per account).
14523        let mut account_balances = std::collections::HashMap::<String, f64>::new();
14524        for entry in entries {
14525            for line in &entry.lines {
14526                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14527                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14528                *account_balances
14529                    .entry(line.account_code.clone())
14530                    .or_insert(0.0) += debit_f64 - credit_f64;
14531            }
14532        }
14533
14534        // Internal control IDs and anomaly refs are populated by the
14535        // caller when available; here we default to empty because the
14536        // orchestrator state may not have generated controls/anomalies
14537        // yet at this point in the pipeline.
14538        let control_ids: Vec<String> = Vec::new();
14539        let anomaly_refs: Vec<String> = Vec::new();
14540
14541        let mut context = EngagementContext {
14542            company_code,
14543            company_name,
14544            fiscal_year: start_date.year(),
14545            currency,
14546            total_revenue,
14547            total_assets,
14548            engagement_start: start_date,
14549            report_date: period_end,
14550            pretax_income,
14551            equity,
14552            gross_profit,
14553            working_capital,
14554            operating_cash_flow,
14555            total_debt,
14556            team_member_ids,
14557            team_member_pairs,
14558            accounts,
14559            vendor_names,
14560            customer_names,
14561            journal_entry_ids,
14562            account_balances,
14563            control_ids,
14564            anomaly_refs,
14565            journal_entries: entries.to_vec(),
14566            is_us_listed: false,
14567            entity_codes,
14568            auditor_firm_name: "DataSynth Audit LLP".into(),
14569            accounting_framework: self
14570                .config
14571                .accounting_standards
14572                .framework
14573                .map(|f| match f {
14574                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14575                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14576                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14577                        "French GAAP"
14578                    }
14579                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14580                        "German GAAP"
14581                    }
14582                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14583                        "Dual Reporting"
14584                    }
14585                })
14586                .unwrap_or("IFRS")
14587                .into(),
14588        };
14589
14590        // 4. Create and run the FSM engine.
14591        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14592        let rng = ChaCha8Rng::seed_from_u64(seed);
14593        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14594
14595        let mut result = engine
14596            .run_engagement(&context)
14597            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14598
14599        info!(
14600            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14601             {} phases completed, duration {:.1}h",
14602            result.event_log.len(),
14603            result.artifacts.total_artifacts(),
14604            result.anomalies.len(),
14605            result.phases_completed.len(),
14606            result.total_duration_hours,
14607        );
14608
14609        // 4b. Populate financial data in the artifact bag for downstream consumers.
14610        let tb_entity = context.company_code.clone();
14611        let tb_fy = context.fiscal_year;
14612        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14613        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14614            entries,
14615            &tb_entity,
14616            tb_fy,
14617            self.coa.as_ref().map(|c| c.as_ref()),
14618        );
14619
14620        // 5. Map ArtifactBag fields to AuditSnapshot.
14621        let bag = result.artifacts;
14622        let mut snapshot = AuditSnapshot {
14623            engagements: bag.engagements,
14624            engagement_letters: bag.engagement_letters,
14625            materiality_calculations: bag.materiality_calculations,
14626            risk_assessments: bag.risk_assessments,
14627            combined_risk_assessments: bag.combined_risk_assessments,
14628            workpapers: bag.workpapers,
14629            evidence: bag.evidence,
14630            findings: bag.findings,
14631            judgments: bag.judgments,
14632            sampling_plans: bag.sampling_plans,
14633            sampled_items: bag.sampled_items,
14634            analytical_results: bag.analytical_results,
14635            going_concern_assessments: bag.going_concern_assessments,
14636            subsequent_events: bag.subsequent_events,
14637            audit_opinions: bag.audit_opinions,
14638            key_audit_matters: bag.key_audit_matters,
14639            procedure_steps: bag.procedure_steps,
14640            samples: bag.samples,
14641            confirmations: bag.confirmations,
14642            confirmation_responses: bag.confirmation_responses,
14643            // Store the event trail for downstream export.
14644            fsm_event_trail: Some(result.event_log),
14645            // Fields not produced by the FSM engine remain at their defaults.
14646            ..Default::default()
14647        };
14648
14649        // 6. Add static reference data (same as legacy path).
14650        {
14651            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14652            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14653        }
14654        {
14655            use datasynth_standards::audit::isa_reference::IsaStandard;
14656            snapshot.isa_mappings = IsaStandard::standard_entries();
14657        }
14658
14659        info!(
14660            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14661             {} risk assessments, {} findings, {} materiality calcs",
14662            snapshot.engagements.len(),
14663            snapshot.workpapers.len(),
14664            snapshot.evidence.len(),
14665            snapshot.risk_assessments.len(),
14666            snapshot.findings.len(),
14667            snapshot.materiality_calculations.len(),
14668        );
14669
14670        Ok(snapshot)
14671    }
14672
14673    /// Export journal entries as graph data for ML training and network reconstruction.
14674    ///
14675    /// Builds a transaction graph where:
14676    /// - Nodes are GL accounts
14677    /// - Edges are money flows from credit to debit accounts
14678    /// - Edge attributes include amount, date, business process, anomaly flags
14679    fn export_graphs(
14680        &mut self,
14681        entries: &[JournalEntry],
14682        _coa: &Arc<ChartOfAccounts>,
14683        stats: &mut EnhancedGenerationStatistics,
14684    ) -> SynthResult<GraphExportSnapshot> {
14685        let pb = self.create_progress_bar(100, "Exporting Graphs");
14686
14687        let mut snapshot = GraphExportSnapshot::default();
14688
14689        // Get output directory
14690        let output_dir = self
14691            .output_path
14692            .clone()
14693            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14694        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14695
14696        // Process each graph type configuration
14697        for graph_type in &self.config.graph_export.graph_types {
14698            if let Some(pb) = &pb {
14699                pb.inc(10);
14700            }
14701
14702            // Build transaction graph
14703            let graph_config = TransactionGraphConfig {
14704                include_vendors: false,
14705                include_customers: false,
14706                create_debit_credit_edges: true,
14707                include_document_nodes: graph_type.include_document_nodes,
14708                min_edge_weight: graph_type.min_edge_weight,
14709                aggregate_parallel_edges: graph_type.aggregate_edges,
14710                framework: None,
14711            };
14712
14713            let mut builder = TransactionGraphBuilder::new(graph_config);
14714            builder.add_journal_entries(entries);
14715            let graph = builder.build();
14716
14717            // Update stats
14718            stats.graph_node_count += graph.node_count();
14719            stats.graph_edge_count += graph.edge_count();
14720
14721            if let Some(pb) = &pb {
14722                pb.inc(40);
14723            }
14724
14725            // Export to each configured format
14726            for format in &self.config.graph_export.formats {
14727                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14728
14729                // Create output directory
14730                if let Err(e) = std::fs::create_dir_all(&format_dir) {
14731                    warn!("Failed to create graph output directory: {}", e);
14732                    continue;
14733                }
14734
14735                match format {
14736                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14737                        let pyg_config = PyGExportConfig {
14738                            common: datasynth_graph::CommonExportConfig {
14739                                export_node_features: true,
14740                                export_edge_features: true,
14741                                export_node_labels: true,
14742                                export_edge_labels: true,
14743                                export_masks: true,
14744                                train_ratio: self.config.graph_export.train_ratio,
14745                                val_ratio: self.config.graph_export.validation_ratio,
14746                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14747                            },
14748                            one_hot_categoricals: false,
14749                        };
14750
14751                        let exporter = PyGExporter::new(pyg_config);
14752                        match exporter.export(&graph, &format_dir) {
14753                            Ok(metadata) => {
14754                                snapshot.exports.insert(
14755                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
14756                                    GraphExportInfo {
14757                                        name: graph_type.name.clone(),
14758                                        format: "pytorch_geometric".to_string(),
14759                                        output_path: format_dir.clone(),
14760                                        node_count: metadata.num_nodes,
14761                                        edge_count: metadata.num_edges,
14762                                    },
14763                                );
14764                                snapshot.graph_count += 1;
14765                            }
14766                            Err(e) => {
14767                                warn!("Failed to export PyTorch Geometric graph: {}", e);
14768                            }
14769                        }
14770                    }
14771                    datasynth_config::schema::GraphExportFormat::Neo4j => {
14772                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14773
14774                        let neo4j_config = Neo4jExportConfig {
14775                            export_node_properties: true,
14776                            export_edge_properties: true,
14777                            export_features: true,
14778                            generate_cypher: true,
14779                            generate_admin_import: true,
14780                            database_name: "synth".to_string(),
14781                            cypher_batch_size: 1000,
14782                        };
14783
14784                        let exporter = Neo4jExporter::new(neo4j_config);
14785                        match exporter.export(&graph, &format_dir) {
14786                            Ok(metadata) => {
14787                                snapshot.exports.insert(
14788                                    format!("{}_{}", graph_type.name, "neo4j"),
14789                                    GraphExportInfo {
14790                                        name: graph_type.name.clone(),
14791                                        format: "neo4j".to_string(),
14792                                        output_path: format_dir.clone(),
14793                                        node_count: metadata.num_nodes,
14794                                        edge_count: metadata.num_edges,
14795                                    },
14796                                );
14797                                snapshot.graph_count += 1;
14798                            }
14799                            Err(e) => {
14800                                warn!("Failed to export Neo4j graph: {}", e);
14801                            }
14802                        }
14803                    }
14804                    datasynth_config::schema::GraphExportFormat::Dgl => {
14805                        use datasynth_graph::{DGLExportConfig, DGLExporter};
14806
14807                        let dgl_config = DGLExportConfig {
14808                            common: datasynth_graph::CommonExportConfig {
14809                                export_node_features: true,
14810                                export_edge_features: true,
14811                                export_node_labels: true,
14812                                export_edge_labels: true,
14813                                export_masks: true,
14814                                train_ratio: self.config.graph_export.train_ratio,
14815                                val_ratio: self.config.graph_export.validation_ratio,
14816                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14817                            },
14818                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
14819                            include_pickle_script: true, // DGL ecosystem standard helper
14820                        };
14821
14822                        let exporter = DGLExporter::new(dgl_config);
14823                        match exporter.export(&graph, &format_dir) {
14824                            Ok(metadata) => {
14825                                snapshot.exports.insert(
14826                                    format!("{}_{}", graph_type.name, "dgl"),
14827                                    GraphExportInfo {
14828                                        name: graph_type.name.clone(),
14829                                        format: "dgl".to_string(),
14830                                        output_path: format_dir.clone(),
14831                                        node_count: metadata.common.num_nodes,
14832                                        edge_count: metadata.common.num_edges,
14833                                    },
14834                                );
14835                                snapshot.graph_count += 1;
14836                            }
14837                            Err(e) => {
14838                                warn!("Failed to export DGL graph: {}", e);
14839                            }
14840                        }
14841                    }
14842                    datasynth_config::schema::GraphExportFormat::RustGraph => {
14843                        use datasynth_graph::{
14844                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14845                        };
14846
14847                        let rustgraph_config = RustGraphExportConfig {
14848                            include_features: true,
14849                            include_temporal: true,
14850                            include_labels: true,
14851                            source_name: "datasynth".to_string(),
14852                            batch_id: None,
14853                            output_format: RustGraphOutputFormat::JsonLines,
14854                            export_node_properties: true,
14855                            export_edge_properties: true,
14856                            pretty_print: false,
14857                        };
14858
14859                        let exporter = RustGraphExporter::new(rustgraph_config);
14860                        match exporter.export(&graph, &format_dir) {
14861                            Ok(metadata) => {
14862                                snapshot.exports.insert(
14863                                    format!("{}_{}", graph_type.name, "rustgraph"),
14864                                    GraphExportInfo {
14865                                        name: graph_type.name.clone(),
14866                                        format: "rustgraph".to_string(),
14867                                        output_path: format_dir.clone(),
14868                                        node_count: metadata.num_nodes,
14869                                        edge_count: metadata.num_edges,
14870                                    },
14871                                );
14872                                snapshot.graph_count += 1;
14873                            }
14874                            Err(e) => {
14875                                warn!("Failed to export RustGraph: {}", e);
14876                            }
14877                        }
14878                    }
14879                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14880                        // Hypergraph export is handled separately in Phase 10b
14881                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14882                    }
14883                }
14884            }
14885
14886            if let Some(pb) = &pb {
14887                pb.inc(40);
14888            }
14889        }
14890
14891        stats.graph_export_count = snapshot.graph_count;
14892        snapshot.exported = snapshot.graph_count > 0;
14893
14894        if let Some(pb) = pb {
14895            pb.finish_with_message(format!(
14896                "Graphs exported: {} graphs ({} nodes, {} edges)",
14897                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14898            ));
14899        }
14900
14901        Ok(snapshot)
14902    }
14903
14904    /// Build additional graph types (banking, approval, entity) when relevant data
14905    /// is available. These run as a late phase because the data they need (banking
14906    /// snapshot, intercompany snapshot) is only generated after the main graph
14907    /// export phase.
    ///
    /// Three graphs are attempted independently, each logged under "Phase 10c":
    /// - banking network: only when `banking` holds both customers and transactions;
    /// - approval network: only when at least one entry in `entries` carries an
    ///   approval workflow;
    /// - entity relationship: only when the configuration lists two or more companies.
    ///
    /// Every graph that is built adds its node and edge counts to `stats`, whether or
    /// not it ends up being exported. `stats.graph_export_count` is not modified here.
    ///
    /// Export happens once per `PytorchGeometric` entry in `graph_export.formats`,
    /// using `PyGExportConfig::default()`. Directory-creation and export failures are
    /// logged with `warn!` and never abort the remaining work; nothing is returned.
14908    fn build_additional_graphs(
14909        &self,
14910        banking: &BankingSnapshot,
14911        intercompany: &IntercompanySnapshot,
14912        entries: &[JournalEntry],
14913        stats: &mut EnhancedGenerationStatistics,
14914    ) {
        // An explicitly set output path takes precedence over the configured
        // output directory.
14915        let output_dir = self
14916            .output_path
14917            .clone()
14918            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14919        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14920
14921        // Banking graph: build when banking customers and transactions exist
14922        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14923            info!("Phase 10c: Building banking network graph");
14924            let config = BankingGraphConfig::default();
14925            let mut builder = BankingGraphBuilder::new(config);
14926            builder.add_customers(&banking.customers);
14927            builder.add_accounts(&banking.accounts, &banking.customers);
14928            builder.add_transactions(&banking.transactions);
14929            let graph = builder.build();
14930
            // Counts are captured before export and added to the running totals
            // regardless of whether any export below succeeds.
14931            let node_count = graph.node_count();
14932            let edge_count = graph.edge_count();
14933            stats.graph_node_count += node_count;
14934            stats.graph_edge_count += edge_count;
14935
14936            // Export as PyG if configured
            // All non-PyG formats are ignored for this graph.
14937            for format in &self.config.graph_export.formats {
14938                if matches!(
14939                    format,
14940                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14941                ) {
14942                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14943                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14944                        warn!("Failed to create banking graph output dir: {}", e);
                        // Skips only this format entry; the loop keeps going.
14945                        continue;
14946                    }
14947                    let pyg_config = PyGExportConfig::default();
14948                    let exporter = PyGExporter::new(pyg_config);
14949                    if let Err(e) = exporter.export(&graph, &format_dir) {
14950                        warn!("Failed to export banking graph as PyG: {}", e);
14951                    } else {
14952                        info!(
14953                            "Banking network graph exported: {} nodes, {} edges",
14954                            node_count, edge_count
14955                        );
14956                    }
14957                }
14958            }
14959        }
14960
14961        // Approval graph: build from journal entry approval workflows
14962        let approval_entries: Vec<_> = entries
14963            .iter()
14964            .filter(|je| je.header.approval_workflow.is_some())
14965            .collect();
14966
14967        if !approval_entries.is_empty() {
14968            info!(
14969                "Phase 10c: Building approval network graph ({} entries with approvals)",
14970                approval_entries.len()
14971            );
14972            let config = ApprovalGraphConfig::default();
14973            let mut builder = ApprovalGraphBuilder::new(config);
14974
14975            for je in &approval_entries {
                // Always matches: `approval_entries` was filtered on `is_some()` above.
14976                if let Some(ref wf) = je.header.approval_workflow {
                    // One `ApprovalRecord` is synthesised per workflow action.
14977                    for action in &wf.actions {
14978                        let record = datasynth_core::models::ApprovalRecord {
                            // Id is derived from the document id and the action's
                            // approval level.
14979                            approval_id: format!(
14980                                "APR-{}-{}",
14981                                je.header.document_id, action.approval_level
14982                            ),
14983                            document_number: je.header.document_id.to_string(),
14984                            document_type: "JE".to_string(),
14985                            company_code: je.company_code().to_string(),
14986                            requester_id: wf.preparer_id.clone(),
14987                            requester_name: Some(wf.preparer_name.clone()),
14988                            approver_id: action.actor_id.clone(),
14989                            approver_name: action.actor_name.clone(),
                            // The journal entry's posting date stands in for the
                            // approval date.
14990                            approval_date: je.posting_date(),
                            // The action is stored as its `Debug` rendering.
14991                            action: format!("{:?}", action.action),
14992                            amount: wf.amount,
                            // Limit and delegation are left unset, and the record is
                            // never marked as auto-approved.
14993                            approval_limit: None,
14994                            comments: action.comments.clone(),
14995                            delegation_from: None,
14996                            is_auto_approved: false,
14997                        };
14998                        builder.add_approval(&record);
14999                    }
15000                }
15001            }
15002
15003            let graph = builder.build();
15004            let node_count = graph.node_count();
15005            let edge_count = graph.edge_count();
15006            stats.graph_node_count += node_count;
15007            stats.graph_edge_count += edge_count;
15008
15009            // Export as PyG if configured
15010            for format in &self.config.graph_export.formats {
15011                if matches!(
15012                    format,
15013                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
15014                ) {
15015                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
15016                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
15017                        warn!("Failed to create approval graph output dir: {}", e);
15018                        continue;
15019                    }
15020                    let pyg_config = PyGExportConfig::default();
15021                    let exporter = PyGExporter::new(pyg_config);
15022                    if let Err(e) = exporter.export(&graph, &format_dir) {
15023                        warn!("Failed to export approval graph as PyG: {}", e);
15024                    } else {
15025                        info!(
15026                            "Approval network graph exported: {} nodes, {} edges",
15027                            node_count, edge_count
15028                        );
15029                    }
15030                }
15031            }
15032        }
15033
15034        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
15035        if self.config.companies.len() >= 2 {
15036            info!(
15037                "Phase 10c: Building entity relationship graph ({} companies)",
15038                self.config.companies.len()
15039            );
15040
            // Falls back to 2024-01-01 when `global.start_date` does not parse as
            // `%Y-%m-%d`; the parse error itself is discarded.
15041            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15042                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
15043
15044            // Map CompanyConfig → Company objects
            // Indexing `[0]` cannot panic here: the branch is guarded by `len() >= 2`.
15045            let parent_code = &self.config.companies[0].code;
15046            let mut companies: Vec<datasynth_core::models::Company> =
15047                Vec::with_capacity(self.config.companies.len());
15048
15049            // First company is the parent
15050            let first = &self.config.companies[0];
15051            companies.push(datasynth_core::models::Company::parent(
15052                &first.code,
15053                &first.name,
15054                &first.country,
15055                &first.currency,
15056            ));
15057
15058            // Remaining companies are subsidiaries (100% owned by parent)
15059            for cc in self.config.companies.iter().skip(1) {
15060                companies.push(datasynth_core::models::Company::subsidiary(
15061                    &cc.code,
15062                    &cc.name,
15063                    &cc.country,
15064                    &cc.currency,
15065                    parent_code,
15066                    rust_decimal::Decimal::from(100),
15067                ));
15068            }
15069
15070            // Build IntercompanyRelationship records (same logic as phase_intercompany)
            // `enumerate` runs after `skip(1)`, so the first subsidiary gets "REL001".
15071            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
15072                self.config
15073                    .companies
15074                    .iter()
15075                    .skip(1)
15076                    .enumerate()
15077                    .map(|(i, cc)| {
15078                        let mut rel =
15079                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
15080                                format!("REL{:03}", i + 1),
15081                                parent_code.clone(),
15082                                cc.code.clone(),
15083                                rust_decimal::Decimal::from(100),
15084                                start_date,
15085                            );
                        // The subsidiary's own currency is recorded as the
                        // relationship's functional currency.
15086                        rel.functional_currency = cc.currency.clone();
15087                        rel
15088                    })
15089                    .collect();
15090
15091            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
15092            builder.add_companies(&companies);
15093            builder.add_ownership_relationships(&relationships);
15094
15095            // Thread IC matched-pair transaction edges into the entity graph
            // The transaction type is passed as its `Debug` rendering.
15096            for pair in &intercompany.matched_pairs {
15097                builder.add_intercompany_edge(
15098                    &pair.seller_company,
15099                    &pair.buyer_company,
15100                    pair.amount,
15101                    &format!("{:?}", pair.transaction_type),
15102                );
15103            }
15104
15105            let graph = builder.build();
15106            let node_count = graph.node_count();
15107            let edge_count = graph.edge_count();
15108            stats.graph_node_count += node_count;
15109            stats.graph_edge_count += edge_count;
15110
15111            // Export as PyG if configured
15112            for format in &self.config.graph_export.formats {
15113                if matches!(
15114                    format,
15115                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
15116                ) {
15117                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
15118                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
15119                        warn!("Failed to create entity graph output dir: {}", e);
15120                        continue;
15121                    }
15122                    let pyg_config = PyGExportConfig::default();
15123                    let exporter = PyGExporter::new(pyg_config);
15124                    if let Err(e) = exporter.export(&graph, &format_dir) {
15125                        warn!("Failed to export entity graph as PyG: {}", e);
15126                    } else {
15127                        info!(
15128                            "Entity relationship graph exported: {} nodes, {} edges",
15129                            node_count, edge_count
15130                        );
15131                    }
15132                }
15133            }
15134        } else {
            // Fewer than two companies: only a debug-level trace is emitted.
15135            debug!(
15136                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
15137                self.config.companies.len()
15138            );
15139        }
15140    }
15141
15142    /// Export a multi-layer hypergraph for RustGraph integration.
15143    ///
15144    /// Builds a 3-layer hypergraph:
15145    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
15146    /// - Layer 2: Process Events (all process family document flows + OCPM events)
15147    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
    ///
    /// Output is written to
    /// `<output dir>/<graph_export.output_subdirectory>/<hypergraph.output_subdirectory>`.
    /// A `hypergraph.output_format` of `"unified"` selects `RustGraphUnifiedExporter`;
    /// every other value uses `HypergraphExporter`.
    ///
    /// When compiled with the `streaming` feature and `hypergraph.stream_target` is
    /// set, the hypergraph is additionally streamed to that target. Streaming
    /// failures are logged with `warn!` and do not fail the call.
    ///
    /// On success the exported node and edge counts are added to `stats`,
    /// `stats.graph_export_count` is incremented by one, and the counts are returned
    /// together with the output directory.
    ///
    /// # Errors
    ///
    /// Returns an error built with `SynthError::generation` when the file export
    /// (unified or native) fails.
15148    #[allow(clippy::too_many_arguments)]
15149    fn export_hypergraph(
15150        &self,
15151        coa: &Arc<ChartOfAccounts>,
15152        entries: &[JournalEntry],
15153        document_flows: &DocumentFlowSnapshot,
15154        sourcing: &SourcingSnapshot,
15155        hr: &HrSnapshot,
15156        manufacturing: &ManufacturingSnapshot,
15157        banking: &BankingSnapshot,
15158        audit: &AuditSnapshot,
15159        financial_reporting: &FinancialReportingSnapshot,
15160        ocpm: &OcpmSnapshot,
15161        compliance: &ComplianceRegulationsSnapshot,
15162        stats: &mut EnhancedGenerationStatistics,
15163    ) -> SynthResult<HypergraphExportInfo> {
15164        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
15165        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
15166        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
15167        use datasynth_graph::models::hypergraph::AggregationStrategy;
15168
15169        let hg_settings = &self.config.graph_export.hypergraph;
15170
15171        // Parse aggregation strategy from config string
        // Unrecognised strings fall back to `PoolByCounterparty` without any log.
15172        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
15173            "truncate" => AggregationStrategy::Truncate,
15174            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
15175            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
15176            "importance_sample" => AggregationStrategy::ImportanceSample,
15177            _ => AggregationStrategy::PoolByCounterparty,
15178        };
15179
        // Most flags are copied straight from the hypergraph settings; the
        // exceptions are called out below.
15180        let builder_config = HypergraphConfig {
15181            max_nodes: hg_settings.max_nodes,
15182            aggregation_strategy,
15183            include_coso: hg_settings.governance_layer.include_coso,
15184            include_controls: hg_settings.governance_layer.include_controls,
15185            include_sox: hg_settings.governance_layer.include_sox,
15186            include_vendors: hg_settings.governance_layer.include_vendors,
15187            include_customers: hg_settings.governance_layer.include_customers,
15188            include_employees: hg_settings.governance_layer.include_employees,
15189            include_p2p: hg_settings.process_layer.include_p2p,
15190            include_o2c: hg_settings.process_layer.include_o2c,
15191            include_s2c: hg_settings.process_layer.include_s2c,
15192            include_h2r: hg_settings.process_layer.include_h2r,
15193            include_mfg: hg_settings.process_layer.include_mfg,
15194            include_bank: hg_settings.process_layer.include_bank,
15195            include_audit: hg_settings.process_layer.include_audit,
15196            include_r2r: hg_settings.process_layer.include_r2r,
15197            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
15198            docs_per_counterparty_threshold: hg_settings
15199                .process_layer
15200                .docs_per_counterparty_threshold,
15201            include_accounts: hg_settings.accounting_layer.include_accounts,
15202            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
15203            include_cross_layer_edges: hg_settings.cross_layer.enabled,
            // Taken from the top-level compliance section, not the hypergraph settings.
15204            include_compliance: self.config.compliance_regulations.enabled,
            // The remaining domain flags are hard-coded on rather than configurable.
15205            include_tax: true,
15206            include_treasury: true,
15207            include_esg: true,
15208            include_project: true,
15209            include_intercompany: true,
15210            include_temporal_events: true,
15211        };
15212
15213        let mut builder = HypergraphBuilder::new(builder_config);
15214
15215        // Layer 1: Governance & Controls
        // NOTE(review): called unconditionally; presumably the builder honours
        // `include_coso` internally — confirm.
15216        builder.add_coso_framework();
15217
15218        // Add controls if available (generated during JE generation)
15219        // Controls are generated per-company; we use the standard set
15220        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
15221            let controls = InternalControl::standard_controls();
15222            builder.add_controls(&controls);
15223        }
15224
15225        // Add master data
        // NOTE(review): passed unconditionally; presumably filtered by the
        // `include_vendors` / `include_customers` / `include_employees` flags
        // inside the builder — confirm.
15226        builder.add_vendors(&self.master_data.vendors);
15227        builder.add_customers(&self.master_data.customers);
15228        builder.add_employees(&self.master_data.employees);
15229
15230        // Layer 2: Process Events (all process families)
15231        builder.add_p2p_documents(
15232            &document_flows.purchase_orders,
15233            &document_flows.goods_receipts,
15234            &document_flows.vendor_invoices,
15235            &document_flows.payments,
15236        );
15237        builder.add_o2c_documents(
15238            &document_flows.sales_orders,
15239            &document_flows.deliveries,
15240            &document_flows.customer_invoices,
15241        );
15242        builder.add_s2c_documents(
15243            &sourcing.sourcing_projects,
15244            &sourcing.qualifications,
15245            &sourcing.rfx_events,
15246            &sourcing.bids,
15247            &sourcing.bid_evaluations,
15248            &sourcing.contracts,
15249        );
15250        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
15251        builder.add_mfg_documents(
15252            &manufacturing.production_orders,
15253            &manufacturing.quality_inspections,
15254            &manufacturing.cycle_counts,
15255        );
15256        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
15257        builder.add_audit_documents(
15258            &audit.engagements,
15259            &audit.workpapers,
15260            &audit.findings,
15261            &audit.evidence,
15262            &audit.risk_assessments,
15263            &audit.judgments,
15264            &audit.materiality_calculations,
15265            &audit.audit_opinions,
15266            &audit.going_concern_assessments,
15267        );
15268        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
15269
15270        // OCPM events as hyperedges
        // Skipped entirely when the OCPM snapshot carries no event log.
15271        if let Some(ref event_log) = ocpm.event_log {
15272            builder.add_ocpm_events(event_log);
15273        }
15274
15275        // Compliance regulations as cross-layer nodes
        // Requires both compliance generation and the governance layer's
        // `include_controls` flag.
15276        if self.config.compliance_regulations.enabled
15277            && hg_settings.governance_layer.include_controls
15278        {
15279            // Reconstruct ComplianceStandard objects from the registry
            // Records whose parsed id is not found in the registry are dropped
            // by the `filter_map`.
15280            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15281            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
15282                .standard_records
15283                .iter()
15284                .filter_map(|r| {
15285                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
15286                    registry.get(&sid).cloned()
15287                })
15288                .collect();
15289
15290            builder.add_compliance_regulations(
15291                &standards,
15292                &compliance.findings,
15293                &compliance.filings,
15294            );
15295        }
15296
15297        // Layer 3: Accounting Network
15298        builder.add_accounts(coa);
15299        builder.add_journal_entries_as_hyperedges(entries);
15300
15301        // Build the hypergraph
15302        let hypergraph = builder.build();
15303
15304        // Export
        // An explicitly set output path takes precedence over the configured
        // output directory.
15305        let output_dir = self
15306            .output_path
15307            .clone()
15308            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
15309        let hg_dir = output_dir
15310            .join(&self.config.graph_export.output_subdirectory)
15311            .join(&hg_settings.output_subdirectory);
15312
15313        // Branch on output format
        // Both arms reduce the exporter metadata to the same three counts; an
        // export failure is propagated to the caller via `?`.
15314        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
15315            "unified" => {
15316                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15317                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15318                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
15319                })?;
15320                (
15321                    metadata.num_nodes,
15322                    metadata.num_edges,
15323                    metadata.num_hyperedges,
15324                )
15325            }
15326            _ => {
15327                // "native" or any unrecognized format → use existing exporter
15328                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
15329                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15330                    SynthError::generation(format!("Hypergraph export failed: {e}"))
15331                })?;
15332                (
15333                    metadata.num_nodes,
15334                    metadata.num_edges,
15335                    metadata.num_hyperedges,
15336                )
15337            }
15338        };
15339
15340        // Stream to RustGraph ingest endpoint if configured
        // Compiled only with the `streaming` feature. Streaming always uses the
        // unified exporter, independent of `output_format`, and every failure in
        // this section is logged rather than returned.
15341        #[cfg(feature = "streaming")]
15342        if let Some(ref target_url) = hg_settings.stream_target {
15343            use crate::stream_client::{StreamClient, StreamConfig};
            // NOTE(review): presumably needed for `client.flush()` below — confirm
            // that `StreamClient` implements `std::io::Write`.
15344            use std::io::Write as _;
15345
            // The API key is optional: a missing or unreadable variable yields `None`.
15346            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
15347            let stream_config = StreamConfig {
15348                target_url: target_url.clone(),
15349                batch_size: hg_settings.stream_batch_size,
15350                api_key,
15351                ..StreamConfig::default()
15352            };
15353
15354            match StreamClient::new(stream_config) {
15355                Ok(mut client) => {
15356                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15357                    match exporter.export_to_writer(&hypergraph, &mut client) {
15358                        Ok(_) => {
15359                            if let Err(e) = client.flush() {
15360                                warn!("Failed to flush stream client: {}", e);
15361                            } else {
15362                                info!("Streamed {} records to {}", client.total_sent(), target_url);
15363                            }
15364                        }
15365                        Err(e) => {
15366                            warn!("Streaming export failed: {}", e);
15367                        }
15368                    }
15369                }
15370                Err(e) => {
15371                    warn!("Failed to create stream client: {}", e);
15372                }
15373            }
15374        }
15375
15376        // Update stats
        // Hyperedges are reported in the return value only; they are not added
        // to the running statistics.
15377        stats.graph_node_count += num_nodes;
15378        stats.graph_edge_count += num_edges;
15379        stats.graph_export_count += 1;
15380
15381        Ok(HypergraphExportInfo {
15382            node_count: num_nodes,
15383            edge_count: num_edges,
15384            hyperedge_count: num_hyperedges,
15385            output_path: hg_dir,
15386        })
15387    }
15388
15389    /// Generate banking KYC/AML data.
15390    ///
15391    /// Creates banking customers, accounts, and transactions with AML typology injection.
15392    /// Uses the BankingOrchestrator from synth-banking crate.
15393    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15394        let pb = self.create_progress_bar(100, "Generating Banking Data");
15395
15396        // Build the banking orchestrator from config
15397        let orchestrator = BankingOrchestratorBuilder::new()
15398            .config(self.config.banking.clone())
15399            .seed(self.seed + 9000)
15400            .country_pack(self.primary_pack().clone())
15401            .build();
15402
15403        if let Some(pb) = &pb {
15404            pb.inc(10);
15405        }
15406
15407        // Generate the banking data
15408        let result = orchestrator.generate();
15409
15410        if let Some(pb) = &pb {
15411            pb.inc(90);
15412            pb.finish_with_message(format!(
15413                "Banking: {} customers, {} transactions",
15414                result.customers.len(),
15415                result.transactions.len()
15416            ));
15417        }
15418
15419        // Cross-reference banking customers with core master data so that
15420        // banking customer names align with the enterprise customer list.
15421        // We rotate through core customers, overlaying their name and country
15422        // onto the generated banking customers where possible.
15423        let mut banking_customers = result.customers;
15424        let core_customers = &self.master_data.customers;
15425        if !core_customers.is_empty() {
15426            for (i, bc) in banking_customers.iter_mut().enumerate() {
15427                let core = &core_customers[i % core_customers.len()];
15428                bc.name = CustomerName::business(&core.name);
15429                bc.residence_country = core.country.clone();
15430                bc.enterprise_customer_id = Some(core.customer_id.clone());
15431            }
15432            debug!(
15433                "Cross-referenced {} banking customers with {} core customers",
15434                banking_customers.len(),
15435                core_customers.len()
15436            );
15437        }
15438
15439        Ok(BankingSnapshot {
15440            customers: banking_customers,
15441            accounts: result.accounts,
15442            transactions: result.transactions,
15443            transaction_labels: result.transaction_labels,
15444            customer_labels: result.customer_labels,
15445            account_labels: result.account_labels,
15446            relationship_labels: result.relationship_labels,
15447            narratives: result.narratives,
15448            suspicious_count: result.stats.suspicious_count,
15449            scenario_count: result.scenarios.len(),
15450        })
15451    }
15452
15453    /// Calculate total transactions to generate.
15454    fn calculate_total_transactions(&self) -> u64 {
15455        let months = self.config.global.period_months as f64;
15456        self.config
15457            .companies
15458            .iter()
15459            .map(|c| {
15460                let annual = c.annual_transaction_volume.count() as f64;
15461                let weighted = annual * c.volume_weight;
15462                (weighted * months / 12.0) as u64
15463            })
15464            .sum()
15465    }
15466
15467    /// Create a progress bar if progress display is enabled.
15468    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15469        if !self.phase_config.show_progress {
15470            return None;
15471        }
15472
15473        let pb = if let Some(mp) = &self.multi_progress {
15474            mp.add(ProgressBar::new(total))
15475        } else {
15476            ProgressBar::new(total)
15477        };
15478
15479        pb.set_style(
15480            ProgressStyle::default_bar()
15481                .template(&format!(
15482                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15483                ))
15484                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15485                .progress_chars("#>-"),
15486        );
15487
15488        Some(pb)
15489    }
15490
15491    /// Get the generated chart of accounts.
15492    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15493        self.coa.clone()
15494    }
15495
15496    /// Get the generated master data.
15497    pub fn get_master_data(&self) -> &MasterDataSnapshot {
15498        &self.master_data
15499    }
15500
15501    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
15502    fn phase_compliance_regulations(
15503        &mut self,
15504        _stats: &mut EnhancedGenerationStatistics,
15505    ) -> SynthResult<ComplianceRegulationsSnapshot> {
15506        if !self.phase_config.generate_compliance_regulations {
15507            return Ok(ComplianceRegulationsSnapshot::default());
15508        }
15509
15510        info!("Phase: Generating Compliance Regulations Data");
15511
15512        let cr_config = &self.config.compliance_regulations;
15513
15514        // Determine jurisdictions: from config or inferred from companies
15515        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15516            self.config
15517                .companies
15518                .iter()
15519                .map(|c| c.country.clone())
15520                .collect::<std::collections::HashSet<_>>()
15521                .into_iter()
15522                .collect()
15523        } else {
15524            cr_config.jurisdictions.clone()
15525        };
15526
15527        // Determine reference date
15528        let fallback_date =
15529            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15530        let reference_date = cr_config
15531            .reference_date
15532            .as_ref()
15533            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15534            .unwrap_or_else(|| {
15535                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15536                    .unwrap_or(fallback_date)
15537            });
15538
15539        // Generate standards registry data
15540        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15541        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15542        let cross_reference_records = reg_gen.generate_cross_reference_records();
15543        let jurisdiction_records =
15544            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15545
15546        info!(
15547            "  Standards: {} records, {} cross-references, {} jurisdictions",
15548            standard_records.len(),
15549            cross_reference_records.len(),
15550            jurisdiction_records.len()
15551        );
15552
15553        // Generate audit procedures (if enabled)
15554        let audit_procedures = if cr_config.audit_procedures.enabled {
15555            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15556                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15557                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15558                confidence_level: cr_config.audit_procedures.confidence_level,
15559                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15560            };
15561            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15562                self.seed + 9000,
15563                proc_config,
15564            );
15565            let registry = reg_gen.registry();
15566            let mut all_procs = Vec::new();
15567            for jurisdiction in &jurisdictions {
15568                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15569                all_procs.extend(procs);
15570            }
15571            info!("  Audit procedures: {}", all_procs.len());
15572            all_procs
15573        } else {
15574            Vec::new()
15575        };
15576
15577        // Generate compliance findings (if enabled)
15578        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15579            let finding_config =
15580                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15581                    finding_rate: cr_config.findings.finding_rate,
15582                    material_weakness_rate: cr_config.findings.material_weakness_rate,
15583                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15584                    generate_remediation: cr_config.findings.generate_remediation,
15585                };
15586            let mut finding_gen =
15587                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15588                    self.seed + 9100,
15589                    finding_config,
15590                );
15591            let mut all_findings = Vec::new();
15592            for company in &self.config.companies {
15593                let company_findings =
15594                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15595                all_findings.extend(company_findings);
15596            }
15597            info!("  Compliance findings: {}", all_findings.len());
15598            all_findings
15599        } else {
15600            Vec::new()
15601        };
15602
15603        // Generate regulatory filings (if enabled)
15604        let filings = if cr_config.filings.enabled {
15605            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15606                filing_types: cr_config.filings.filing_types.clone(),
15607                generate_status_progression: cr_config.filings.generate_status_progression,
15608            };
15609            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15610                self.seed + 9200,
15611                filing_config,
15612            );
15613            let company_codes: Vec<String> = self
15614                .config
15615                .companies
15616                .iter()
15617                .map(|c| c.code.clone())
15618                .collect();
15619            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15620                .unwrap_or(fallback_date);
15621            let filings = filing_gen.generate_filings(
15622                &company_codes,
15623                &jurisdictions,
15624                start_date,
15625                self.config.global.period_months,
15626            );
15627            info!("  Regulatory filings: {}", filings.len());
15628            filings
15629        } else {
15630            Vec::new()
15631        };
15632
15633        // Build compliance graph (if enabled)
15634        let compliance_graph = if cr_config.graph.enabled {
15635            let graph_config = datasynth_graph::ComplianceGraphConfig {
15636                include_standard_nodes: cr_config.graph.include_compliance_nodes,
15637                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15638                include_cross_references: cr_config.graph.include_cross_references,
15639                include_supersession_edges: cr_config.graph.include_supersession_edges,
15640                include_account_links: cr_config.graph.include_account_links,
15641                include_control_links: cr_config.graph.include_control_links,
15642                include_company_links: cr_config.graph.include_company_links,
15643            };
15644            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15645
15646            // Add standard nodes
15647            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15648                .iter()
15649                .map(|r| datasynth_graph::StandardNodeInput {
15650                    standard_id: r.standard_id.clone(),
15651                    title: r.title.clone(),
15652                    category: r.category.clone(),
15653                    domain: r.domain.clone(),
15654                    is_active: r.is_active,
15655                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
15656                    applicable_account_types: r.applicable_account_types.clone(),
15657                    applicable_processes: r.applicable_processes.clone(),
15658                })
15659                .collect();
15660            builder.add_standards(&standard_inputs);
15661
15662            // Add jurisdiction nodes
15663            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15664                jurisdiction_records
15665                    .iter()
15666                    .map(|r| datasynth_graph::JurisdictionNodeInput {
15667                        country_code: r.country_code.clone(),
15668                        country_name: r.country_name.clone(),
15669                        framework: r.accounting_framework.clone(),
15670                        standard_count: r.standard_count,
15671                        tax_rate: r.statutory_tax_rate,
15672                    })
15673                    .collect();
15674            builder.add_jurisdictions(&jurisdiction_inputs);
15675
15676            // Add cross-reference edges
15677            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15678                cross_reference_records
15679                    .iter()
15680                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15681                        from_standard: r.from_standard.clone(),
15682                        to_standard: r.to_standard.clone(),
15683                        relationship: r.relationship.clone(),
15684                        convergence_level: r.convergence_level,
15685                    })
15686                    .collect();
15687            builder.add_cross_references(&xref_inputs);
15688
15689            // Add jurisdiction→standard mappings
15690            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15691                .iter()
15692                .map(|r| datasynth_graph::JurisdictionMappingInput {
15693                    country_code: r.jurisdiction.clone(),
15694                    standard_id: r.standard_id.clone(),
15695                })
15696                .collect();
15697            builder.add_jurisdiction_mappings(&mapping_inputs);
15698
15699            // Add procedure nodes
15700            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15701                .iter()
15702                .map(|p| datasynth_graph::ProcedureNodeInput {
15703                    procedure_id: p.procedure_id.clone(),
15704                    standard_id: p.standard_id.clone(),
15705                    procedure_type: p.procedure_type.clone(),
15706                    sample_size: p.sample_size,
15707                    confidence_level: p.confidence_level,
15708                })
15709                .collect();
15710            builder.add_procedures(&proc_inputs);
15711
15712            // Add finding nodes
15713            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15714                .iter()
15715                .map(|f| datasynth_graph::FindingNodeInput {
15716                    finding_id: f.finding_id.to_string(),
15717                    standard_id: f
15718                        .related_standards
15719                        .first()
15720                        .map(|s| s.as_str().to_string())
15721                        .unwrap_or_default(),
15722                    severity: f.severity.to_string(),
15723                    deficiency_level: f.deficiency_level.to_string(),
15724                    severity_score: f.deficiency_level.severity_score(),
15725                    control_id: f.control_id.clone(),
15726                    affected_accounts: f.affected_accounts.clone(),
15727                })
15728                .collect();
15729            builder.add_findings(&finding_inputs);
15730
15731            // Cross-domain: link standards to accounts from chart of accounts
15732            if cr_config.graph.include_account_links {
15733                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15734                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15735                for std_record in &standard_records {
15736                    if let Some(std_obj) =
15737                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
15738                            &std_record.standard_id,
15739                        ))
15740                    {
15741                        for acct_type in &std_obj.applicable_account_types {
15742                            account_links.push(datasynth_graph::AccountLinkInput {
15743                                standard_id: std_record.standard_id.clone(),
15744                                account_code: acct_type.clone(),
15745                                account_name: acct_type.clone(),
15746                            });
15747                        }
15748                    }
15749                }
15750                builder.add_account_links(&account_links);
15751            }
15752
15753            // Cross-domain: link standards to internal controls
15754            if cr_config.graph.include_control_links {
15755                let mut control_links = Vec::new();
15756                // SOX/PCAOB standards link to all controls
15757                let sox_like_ids: Vec<String> = standard_records
15758                    .iter()
15759                    .filter(|r| {
15760                        r.standard_id.starts_with("SOX")
15761                            || r.standard_id.starts_with("PCAOB-AS-2201")
15762                    })
15763                    .map(|r| r.standard_id.clone())
15764                    .collect();
15765                // Get control IDs from config (C001-C060 standard controls)
15766                let control_ids = [
15767                    ("C001", "Cash Controls"),
15768                    ("C002", "Large Transaction Approval"),
15769                    ("C010", "PO Approval"),
15770                    ("C011", "Three-Way Match"),
15771                    ("C020", "Revenue Recognition"),
15772                    ("C021", "Credit Check"),
15773                    ("C030", "Manual JE Approval"),
15774                    ("C031", "Period Close Review"),
15775                    ("C032", "Account Reconciliation"),
15776                    ("C040", "Payroll Processing"),
15777                    ("C050", "Fixed Asset Capitalization"),
15778                    ("C060", "Intercompany Elimination"),
15779                ];
15780                for sox_id in &sox_like_ids {
15781                    for (ctrl_id, ctrl_name) in &control_ids {
15782                        control_links.push(datasynth_graph::ControlLinkInput {
15783                            standard_id: sox_id.clone(),
15784                            control_id: ctrl_id.to_string(),
15785                            control_name: ctrl_name.to_string(),
15786                        });
15787                    }
15788                }
15789                builder.add_control_links(&control_links);
15790            }
15791
15792            // Cross-domain: filing nodes with company links
15793            if cr_config.graph.include_company_links {
15794                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15795                    .iter()
15796                    .enumerate()
15797                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
15798                        filing_id: format!("F{:04}", i + 1),
15799                        filing_type: f.filing_type.to_string(),
15800                        company_code: f.company_code.clone(),
15801                        jurisdiction: f.jurisdiction.clone(),
15802                        status: format!("{:?}", f.status),
15803                    })
15804                    .collect();
15805                builder.add_filings(&filing_inputs);
15806            }
15807
15808            let graph = builder.build();
15809            info!(
15810                "  Compliance graph: {} nodes, {} edges",
15811                graph.nodes.len(),
15812                graph.edges.len()
15813            );
15814            Some(graph)
15815        } else {
15816            None
15817        };
15818
15819        self.check_resources_with_log("post-compliance-regulations")?;
15820
15821        Ok(ComplianceRegulationsSnapshot {
15822            standard_records,
15823            cross_reference_records,
15824            jurisdiction_records,
15825            audit_procedures,
15826            findings,
15827            filings,
15828            compliance_graph,
15829        })
15830    }
15831
15832    /// Build a lineage graph describing config → phase → output relationships.
15833    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15834        use super::lineage::LineageGraphBuilder;
15835
15836        let mut builder = LineageGraphBuilder::new();
15837
15838        // Config sections
15839        builder.add_config_section("config:global", "Global Config");
15840        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15841        builder.add_config_section("config:transactions", "Transaction Config");
15842
15843        // Generator phases
15844        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15845        builder.add_generator_phase("phase:je", "Journal Entry Generation");
15846
15847        // Config → phase edges
15848        builder.configured_by("phase:coa", "config:chart_of_accounts");
15849        builder.configured_by("phase:je", "config:transactions");
15850
15851        // Output files
15852        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15853        builder.produced_by("output:je", "phase:je");
15854
15855        // Optional phases based on config
15856        if self.phase_config.generate_master_data {
15857            builder.add_config_section("config:master_data", "Master Data Config");
15858            builder.add_generator_phase("phase:master_data", "Master Data Generation");
15859            builder.configured_by("phase:master_data", "config:master_data");
15860            builder.input_to("phase:master_data", "phase:je");
15861        }
15862
15863        if self.phase_config.generate_document_flows {
15864            builder.add_config_section("config:document_flows", "Document Flow Config");
15865            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15866            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15867            builder.configured_by("phase:p2p", "config:document_flows");
15868            builder.configured_by("phase:o2c", "config:document_flows");
15869
15870            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15871            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15872            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15873            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15874            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15875
15876            builder.produced_by("output:po", "phase:p2p");
15877            builder.produced_by("output:gr", "phase:p2p");
15878            builder.produced_by("output:vi", "phase:p2p");
15879            builder.produced_by("output:so", "phase:o2c");
15880            builder.produced_by("output:ci", "phase:o2c");
15881        }
15882
15883        if self.phase_config.inject_anomalies {
15884            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15885            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15886            builder.configured_by("phase:anomaly", "config:fraud");
15887            builder.add_output_file(
15888                "output:labels",
15889                "Anomaly Labels",
15890                "labels/anomaly_labels.csv",
15891            );
15892            builder.produced_by("output:labels", "phase:anomaly");
15893        }
15894
15895        if self.phase_config.generate_audit {
15896            builder.add_config_section("config:audit", "Audit Config");
15897            builder.add_generator_phase("phase:audit", "Audit Data Generation");
15898            builder.configured_by("phase:audit", "config:audit");
15899        }
15900
15901        if self.phase_config.generate_banking {
15902            builder.add_config_section("config:banking", "Banking Config");
15903            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15904            builder.configured_by("phase:banking", "config:banking");
15905        }
15906
15907        if self.config.llm.enabled {
15908            builder.add_config_section("config:llm", "LLM Enrichment Config");
15909            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15910            builder.configured_by("phase:llm_enrichment", "config:llm");
15911        }
15912
15913        if self.config.diffusion.enabled {
15914            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15915            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15916            builder.configured_by("phase:diffusion", "config:diffusion");
15917        }
15918
15919        if self.config.causal.enabled {
15920            builder.add_config_section("config:causal", "Causal Generation Config");
15921            builder.add_generator_phase("phase:causal", "Causal Overlay");
15922            builder.configured_by("phase:causal", "config:causal");
15923        }
15924
15925        builder.build()
15926    }
15927
15928    // -----------------------------------------------------------------------
15929    // Trial-balance helpers used to replace hardcoded proxy values
15930    // -----------------------------------------------------------------------
15931
15932    /// Compute total revenue for a company from its journal entries.
15933    ///
15934    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15935    /// net credits on all revenue-account lines filtered to `company_code`.
15936    fn compute_company_revenue(
15937        entries: &[JournalEntry],
15938        company_code: &str,
15939    ) -> rust_decimal::Decimal {
15940        use rust_decimal::Decimal;
15941        let mut revenue = Decimal::ZERO;
15942        for je in entries {
15943            if je.header.company_code != company_code {
15944                continue;
15945            }
15946            for line in &je.lines {
15947                if line.gl_account.starts_with('4') {
15948                    // Revenue is credit-normal
15949                    revenue += line.credit_amount - line.debit_amount;
15950                }
15951            }
15952        }
15953        revenue.max(Decimal::ZERO)
15954    }
15955
15956    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15957    ///
15958    /// Asset accounts start with "1"; liability accounts start with "2".
15959    fn compute_entity_net_assets(
15960        entries: &[JournalEntry],
15961        entity_code: &str,
15962    ) -> rust_decimal::Decimal {
15963        use rust_decimal::Decimal;
15964        let mut asset_net = Decimal::ZERO;
15965        let mut liability_net = Decimal::ZERO;
15966        for je in entries {
15967            if je.header.company_code != entity_code {
15968                continue;
15969            }
15970            for line in &je.lines {
15971                if line.gl_account.starts_with('1') {
15972                    asset_net += line.debit_amount - line.credit_amount;
15973                } else if line.gl_account.starts_with('2') {
15974                    liability_net += line.credit_amount - line.debit_amount;
15975                }
15976            }
15977        }
15978        asset_net - liability_net
15979    }
15980
15981    /// v3.5.1+: Run the statistical validation suite configured in
15982    /// `distributions.validation.tests` over the final amount
15983    /// distribution.  Collects every non-zero line-level amount (debit +
15984    /// credit) and hands it to the runners in
15985    /// `datasynth_core::distributions::validation`.
15986    ///
15987    /// Returns `Ok(None)` when validation is disabled (the default).
15988    /// When `reporting.fail_on_error = true` and any test fails, returns
15989    /// `Err` with a concise message; otherwise attaches the report to
15990    /// the result and lets callers inspect it.
15991    fn phase_statistical_validation(
15992        &self,
15993        entries: &[JournalEntry],
15994    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15995        use datasynth_config::schema::StatisticalTestConfig;
15996        use datasynth_core::distributions::{
15997            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15998            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15999        };
16000        use rust_decimal::prelude::ToPrimitive;
16001
16002        let cfg = &self.config.distributions.validation;
16003        if !cfg.enabled {
16004            return Ok(None);
16005        }
16006
16007        // Collect per-line positive amounts (debit + credit is zero on the
16008        // non-posting side, so this naturally picks the magnitude).
16009        let amounts: Vec<rust_decimal::Decimal> = entries
16010            .iter()
16011            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
16012            .filter(|a| *a > rust_decimal::Decimal::ZERO)
16013            .collect();
16014
16015        // v4.1.0+ paired (amount, line_count) per entry for correlation
16016        // checks. Amount per entry is the debit-side total (= credit-side
16017        // total for a balanced entry).
16018        let paired_amount_linecount: Vec<(f64, f64)> = entries
16019            .iter()
16020            .filter_map(|je| {
16021                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
16022                if amt > rust_decimal::Decimal::ZERO {
16023                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
16024                } else {
16025                    None
16026                }
16027            })
16028            .collect();
16029
16030        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
16031        for test_cfg in &cfg.tests {
16032            match test_cfg {
16033                StatisticalTestConfig::BenfordFirstDigit {
16034                    threshold_mad,
16035                    warning_mad,
16036                } => {
16037                    results.push(run_benford_first_digit(
16038                        &amounts,
16039                        *threshold_mad,
16040                        *warning_mad,
16041                    ));
16042                }
16043                StatisticalTestConfig::ChiSquared { bins, significance } => {
16044                    results.push(run_chi_squared(&amounts, *bins, *significance));
16045                }
16046                StatisticalTestConfig::DistributionFit {
16047                    target: _,
16048                    ks_significance,
16049                    method: _,
16050                } => {
16051                    // v3.5.1+: log-uniformity KS check. Target-specific
16052                    // fits against Normal / Exponential land in v4.1.1+.
16053                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
16054                }
16055                StatisticalTestConfig::AndersonDarling {
16056                    target: _,
16057                    significance,
16058                } => {
16059                    // v4.1.0+: A*² statistic against log-normal on the
16060                    // log-scale. Other targets follow the same pattern.
16061                    results.push(run_anderson_darling(&amounts, *significance));
16062                }
16063                StatisticalTestConfig::CorrelationCheck {
16064                    expected_correlations,
16065                } => {
16066                    // v4.1.0+: (amount, line_count) is tracked today.
16067                    // Other pairs resolve to Skipped pending richer
16068                    // per-entry attribute collection.
16069                    if expected_correlations.is_empty() {
16070                        results.push(StatisticalTestResult {
16071                            name: "correlation_check".to_string(),
16072                            outcome: TestOutcome::Skipped,
16073                            statistic: 0.0,
16074                            threshold: 0.0,
16075                            message: "no expected correlations declared".to_string(),
16076                        });
16077                    } else {
16078                        for ec in expected_correlations {
16079                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
16080                            let is_amount_linecount = (ec.field1 == "amount"
16081                                && ec.field2 == "line_count")
16082                                || (ec.field1 == "line_count" && ec.field2 == "amount");
16083                            if is_amount_linecount {
16084                                let xs: Vec<f64> =
16085                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
16086                                let ys: Vec<f64> =
16087                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
16088                                results.push(run_correlation_check(
16089                                    &pair_key,
16090                                    &xs,
16091                                    &ys,
16092                                    ec.expected_r,
16093                                    ec.tolerance,
16094                                ));
16095                            } else {
16096                                results.push(StatisticalTestResult {
16097                                    name: format!("correlation_check_{pair_key}"),
16098                                    outcome: TestOutcome::Skipped,
16099                                    statistic: 0.0,
16100                                    threshold: ec.tolerance,
16101                                    message: format!(
16102                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
16103                                        ec.field1, ec.field2
16104                                    ),
16105                                });
16106                            }
16107                        }
16108                    }
16109                }
16110            }
16111        }
16112
16113        let report = StatisticalValidationReport {
16114            sample_count: amounts.len(),
16115            results,
16116        };
16117
16118        if cfg.reporting.fail_on_error && !report.all_passed() {
16119            let failed = report.failed_names().join(", ");
16120            return Err(SynthError::validation(format!(
16121                "statistical validation failed: {failed}"
16122            )));
16123        }
16124
16125        Ok(Some(report))
16126    }
16127
16128    /// v3.3.0: analytics-metadata phase.
16129    ///
16130    /// Runs AFTER all JE-adding phases (including Phase 20b's
16131    /// fraud-bias sweep). Four sub-generators fire in sequence, each
16132    /// gated by an individual `analytics_metadata.<flag>` toggle:
16133    ///
16134    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
16135    ///    current-period account balances.
16136    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
16137    ///    configured `global.industry`.
16138    /// 3. `ManagementReportGenerator` — management-report artefacts.
16139    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
16140    fn phase_analytics_metadata(
16141        &mut self,
16142        entries: &[JournalEntry],
16143    ) -> SynthResult<AnalyticsMetadataSnapshot> {
16144        use datasynth_generators::drift_event_generator::DriftEventGenerator;
16145        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
16146        use datasynth_generators::management_report_generator::ManagementReportGenerator;
16147        use datasynth_generators::prior_year_generator::PriorYearGenerator;
16148        use std::collections::BTreeMap;
16149
16150        let mut snap = AnalyticsMetadataSnapshot::default();
16151
16152        if !self.phase_config.generate_analytics_metadata {
16153            return Ok(snap);
16154        }
16155
16156        let cfg = &self.config.analytics_metadata;
16157        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16158            .map(|d| d.year())
16159            .unwrap_or(2025);
16160
16161        // ---- 1. Prior-year comparatives ----
16162        if cfg.prior_year {
16163            let mut gen = PriorYearGenerator::new(self.seed + 9100);
16164            for company in &self.config.companies {
16165                // Aggregate current-period balances per account code +
16166                // account name from the entries slice.
16167                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
16168                    BTreeMap::new();
16169                for je in entries {
16170                    if je.header.company_code != company.code {
16171                        continue;
16172                    }
16173                    for line in &je.lines {
16174                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
16175                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
16176                        });
16177                        entry.1 += line.debit_amount - line.credit_amount;
16178                    }
16179                }
16180                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
16181                    .into_iter()
16182                    .filter(|(_, (_, bal))| !bal.is_zero())
16183                    .map(|(code, (name, bal))| (code, name, bal))
16184                    .collect();
16185                if !current.is_empty() {
16186                    let comparatives =
16187                        gen.generate_comparatives(&company.code, fiscal_year, &current);
16188                    snap.prior_year_comparatives.extend(comparatives);
16189                }
16190            }
16191            info!(
16192                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
16193                snap.prior_year_comparatives.len(),
16194                self.config.companies.len()
16195            );
16196        }
16197
16198        // ---- 2. Industry benchmarks ----
16199        if cfg.industry_benchmark {
16200            use datasynth_core::models::IndustrySector;
16201            let industry = match self.config.global.industry {
16202                IndustrySector::Manufacturing => "manufacturing",
16203                IndustrySector::Retail => "retail",
16204                IndustrySector::FinancialServices => "financial_services",
16205                IndustrySector::Technology => "technology",
16206                IndustrySector::Healthcare => "healthcare",
16207                _ => "other",
16208            };
16209            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
16210            let benchmarks = gen.generate(industry, fiscal_year);
16211            info!(
16212                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
16213                benchmarks.len()
16214            );
16215            snap.industry_benchmarks = benchmarks;
16216        }
16217
16218        // ---- 3. Management reports ----
16219        if cfg.management_reports {
16220            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
16221            let period_months = self.config.global.period_months;
16222            for company in &self.config.companies {
16223                let reports =
16224                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
16225                snap.management_reports.extend(reports);
16226            }
16227            info!(
16228                "v3.3.0 analytics: {} management reports across {} companies",
16229                snap.management_reports.len(),
16230                self.config.companies.len()
16231            );
16232        }
16233
16234        // ---- 4. Drift-event labels ----
16235        if cfg.drift_events {
16236            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
16237                .expect("hardcoded NaiveDate 2025-01-01 is valid");
16238            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16239                .unwrap_or(fallback_start);
16240            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
16241            let mut gen = DriftEventGenerator::new(self.seed + 9400);
16242            let drifts = gen.generate_standalone_drifts(start_date, end_date);
16243            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
16244            snap.drift_events = drifts;
16245        }
16246        // `entries` parameter reserved for future JE-aware drift detection
16247        let _ = entries;
16248
16249        Ok(snap)
16250    }
16251}
16252
16253/// Get the directory name for a graph export format.
16254fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
16255    match format {
16256        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
16257        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
16258        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
16259        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
16260        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
16261    }
16262}
16263
16264/// Aggregate journal entry lines into per-account trial balance rows.
16265///
16266/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
16267/// debit/credit totals and a net balance (debit minus credit).
16268fn compute_trial_balance_entries(
16269    entries: &[JournalEntry],
16270    entity_code: &str,
16271    fiscal_year: i32,
16272    coa: Option<&ChartOfAccounts>,
16273) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
16274    use std::collections::BTreeMap;
16275
16276    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
16277        BTreeMap::new();
16278
16279    for je in entries {
16280        for line in &je.lines {
16281            let entry = balances.entry(line.account_code.clone()).or_default();
16282            entry.0 += line.debit_amount;
16283            entry.1 += line.credit_amount;
16284        }
16285    }
16286
16287    balances
16288        .into_iter()
16289        .map(
16290            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
16291                account_description: coa
16292                    .and_then(|c| c.get_account(&account_code))
16293                    .map(|a| a.description().to_string())
16294                    .unwrap_or_else(|| account_code.clone()),
16295                account_code,
16296                debit_balance: debit,
16297                credit_balance: credit,
16298                net_balance: debit - credit,
16299                entity_code: entity_code.to_string(),
16300                period: format!("FY{}", fiscal_year),
16301            },
16302        )
16303        .collect()
16304}
16305
16306#[cfg(test)]
16307mod tests {
16308    use super::*;
16309    use datasynth_config::schema::*;
16310
16311    fn create_test_config() -> GeneratorConfig {
16312        GeneratorConfig {
16313            global: GlobalConfig {
16314                industry: IndustrySector::Manufacturing,
16315                start_date: "2024-01-01".to_string(),
16316                period_months: 1,
16317                seed: Some(42),
16318                parallel: false,
16319                group_currency: "USD".to_string(),
16320                presentation_currency: None,
16321                worker_threads: 0,
16322                memory_limit_mb: 0,
16323                fiscal_year_months: None,
16324            },
16325            companies: vec![CompanyConfig {
16326                code: "1000".to_string(),
16327                name: "Test Company".to_string(),
16328                currency: "USD".to_string(),
16329                functional_currency: None,
16330                country: "US".to_string(),
16331                annual_transaction_volume: TransactionVolume::TenK,
16332                volume_weight: 1.0,
16333                fiscal_year_variant: "K4".to_string(),
16334            }],
16335            chart_of_accounts: ChartOfAccountsConfig {
16336                complexity: CoAComplexity::Small,
16337                industry_specific: true,
16338                custom_accounts: None,
16339                min_hierarchy_depth: 2,
16340                max_hierarchy_depth: 4,
16341                expand_industry_subaccounts: false,
16342            },
16343            transactions: TransactionConfig::default(),
16344            output: OutputConfig::default(),
16345            fraud: FraudConfig::default(),
16346            internal_controls: InternalControlsConfig::default(),
16347            business_processes: BusinessProcessConfig::default(),
16348            user_personas: UserPersonaConfig::default(),
16349            templates: TemplateConfig::default(),
16350            approval: ApprovalConfig::default(),
16351            departments: DepartmentConfig::default(),
16352            master_data: MasterDataConfig::default(),
16353            document_flows: DocumentFlowConfig::default(),
16354            intercompany: IntercompanyConfig::default(),
16355            balance: BalanceConfig::default(),
16356            ocpm: OcpmConfig::default(),
16357            audit: AuditGenerationConfig::default(),
16358            banking: datasynth_banking::BankingConfig::default(),
16359            data_quality: DataQualitySchemaConfig::default(),
16360            scenario: ScenarioConfig::default(),
16361            temporal: TemporalDriftConfig::default(),
16362            graph_export: GraphExportConfig::default(),
16363            streaming: StreamingSchemaConfig::default(),
16364            rate_limit: RateLimitSchemaConfig::default(),
16365            temporal_attributes: TemporalAttributeSchemaConfig::default(),
16366            relationships: RelationshipSchemaConfig::default(),
16367            accounting_standards: AccountingStandardsConfig::default(),
16368            audit_standards: AuditStandardsConfig::default(),
16369            distributions: Default::default(),
16370            temporal_patterns: Default::default(),
16371            vendor_network: VendorNetworkSchemaConfig::default(),
16372            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
16373            relationship_strength: RelationshipStrengthSchemaConfig::default(),
16374            cross_process_links: CrossProcessLinksSchemaConfig::default(),
16375            organizational_events: OrganizationalEventsSchemaConfig::default(),
16376            behavioral_drift: BehavioralDriftSchemaConfig::default(),
16377            market_drift: MarketDriftSchemaConfig::default(),
16378            drift_labeling: DriftLabelingSchemaConfig::default(),
16379            anomaly_injection: Default::default(),
16380            industry_specific: Default::default(),
16381            fingerprint_privacy: Default::default(),
16382            quality_gates: Default::default(),
16383            compliance: Default::default(),
16384            webhooks: Default::default(),
16385            llm: Default::default(),
16386            diffusion: Default::default(),
16387            causal: Default::default(),
16388            source_to_pay: Default::default(),
16389            financial_reporting: Default::default(),
16390            hr: Default::default(),
16391            manufacturing: Default::default(),
16392            sales_quotes: Default::default(),
16393            tax: Default::default(),
16394            treasury: Default::default(),
16395            project_accounting: Default::default(),
16396            esg: Default::default(),
16397            country_packs: None,
16398            scenarios: Default::default(),
16399            session: Default::default(),
16400            compliance_regulations: Default::default(),
16401            analytics_metadata: Default::default(),
16402            concentration: Default::default(),
16403        }
16404    }
16405
16406    #[test]
16407    fn test_enhanced_orchestrator_creation() {
16408        let config = create_test_config();
16409        let orchestrator = EnhancedOrchestrator::with_defaults(config);
16410        assert!(orchestrator.is_ok());
16411    }
16412
16413    #[test]
16414    fn test_minimal_generation() {
16415        let config = create_test_config();
16416        let phase_config = PhaseConfig {
16417            generate_master_data: false,
16418            generate_document_flows: false,
16419            generate_journal_entries: true,
16420            inject_anomalies: false,
16421            show_progress: false,
16422            ..Default::default()
16423        };
16424
16425        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16426        let result = orchestrator.generate();
16427
16428        assert!(result.is_ok());
16429        let result = result.unwrap();
16430        assert!(!result.journal_entries.is_empty());
16431    }
16432
16433    #[test]
16434    fn test_master_data_generation() {
16435        let config = create_test_config();
16436        let phase_config = PhaseConfig {
16437            generate_master_data: true,
16438            generate_document_flows: false,
16439            generate_journal_entries: false,
16440            inject_anomalies: false,
16441            show_progress: false,
16442            vendors_per_company: 5,
16443            customers_per_company: 5,
16444            materials_per_company: 10,
16445            assets_per_company: 5,
16446            employees_per_company: 10,
16447            ..Default::default()
16448        };
16449
16450        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16451        let result = orchestrator.generate().unwrap();
16452
16453        assert!(!result.master_data.vendors.is_empty());
16454        assert!(!result.master_data.customers.is_empty());
16455        assert!(!result.master_data.materials.is_empty());
16456    }
16457
16458    #[test]
16459    fn test_document_flow_generation() {
16460        let config = create_test_config();
16461        let phase_config = PhaseConfig {
16462            generate_master_data: true,
16463            generate_document_flows: true,
16464            generate_journal_entries: false,
16465            inject_anomalies: false,
16466            inject_data_quality: false,
16467            validate_balances: false,
16468            validate_coa_coverage_strict: false,
16469            generate_ocpm_events: false,
16470            show_progress: false,
16471            vendors_per_company: 5,
16472            customers_per_company: 5,
16473            materials_per_company: 10,
16474            assets_per_company: 5,
16475            employees_per_company: 10,
16476            p2p_chains: 5,
16477            o2c_chains: 5,
16478            ..Default::default()
16479        };
16480
16481        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16482        let result = orchestrator.generate().unwrap();
16483
16484        // Should have generated P2P and O2C chains
16485        assert!(!result.document_flows.p2p_chains.is_empty());
16486        assert!(!result.document_flows.o2c_chains.is_empty());
16487
16488        // Flattened documents should be populated
16489        assert!(!result.document_flows.purchase_orders.is_empty());
16490        assert!(!result.document_flows.sales_orders.is_empty());
16491    }
16492
16493    #[test]
16494    fn test_anomaly_injection() {
16495        let config = create_test_config();
16496        let phase_config = PhaseConfig {
16497            generate_master_data: false,
16498            generate_document_flows: false,
16499            generate_journal_entries: true,
16500            inject_anomalies: true,
16501            show_progress: false,
16502            ..Default::default()
16503        };
16504
16505        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16506        let result = orchestrator.generate().unwrap();
16507
16508        // Should have journal entries
16509        assert!(!result.journal_entries.is_empty());
16510
16511        // With ~833 entries and 2% rate, expect some anomalies
16512        // Note: This is probabilistic, so we just verify the structure exists
16513        assert!(result.anomaly_labels.summary.is_some());
16514    }
16515
16516    #[test]
16517    fn test_full_generation_pipeline() {
16518        let config = create_test_config();
16519        let phase_config = PhaseConfig {
16520            generate_master_data: true,
16521            generate_document_flows: true,
16522            generate_journal_entries: true,
16523            inject_anomalies: false,
16524            inject_data_quality: false,
16525            validate_balances: true,
16526            validate_coa_coverage_strict: false,
16527            generate_ocpm_events: false,
16528            show_progress: false,
16529            vendors_per_company: 3,
16530            customers_per_company: 3,
16531            materials_per_company: 5,
16532            assets_per_company: 3,
16533            employees_per_company: 5,
16534            p2p_chains: 3,
16535            o2c_chains: 3,
16536            ..Default::default()
16537        };
16538
16539        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16540        let result = orchestrator.generate().unwrap();
16541
16542        // All phases should have results
16543        assert!(!result.master_data.vendors.is_empty());
16544        assert!(!result.master_data.customers.is_empty());
16545        assert!(!result.document_flows.p2p_chains.is_empty());
16546        assert!(!result.document_flows.o2c_chains.is_empty());
16547        assert!(!result.journal_entries.is_empty());
16548        assert!(result.statistics.accounts_count > 0);
16549
16550        // Subledger linking should have run
16551        assert!(!result.subledger.ap_invoices.is_empty());
16552        assert!(!result.subledger.ar_invoices.is_empty());
16553
16554        // Balance validation should have run
16555        assert!(result.balance_validation.validated);
16556        assert!(result.balance_validation.entries_processed > 0);
16557    }
16558
16559    #[test]
16560    fn test_subledger_linking() {
16561        let config = create_test_config();
16562        let phase_config = PhaseConfig {
16563            generate_master_data: true,
16564            generate_document_flows: true,
16565            generate_journal_entries: false,
16566            inject_anomalies: false,
16567            inject_data_quality: false,
16568            validate_balances: false,
16569            validate_coa_coverage_strict: false,
16570            generate_ocpm_events: false,
16571            show_progress: false,
16572            vendors_per_company: 5,
16573            customers_per_company: 5,
16574            materials_per_company: 10,
16575            assets_per_company: 3,
16576            employees_per_company: 5,
16577            p2p_chains: 5,
16578            o2c_chains: 5,
16579            ..Default::default()
16580        };
16581
16582        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16583        let result = orchestrator.generate().unwrap();
16584
16585        // Should have document flows
16586        assert!(!result.document_flows.vendor_invoices.is_empty());
16587        assert!(!result.document_flows.customer_invoices.is_empty());
16588
16589        // Subledger should be linked from document flows
16590        assert!(!result.subledger.ap_invoices.is_empty());
16591        assert!(!result.subledger.ar_invoices.is_empty());
16592
16593        // AP invoices count should match vendor invoices count
16594        assert_eq!(
16595            result.subledger.ap_invoices.len(),
16596            result.document_flows.vendor_invoices.len()
16597        );
16598
16599        // AR invoices count should match customer invoices count
16600        assert_eq!(
16601            result.subledger.ar_invoices.len(),
16602            result.document_flows.customer_invoices.len()
16603        );
16604
16605        // Statistics should reflect subledger counts
16606        assert_eq!(
16607            result.statistics.ap_invoice_count,
16608            result.subledger.ap_invoices.len()
16609        );
16610        assert_eq!(
16611            result.statistics.ar_invoice_count,
16612            result.subledger.ar_invoices.len()
16613        );
16614    }
16615
16616    #[test]
16617    fn test_balance_validation() {
16618        let config = create_test_config();
16619        let phase_config = PhaseConfig {
16620            generate_master_data: false,
16621            generate_document_flows: false,
16622            generate_journal_entries: true,
16623            inject_anomalies: false,
16624            validate_balances: true,
16625            validate_coa_coverage_strict: false,
16626            show_progress: false,
16627            ..Default::default()
16628        };
16629
16630        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16631        let result = orchestrator.generate().unwrap();
16632
16633        // Balance validation should run
16634        assert!(result.balance_validation.validated);
16635        assert!(result.balance_validation.entries_processed > 0);
16636
16637        // Generated JEs should be balanced (no unbalanced entries)
16638        assert!(!result.balance_validation.has_unbalanced_entries);
16639
16640        // Total debits should equal total credits
16641        assert_eq!(
16642            result.balance_validation.total_debits,
16643            result.balance_validation.total_credits
16644        );
16645    }
16646
16647    #[test]
16648    fn test_statistics_accuracy() {
16649        let config = create_test_config();
16650        let phase_config = PhaseConfig {
16651            generate_master_data: true,
16652            generate_document_flows: false,
16653            generate_journal_entries: true,
16654            inject_anomalies: false,
16655            show_progress: false,
16656            vendors_per_company: 10,
16657            customers_per_company: 20,
16658            materials_per_company: 15,
16659            assets_per_company: 5,
16660            employees_per_company: 8,
16661            ..Default::default()
16662        };
16663
16664        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16665        let result = orchestrator.generate().unwrap();
16666
16667        // Statistics should match actual data
16668        assert_eq!(
16669            result.statistics.vendor_count,
16670            result.master_data.vendors.len()
16671        );
16672        assert_eq!(
16673            result.statistics.customer_count,
16674            result.master_data.customers.len()
16675        );
16676        assert_eq!(
16677            result.statistics.material_count,
16678            result.master_data.materials.len()
16679        );
16680        assert_eq!(
16681            result.statistics.total_entries as usize,
16682            result.journal_entries.len()
16683        );
16684    }
16685
16686    #[test]
16687    fn test_phase_config_defaults() {
16688        let config = PhaseConfig::default();
16689        assert!(config.generate_master_data);
16690        assert!(config.generate_document_flows);
16691        assert!(config.generate_journal_entries);
16692        assert!(!config.inject_anomalies);
16693        assert!(config.validate_balances);
16694        assert!(config.show_progress);
16695        assert!(config.vendors_per_company > 0);
16696        assert!(config.customers_per_company > 0);
16697    }
16698
16699    #[test]
16700    fn test_get_coa_before_generation() {
16701        let config = create_test_config();
16702        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
16703
16704        // Before generation, CoA should be None
16705        assert!(orchestrator.get_coa().is_none());
16706    }
16707
16708    #[test]
16709    fn test_get_coa_after_generation() {
16710        let config = create_test_config();
16711        let phase_config = PhaseConfig {
16712            generate_master_data: false,
16713            generate_document_flows: false,
16714            generate_journal_entries: true,
16715            inject_anomalies: false,
16716            show_progress: false,
16717            ..Default::default()
16718        };
16719
16720        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16721        let _ = orchestrator.generate().unwrap();
16722
16723        // After generation, CoA should be available
16724        assert!(orchestrator.get_coa().is_some());
16725    }
16726
16727    #[test]
16728    fn test_get_master_data() {
16729        let config = create_test_config();
16730        let phase_config = PhaseConfig {
16731            generate_master_data: true,
16732            generate_document_flows: false,
16733            generate_journal_entries: false,
16734            inject_anomalies: false,
16735            show_progress: false,
16736            vendors_per_company: 5,
16737            customers_per_company: 5,
16738            materials_per_company: 5,
16739            assets_per_company: 5,
16740            employees_per_company: 5,
16741            ..Default::default()
16742        };
16743
16744        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16745        let result = orchestrator.generate().unwrap();
16746
16747        // After generate(), master_data is moved into the result
16748        assert!(!result.master_data.vendors.is_empty());
16749    }
16750
16751    #[test]
16752    fn test_with_progress_builder() {
16753        let config = create_test_config();
16754        let orchestrator = EnhancedOrchestrator::with_defaults(config)
16755            .unwrap()
16756            .with_progress(false);
16757
16758        // Should still work without progress
16759        assert!(!orchestrator.phase_config.show_progress);
16760    }
16761
16762    #[test]
16763    fn test_multi_company_generation() {
16764        let mut config = create_test_config();
16765        config.companies.push(CompanyConfig {
16766            code: "2000".to_string(),
16767            name: "Subsidiary".to_string(),
16768            currency: "EUR".to_string(),
16769            functional_currency: None,
16770            country: "DE".to_string(),
16771            annual_transaction_volume: TransactionVolume::TenK,
16772            volume_weight: 0.5,
16773            fiscal_year_variant: "K4".to_string(),
16774        });
16775
16776        let phase_config = PhaseConfig {
16777            generate_master_data: true,
16778            generate_document_flows: false,
16779            generate_journal_entries: true,
16780            inject_anomalies: false,
16781            show_progress: false,
16782            vendors_per_company: 5,
16783            customers_per_company: 5,
16784            materials_per_company: 5,
16785            assets_per_company: 5,
16786            employees_per_company: 5,
16787            ..Default::default()
16788        };
16789
16790        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16791        let result = orchestrator.generate().unwrap();
16792
16793        // Should have master data for both companies
16794        assert!(result.statistics.vendor_count >= 10); // 5 per company
16795        assert!(result.statistics.customer_count >= 10);
16796        assert!(result.statistics.companies_count == 2);
16797    }
16798
16799    #[test]
16800    fn test_empty_master_data_skips_document_flows() {
16801        let config = create_test_config();
16802        let phase_config = PhaseConfig {
16803            generate_master_data: false,   // Skip master data
16804            generate_document_flows: true, // Try to generate flows
16805            generate_journal_entries: false,
16806            inject_anomalies: false,
16807            show_progress: false,
16808            ..Default::default()
16809        };
16810
16811        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16812        let result = orchestrator.generate().unwrap();
16813
16814        // Without master data, document flows should be empty
16815        assert!(result.document_flows.p2p_chains.is_empty());
16816        assert!(result.document_flows.o2c_chains.is_empty());
16817    }
16818
16819    #[test]
16820    fn test_journal_entry_line_item_count() {
16821        let config = create_test_config();
16822        let phase_config = PhaseConfig {
16823            generate_master_data: false,
16824            generate_document_flows: false,
16825            generate_journal_entries: true,
16826            inject_anomalies: false,
16827            show_progress: false,
16828            ..Default::default()
16829        };
16830
16831        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16832        let result = orchestrator.generate().unwrap();
16833
16834        // Total line items should match sum of all entry line counts
16835        let calculated_line_items: u64 = result
16836            .journal_entries
16837            .iter()
16838            .map(|e| e.line_count() as u64)
16839            .sum();
16840        assert_eq!(result.statistics.total_line_items, calculated_line_items);
16841    }
16842
16843    #[test]
16844    fn test_audit_generation() {
16845        let config = create_test_config();
16846        let phase_config = PhaseConfig {
16847            generate_master_data: false,
16848            generate_document_flows: false,
16849            generate_journal_entries: true,
16850            inject_anomalies: false,
16851            show_progress: false,
16852            generate_audit: true,
16853            audit_engagements: 2,
16854            workpapers_per_engagement: 5,
16855            evidence_per_workpaper: 2,
16856            risks_per_engagement: 3,
16857            findings_per_engagement: 2,
16858            judgments_per_engagement: 2,
16859            ..Default::default()
16860        };
16861
16862        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16863        let result = orchestrator.generate().unwrap();
16864
16865        // Should have generated audit data
16866        assert_eq!(result.audit.engagements.len(), 2);
16867        assert!(!result.audit.workpapers.is_empty());
16868        assert!(!result.audit.evidence.is_empty());
16869        assert!(!result.audit.risk_assessments.is_empty());
16870        assert!(!result.audit.findings.is_empty());
16871        assert!(!result.audit.judgments.is_empty());
16872
16873        // New ISA entity collections should also be populated
16874        assert!(
16875            !result.audit.confirmations.is_empty(),
16876            "ISA 505 confirmations should be generated"
16877        );
16878        assert!(
16879            !result.audit.confirmation_responses.is_empty(),
16880            "ISA 505 confirmation responses should be generated"
16881        );
16882        assert!(
16883            !result.audit.procedure_steps.is_empty(),
16884            "ISA 330 procedure steps should be generated"
16885        );
16886        // Samples may or may not be generated depending on workpaper sampling methods
16887        assert!(
16888            !result.audit.analytical_results.is_empty(),
16889            "ISA 520 analytical procedures should be generated"
16890        );
16891        assert!(
16892            !result.audit.ia_functions.is_empty(),
16893            "ISA 610 IA functions should be generated (one per engagement)"
16894        );
16895        assert!(
16896            !result.audit.related_parties.is_empty(),
16897            "ISA 550 related parties should be generated"
16898        );
16899
16900        // Statistics should match
16901        assert_eq!(
16902            result.statistics.audit_engagement_count,
16903            result.audit.engagements.len()
16904        );
16905        assert_eq!(
16906            result.statistics.audit_workpaper_count,
16907            result.audit.workpapers.len()
16908        );
16909        assert_eq!(
16910            result.statistics.audit_evidence_count,
16911            result.audit.evidence.len()
16912        );
16913        assert_eq!(
16914            result.statistics.audit_risk_count,
16915            result.audit.risk_assessments.len()
16916        );
16917        assert_eq!(
16918            result.statistics.audit_finding_count,
16919            result.audit.findings.len()
16920        );
16921        assert_eq!(
16922            result.statistics.audit_judgment_count,
16923            result.audit.judgments.len()
16924        );
16925        assert_eq!(
16926            result.statistics.audit_confirmation_count,
16927            result.audit.confirmations.len()
16928        );
16929        assert_eq!(
16930            result.statistics.audit_confirmation_response_count,
16931            result.audit.confirmation_responses.len()
16932        );
16933        assert_eq!(
16934            result.statistics.audit_procedure_step_count,
16935            result.audit.procedure_steps.len()
16936        );
16937        assert_eq!(
16938            result.statistics.audit_sample_count,
16939            result.audit.samples.len()
16940        );
16941        assert_eq!(
16942            result.statistics.audit_analytical_result_count,
16943            result.audit.analytical_results.len()
16944        );
16945        assert_eq!(
16946            result.statistics.audit_ia_function_count,
16947            result.audit.ia_functions.len()
16948        );
16949        assert_eq!(
16950            result.statistics.audit_ia_report_count,
16951            result.audit.ia_reports.len()
16952        );
16953        assert_eq!(
16954            result.statistics.audit_related_party_count,
16955            result.audit.related_parties.len()
16956        );
16957        assert_eq!(
16958            result.statistics.audit_related_party_transaction_count,
16959            result.audit.related_party_transactions.len()
16960        );
16961    }
16962
16963    #[test]
16964    fn test_new_phases_disabled_by_default() {
16965        let config = create_test_config();
16966        // Verify new config fields default to disabled
16967        assert!(!config.llm.enabled);
16968        assert!(!config.diffusion.enabled);
16969        assert!(!config.causal.enabled);
16970
16971        let phase_config = PhaseConfig {
16972            generate_master_data: false,
16973            generate_document_flows: false,
16974            generate_journal_entries: true,
16975            inject_anomalies: false,
16976            show_progress: false,
16977            ..Default::default()
16978        };
16979
16980        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16981        let result = orchestrator.generate().unwrap();
16982
16983        // All new phase statistics should be zero when disabled
16984        assert_eq!(result.statistics.llm_enrichment_ms, 0);
16985        assert_eq!(result.statistics.llm_vendors_enriched, 0);
16986        assert_eq!(result.statistics.diffusion_enhancement_ms, 0);
16987        assert_eq!(result.statistics.diffusion_samples_generated, 0);
16988        assert_eq!(result.statistics.causal_generation_ms, 0);
16989        assert_eq!(result.statistics.causal_samples_generated, 0);
16990        assert!(result.statistics.causal_validation_passed.is_none());
16991        assert_eq!(result.statistics.counterfactual_pair_count, 0);
16992        assert!(result.counterfactual_pairs.is_empty());
16993    }
16994
16995    #[test]
16996    fn test_counterfactual_generation_enabled() {
16997        let config = create_test_config();
16998        let phase_config = PhaseConfig {
16999            generate_master_data: false,
17000            generate_document_flows: false,
17001            generate_journal_entries: true,
17002            inject_anomalies: false,
17003            show_progress: false,
17004            generate_counterfactuals: true,
17005            generate_period_close: false, // Disable so entry count matches counterfactual pairs
17006            ..Default::default()
17007        };
17008
17009        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
17010        let result = orchestrator.generate().unwrap();
17011
17012        // With JE generation enabled, counterfactual pairs should be generated
17013        if !result.journal_entries.is_empty() {
17014            assert_eq!(
17015                result.counterfactual_pairs.len(),
17016                result.journal_entries.len()
17017            );
17018            assert_eq!(
17019                result.statistics.counterfactual_pair_count,
17020                result.journal_entries.len()
17021            );
17022            // Each pair should have a distinct pair_id
17023            let ids: std::collections::HashSet<_> = result
17024                .counterfactual_pairs
17025                .iter()
17026                .map(|p| p.pair_id.clone())
17027                .collect();
17028            assert_eq!(ids.len(), result.counterfactual_pairs.len());
17029        }
17030    }
17031
17032    #[test]
17033    fn test_llm_enrichment_enabled() {
17034        let mut config = create_test_config();
17035        config.llm.enabled = true;
17036        config.llm.max_vendor_enrichments = 3;
17037
17038        let phase_config = PhaseConfig {
17039            generate_master_data: true,
17040            generate_document_flows: false,
17041            generate_journal_entries: false,
17042            inject_anomalies: false,
17043            show_progress: false,
17044            vendors_per_company: 5,
17045            customers_per_company: 3,
17046            materials_per_company: 3,
17047            assets_per_company: 3,
17048            employees_per_company: 3,
17049            ..Default::default()
17050        };
17051
17052        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
17053        let result = orchestrator.generate().unwrap();
17054
17055        // LLM enrichment should have run
17056        assert!(result.statistics.llm_vendors_enriched > 0);
17057        assert!(result.statistics.llm_vendors_enriched <= 3);
17058    }
17059
17060    #[test]
17061    fn test_diffusion_enhancement_enabled() {
17062        let mut config = create_test_config();
17063        config.diffusion.enabled = true;
17064        config.diffusion.n_steps = 50;
17065        config.diffusion.sample_size = 20;
17066
17067        let phase_config = PhaseConfig {
17068            generate_master_data: false,
17069            generate_document_flows: false,
17070            generate_journal_entries: true,
17071            inject_anomalies: false,
17072            show_progress: false,
17073            ..Default::default()
17074        };
17075
17076        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
17077        let result = orchestrator.generate().unwrap();
17078
17079        // Diffusion phase should have generated samples
17080        assert_eq!(result.statistics.diffusion_samples_generated, 20);
17081    }
17082
17083    #[test]
17084    fn test_causal_overlay_enabled() {
17085        let mut config = create_test_config();
17086        config.causal.enabled = true;
17087        config.causal.template = "fraud_detection".to_string();
17088        config.causal.sample_size = 100;
17089        config.causal.validate = true;
17090
17091        let phase_config = PhaseConfig {
17092            generate_master_data: false,
17093            generate_document_flows: false,
17094            generate_journal_entries: true,
17095            inject_anomalies: false,
17096            show_progress: false,
17097            ..Default::default()
17098        };
17099
17100        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
17101        let result = orchestrator.generate().unwrap();
17102
17103        // Causal phase should have generated samples
17104        assert_eq!(result.statistics.causal_samples_generated, 100);
17105        // Validation should have run
17106        assert!(result.statistics.causal_validation_passed.is_some());
17107    }
17108
17109    #[test]
17110    fn test_causal_overlay_revenue_cycle_template() {
17111        let mut config = create_test_config();
17112        config.causal.enabled = true;
17113        config.causal.template = "revenue_cycle".to_string();
17114        config.causal.sample_size = 50;
17115        config.causal.validate = false;
17116
17117        let phase_config = PhaseConfig {
17118            generate_master_data: false,
17119            generate_document_flows: false,
17120            generate_journal_entries: true,
17121            inject_anomalies: false,
17122            show_progress: false,
17123            ..Default::default()
17124        };
17125
17126        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
17127        let result = orchestrator.generate().unwrap();
17128
17129        // Causal phase should have generated samples
17130        assert_eq!(result.statistics.causal_samples_generated, 50);
17131        // Validation was disabled
17132        assert!(result.statistics.causal_validation_passed.is_none());
17133    }
17134
17135    #[test]
17136    fn test_all_new_phases_enabled_together() {
17137        let mut config = create_test_config();
17138        config.llm.enabled = true;
17139        config.llm.max_vendor_enrichments = 2;
17140        config.diffusion.enabled = true;
17141        config.diffusion.n_steps = 20;
17142        config.diffusion.sample_size = 10;
17143        config.causal.enabled = true;
17144        config.causal.sample_size = 50;
17145        config.causal.validate = true;
17146
17147        let phase_config = PhaseConfig {
17148            generate_master_data: true,
17149            generate_document_flows: false,
17150            generate_journal_entries: true,
17151            inject_anomalies: false,
17152            show_progress: false,
17153            vendors_per_company: 5,
17154            customers_per_company: 3,
17155            materials_per_company: 3,
17156            assets_per_company: 3,
17157            employees_per_company: 3,
17158            ..Default::default()
17159        };
17160
17161        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
17162        let result = orchestrator.generate().unwrap();
17163
17164        // All three phases should have run
17165        assert!(result.statistics.llm_vendors_enriched > 0);
17166        assert_eq!(result.statistics.diffusion_samples_generated, 10);
17167        assert_eq!(result.statistics.causal_samples_generated, 50);
17168        assert!(result.statistics.causal_validation_passed.is_some());
17169    }
17170
17171    #[test]
17172    fn test_statistics_serialization_with_new_fields() {
17173        let stats = EnhancedGenerationStatistics {
17174            total_entries: 100,
17175            total_line_items: 500,
17176            llm_enrichment_ms: 42,
17177            llm_vendors_enriched: 10,
17178            diffusion_enhancement_ms: 100,
17179            diffusion_samples_generated: 50,
17180            causal_generation_ms: 200,
17181            causal_samples_generated: 100,
17182            causal_validation_passed: Some(true),
17183            ..Default::default()
17184        };
17185
17186        let json = serde_json::to_string(&stats).unwrap();
17187        let deserialized: EnhancedGenerationStatistics = serde_json::from_str(&json).unwrap();
17188
17189        assert_eq!(deserialized.llm_enrichment_ms, 42);
17190        assert_eq!(deserialized.llm_vendors_enriched, 10);
17191        assert_eq!(deserialized.diffusion_enhancement_ms, 100);
17192        assert_eq!(deserialized.diffusion_samples_generated, 50);
17193        assert_eq!(deserialized.causal_generation_ms, 200);
17194        assert_eq!(deserialized.causal_samples_generated, 100);
17195        assert_eq!(deserialized.causal_validation_passed, Some(true));
17196    }
17197
17198    #[test]
17199    fn test_statistics_backward_compat_deserialization() {
17200        // Old JSON without the new fields should still deserialize
17201        let old_json = r#"{
17202            "total_entries": 100,
17203            "total_line_items": 500,
17204            "accounts_count": 50,
17205            "companies_count": 1,
17206            "period_months": 12,
17207            "vendor_count": 10,
17208            "customer_count": 20,
17209            "material_count": 15,
17210            "asset_count": 5,
17211            "employee_count": 8,
17212            "p2p_chain_count": 5,
17213            "o2c_chain_count": 5,
17214            "ap_invoice_count": 5,
17215            "ar_invoice_count": 5,
17216            "ocpm_event_count": 0,
17217            "ocpm_object_count": 0,
17218            "ocpm_case_count": 0,
17219            "audit_engagement_count": 0,
17220            "audit_workpaper_count": 0,
17221            "audit_evidence_count": 0,
17222            "audit_risk_count": 0,
17223            "audit_finding_count": 0,
17224            "audit_judgment_count": 0,
17225            "anomalies_injected": 0,
17226            "data_quality_issues": 0,
17227            "banking_customer_count": 0,
17228            "banking_account_count": 0,
17229            "banking_transaction_count": 0,
17230            "banking_suspicious_count": 0,
17231            "graph_export_count": 0,
17232            "graph_node_count": 0,
17233            "graph_edge_count": 0
17234        }"#;
17235
17236        let stats: EnhancedGenerationStatistics = serde_json::from_str(old_json).unwrap();
17237
17238        // New fields should default to 0 / None
17239        assert_eq!(stats.llm_enrichment_ms, 0);
17240        assert_eq!(stats.llm_vendors_enriched, 0);
17241        assert_eq!(stats.diffusion_enhancement_ms, 0);
17242        assert_eq!(stats.diffusion_samples_generated, 0);
17243        assert_eq!(stats.causal_generation_ms, 0);
17244        assert_eq!(stats.causal_samples_generated, 0);
17245        assert!(stats.causal_validation_passed.is_none());
17246    }
17247
17248    // ── v5.33 #162 — framework-aware TB classification ──────────────────────
17249
17250    #[test]
17251    fn category_from_account_code_us_gaap_unchanged() {
17252        // US-style numbering — same answers as the pre-v5.33 hard-coded table.
17253        assert_eq!(
17254            EnhancedOrchestrator::category_from_account_code("1000", "us_gaap"),
17255            "Cash"
17256        );
17257        assert_eq!(
17258            EnhancedOrchestrator::category_from_account_code("1500", "us_gaap"),
17259            "FixedAssets"
17260        );
17261        assert_eq!(
17262            EnhancedOrchestrator::category_from_account_code("4000", "us_gaap"),
17263            "Revenue"
17264        );
17265        assert_eq!(
17266            EnhancedOrchestrator::category_from_account_code("6000", "us_gaap"),
17267            "OperatingExpenses"
17268        );
17269    }
17270
17271    #[test]
17272    fn category_from_account_code_skr04_german() {
17273        // SKR04 (German GAAP): 0xxx = fixed assets, 4xxx = revenue,
17274        // 8xxx = tax/extraordinary expense — pre-v5.33 the US-only table
17275        // mis-classified 0xxx as OperatingExpenses (default arm), 4xxx as
17276        // Revenue (accidentally correct), and 8xxx as OtherExpenses.
17277        // Framework-aware version routes them correctly.
17278        assert_eq!(
17279            EnhancedOrchestrator::category_from_account_code("0010", "german_gaap"),
17280            "FixedAssets",
17281            "SKR 0xxx must be classified as fixed assets, not P&L"
17282        );
17283        assert_eq!(
17284            EnhancedOrchestrator::category_from_account_code("1000", "german_gaap"),
17285            "Cash"
17286        );
17287        assert_eq!(
17288            EnhancedOrchestrator::category_from_account_code("1300", "german_gaap"),
17289            "Receivables"
17290        );
17291        assert_eq!(
17292            EnhancedOrchestrator::category_from_account_code("2000", "german_gaap"),
17293            "Equity"
17294        );
17295        assert_eq!(
17296            EnhancedOrchestrator::category_from_account_code("3000", "german_gaap"),
17297            "Payables"
17298        );
17299        assert_eq!(
17300            EnhancedOrchestrator::category_from_account_code("4000", "german_gaap"),
17301            "Revenue"
17302        );
17303        assert_eq!(
17304            EnhancedOrchestrator::category_from_account_code("5000", "german_gaap"),
17305            "CostOfSales"
17306        );
17307        assert_eq!(
17308            EnhancedOrchestrator::category_from_account_code("8000", "german_gaap"),
17309            "OtherExpenses"
17310        );
17311    }
17312
17313    #[test]
17314    fn category_from_account_code_pcg_french() {
17315        // PCG (French GAAP): 2 = fixed assets, 5 = cash, 6 = expenses,
17316        // 7 = revenue. Pre-v5.33 these all hit the wrong US-prefix arms.
17317        assert_eq!(
17318            EnhancedOrchestrator::category_from_account_code("210000", "french_gaap"),
17319            "FixedAssets"
17320        );
17321        assert_eq!(
17322            EnhancedOrchestrator::category_from_account_code("411000", "french_gaap"),
17323            "Receivables"
17324        );
17325        assert_eq!(
17326            EnhancedOrchestrator::category_from_account_code("401000", "french_gaap"),
17327            "Payables"
17328        );
17329        assert_eq!(
17330            EnhancedOrchestrator::category_from_account_code("512000", "french_gaap"),
17331            "Cash"
17332        );
17333        assert_eq!(
17334            EnhancedOrchestrator::category_from_account_code("603000", "french_gaap"),
17335            "OperatingExpenses"
17336        );
17337        assert_eq!(
17338            EnhancedOrchestrator::category_from_account_code("707000", "french_gaap"),
17339            "Revenue"
17340        );
17341        assert_eq!(
17342            EnhancedOrchestrator::category_from_account_code("101000", "french_gaap"),
17343            "Equity"
17344        );
17345    }
17346
17347    #[test]
17348    fn is_balance_sheet_account_routes_skr_correctly() {
17349        // SKR04: 0xxx fixed assets, 1xxx current assets, 2xxx equity,
17350        // 3xxx liabilities → all BS.  4xxx revenue, 5-6 expenses → P&L.
17351        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17352            "0010",
17353            "german_gaap"
17354        ));
17355        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17356            "1200",
17357            "german_gaap"
17358        ));
17359        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17360            "2000",
17361            "german_gaap"
17362        ));
17363        assert!(EnhancedOrchestrator::is_balance_sheet_account(
17364            "3000",
17365            "german_gaap"
17366        ));
17367        assert!(!EnhancedOrchestrator::is_balance_sheet_account(
17368            "4000",
17369            "german_gaap"
17370        ));
17371        assert!(!EnhancedOrchestrator::is_balance_sheet_account(
17372            "6000",
17373            "german_gaap"
17374        ));
17375    }
17376
17377    #[test]
17378    fn period_trial_balance_into_canonical_account_type_is_framework_aware() {
17379        // Defect C regression test — every TB line was hard-coded
17380        // `account_type: Asset` regardless of the underlying code. With
17381        // the framework-aware classifier wired in, the same SKR codes
17382        // resolve to their proper sides.
17383        use datasynth_generators::TrialBalanceEntry;
17384        let entries = vec![
17385            TrialBalanceEntry {
17386                account_code: "0010".to_string(), // SKR fixed asset
17387                account_name: "Land".to_string(),
17388                category: "FixedAssets".to_string(),
17389                debit_balance: rust_decimal::Decimal::new(1_000_000, 0),
17390                credit_balance: rust_decimal::Decimal::ZERO,
17391            },
17392            TrialBalanceEntry {
17393                account_code: "3000".to_string(), // SKR liability
17394                account_name: "Trade payables".to_string(),
17395                category: "Payables".to_string(),
17396                debit_balance: rust_decimal::Decimal::ZERO,
17397                credit_balance: rust_decimal::Decimal::new(500_000, 0),
17398            },
17399            TrialBalanceEntry {
17400                account_code: "4000".to_string(), // SKR revenue
17401                account_name: "Sales".to_string(),
17402                category: "Revenue".to_string(),
17403                debit_balance: rust_decimal::Decimal::ZERO,
17404                credit_balance: rust_decimal::Decimal::new(2_000_000, 0),
17405            },
17406            TrialBalanceEntry {
17407                account_code: "6000".to_string(), // SKR expense
17408                account_name: "Personnel cost".to_string(),
17409                category: "OperatingExpenses".to_string(),
17410                debit_balance: rust_decimal::Decimal::new(800_000, 0),
17411                credit_balance: rust_decimal::Decimal::ZERO,
17412            },
17413        ];
17414        let ptb = PeriodTrialBalance {
17415            fiscal_year: 2024,
17416            fiscal_period: 12,
17417            period_start: chrono::NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
17418            period_end: chrono::NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
17419            entries,
17420            framework: "german_gaap".to_string(),
17421        };
17422        let tb = ptb.into_canonical("ACME_EU", "EUR");
17423        // Line account_types are no longer all-Asset.
17424        let types: Vec<AccountType> = tb.lines.iter().map(|l| l.account_type).collect();
17425        assert_eq!(types[0], AccountType::Asset, "0010 → Asset");
17426        assert_eq!(types[1], AccountType::Liability, "3000 → Liability");
17427        assert_eq!(types[2], AccountType::Revenue, "4000 → Revenue");
17428        assert_eq!(types[3], AccountType::Expense, "6000 → Expense");
17429        // is_balanced is now an unconditional truth claim — the
17430        // underlying JE-balance invariant is the only one we guarantee.
17431        assert!(tb.is_balanced);
17432        assert!(tb.is_equation_valid);
17433        assert_eq!(tb.out_of_balance, rust_decimal::Decimal::ZERO);
17434        assert_eq!(tb.equation_difference, rust_decimal::Decimal::ZERO);
17435    }
17436
17437    #[test]
17438    fn period_trial_balance_deserialises_legacy_snapshot_without_framework_field() {
17439        // Old in-memory snapshots (pre-v5.33) didn't carry the framework
17440        // field. Serde `#[serde(default)]` must let them round-trip with
17441        // a `"us_gaap"` fallback so older saved sessions keep working.
17442        let legacy_json = r#"{
17443            "fiscal_year": 2024,
17444            "fiscal_period": 12,
17445            "period_start": "2024-01-01",
17446            "period_end": "2024-12-31",
17447            "entries": []
17448        }"#;
17449        let ptb: PeriodTrialBalance = serde_json::from_str(legacy_json).unwrap();
17450        assert_eq!(ptb.framework, "us_gaap");
17451    }
17452}