// datasynth_runtime/enhanced_orchestrator.rs

//! Enhanced generation orchestrator with full feature integration.
//!
//! This orchestrator coordinates all generation phases:
//! 1. Chart of Accounts generation
//! 2. Master data generation (vendors, customers, materials, assets, employees)
//! 3. Document flow generation (P2P, O2C) + subledger linking + OCPM events
//! 4. Journal entry generation
//! 5. Anomaly injection
//! 6. Balance validation
//! 7. Data quality injection
//! 8. Audit data generation (engagements, workpapers, evidence, risks, findings, judgments)
//! 9. Banking KYC/AML data generation (customers, accounts, transactions, typologies)
//! 10. Graph export (accounting network for ML training and network reconstruction)
//! 11. LLM enrichment (AI-augmented vendor names, descriptions)
//! 12. Diffusion enhancement (statistical diffusion-based sample generation)
//! 13. Causal overlay (structural causal model generation and validation)
//! 14. Source-to-Contract (S2C) sourcing data generation
//! 15. Bank reconciliation generation
//! 16. Financial statement generation
//! 25. Counterfactual pair generation (ML training)
21
22use std::collections::HashMap;
23use std::path::PathBuf;
24use std::sync::Arc;
25
26use chrono::{Datelike, NaiveDate};
27use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
28use rand::SeedableRng;
29use serde::{Deserialize, Serialize};
30use tracing::{debug, info, warn};
31
32use datasynth_banking::{
33    models::{BankAccount, BankTransaction, BankingCustomer, CustomerName},
34    BankingOrchestratorBuilder,
35};
36use datasynth_config::schema::GeneratorConfig;
37use datasynth_core::error::{SynthError, SynthResult};
38use datasynth_core::models::audit::{
39    AnalyticalProcedureResult, AuditEngagement, AuditEvidence, AuditFinding, AuditProcedureStep,
40    AuditSample, ComponentAuditor, ComponentAuditorReport, ComponentInstruction,
41    ConfirmationResponse, EngagementLetter, ExternalConfirmation, GroupAuditPlan,
42    InternalAuditFunction, InternalAuditReport, ProfessionalJudgment, RelatedParty,
43    RelatedPartyTransaction, RiskAssessment, ServiceOrganization, SocReport, SubsequentEvent,
44    UserEntityControl, Workpaper,
45};
46use datasynth_core::models::sourcing::{
47    BidEvaluation, CatalogItem, ProcurementContract, RfxEvent, SourcingProject, SpendAnalysis,
48    SupplierBid, SupplierQualification, SupplierScorecard,
49};
50use datasynth_core::models::subledger::ap::{APAgingReport, APInvoice};
51use datasynth_core::models::subledger::ar::{ARAgingReport, ARInvoice};
52use datasynth_core::models::*;
53use datasynth_core::traits::Generator;
54use datasynth_core::{DegradationActions, DegradationLevel, ResourceGuard, ResourceGuardBuilder};
55use datasynth_fingerprint::{
56    io::FingerprintReader,
57    models::Fingerprint,
58    synthesis::{ConfigSynthesizer, CopulaGeneratorSpec, SynthesisOptions},
59};
60use datasynth_generators::{
61    // Subledger linker + settlement
62    apply_ap_settlements,
63    apply_ar_settlements,
64    // Opening balance → JE conversion
65    opening_balance_to_jes,
66    // Anomaly injection
67    AnomalyInjector,
68    AnomalyInjectorConfig,
69    AssetGenerator,
70    // Audit generators
71    AuditEngagementGenerator,
72    BalanceTrackerConfig,
73    // Bank reconciliation generator
74    BankReconciliationGenerator,
75    // S2C sourcing generators
76    BidEvaluationGenerator,
77    BidGenerator,
78    // Business combination generator (IFRS 3 / ASC 805)
79    BusinessCombinationGenerator,
80    CatalogGenerator,
81    // Core generators
82    ChartOfAccountsGenerator,
83    // Consolidation generator
84    ConsolidationGenerator,
85    ContractGenerator,
86    // Control generator
87    ControlGenerator,
88    ControlGeneratorConfig,
89    CustomerGenerator,
90    DataQualityConfig,
91    // Data quality
92    DataQualityInjector,
93    DataQualityStats,
94    // Document flow JE generator
95    DocumentFlowJeConfig,
96    DocumentFlowJeGenerator,
97    DocumentFlowLinker,
98    // Expected Credit Loss generator (IFRS 9 / ASC 326)
99    EclGenerator,
100    EmployeeGenerator,
101    EnhancedInjectionConfig,
102    // ESG anomaly labels
103    EsgAnomalyLabel,
104    EvidenceGenerator,
105    // Subledger depreciation schedule generator
106    FaDepreciationScheduleConfig,
107    FaDepreciationScheduleGenerator,
108    // Financial statement generator
109    FinancialStatementGenerator,
110    FindingGenerator,
111    // Inventory valuation generator
112    InventoryValuationGenerator,
113    InventoryValuationGeneratorConfig,
114    JournalEntryGenerator,
115    JudgmentGenerator,
116    LatePaymentDistribution,
117    // Manufacturing cost accounting + warranty provisions
118    ManufacturingCostAccounting,
119    MaterialGenerator,
120    O2CDocumentChain,
121    O2CGenerator,
122    O2CGeneratorConfig,
123    O2CPaymentBehavior,
124    P2PDocumentChain,
125    // Document flow generators
126    P2PGenerator,
127    P2PGeneratorConfig,
128    P2PPaymentBehavior,
129    PaymentReference,
130    // Provisions and contingencies generator (IAS 37 / ASC 450)
131    ProvisionGenerator,
132    QualificationGenerator,
133    RfxGenerator,
134    RiskAssessmentGenerator,
135    // Balance validation
136    RunningBalanceTracker,
137    ScorecardGenerator,
138    // Segment reporting generator (IFRS 8 / ASC 280)
139    SegmentGenerator,
140    SegmentSeed,
141    SourcingProjectGenerator,
142    SpendAnalysisGenerator,
143    ValidationError,
144    // Master data generators
145    VendorGenerator,
146    WarrantyProvisionGenerator,
147    WorkpaperGenerator,
148};
149use datasynth_graph::{
150    ApprovalGraphBuilder, ApprovalGraphConfig, BankingGraphBuilder, BankingGraphConfig,
151    EntityGraphBuilder, EntityGraphConfig, PyGExportConfig, PyGExporter, TransactionGraphBuilder,
152    TransactionGraphConfig,
153};
154use datasynth_ocpm::{
155    AuditDocuments, BankDocuments, BankReconDocuments, EventLogMetadata, H2rDocuments,
156    MfgDocuments, O2cDocuments, OcpmEventGenerator, OcpmEventLog, OcpmGeneratorConfig,
157    OcpmUuidFactory, P2pDocuments, S2cDocuments,
158};
159
160use datasynth_config::schema::{O2CFlowConfig, P2PFlowConfig};
161use datasynth_core::causal::{CausalGraph, CausalValidator, StructuralCausalModel};
162use datasynth_core::diffusion::{DiffusionBackend, DiffusionConfig, StatisticalDiffusionBackend};
163use datasynth_core::llm::{HttpLlmProvider, MockLlmProvider};
164use datasynth_core::models::balance::{
165    AccountCategory, AccountType, GeneratedOpeningBalance, IndustryType, OpeningBalanceSpec,
166    TrialBalance, TrialBalanceLine, TrialBalanceStatus, TrialBalanceType,
167};
168use datasynth_core::models::documents::PaymentMethod;
169use datasynth_core::models::IndustrySector;
170use datasynth_generators::audit::analytical_procedure_generator::AnalyticalProcedureGenerator;
171use datasynth_generators::audit::component_audit_generator::ComponentAuditGenerator;
172use datasynth_generators::audit::confirmation_generator::ConfirmationGenerator;
173use datasynth_generators::audit::engagement_letter_generator::EngagementLetterGenerator;
174use datasynth_generators::audit::internal_audit_generator::InternalAuditGenerator;
175use datasynth_generators::audit::procedure_step_generator::ProcedureStepGenerator;
176use datasynth_generators::audit::related_party_generator::RelatedPartyGenerator;
177use datasynth_generators::audit::sample_generator::SampleGenerator;
178use datasynth_generators::audit::service_org_generator::ServiceOrgGenerator;
179use datasynth_generators::audit::subsequent_event_generator::SubsequentEventGenerator;
180use datasynth_generators::coa_generator::CoAFramework;
181use rayon::prelude::*;
182use rust_decimal::Decimal;
183
184// ============================================================================
185// Configuration Conversion Functions
186// ============================================================================
187
188/// Convert P2P flow config from schema to generator config.
189/// v4.4.1 — build a `DataQualityStats` with only `total_records`
190/// populated to `n_entries`. Used when the data-quality phase is
191/// skipped (by config or resource pressure) so downstream consumers
192/// can still see the denominator. Before v4.4.1 the writer emitted
193/// `total_records: 0` in those cases, which the SDK team flagged as
194/// indistinguishable from "ran but processed nothing".
195fn stats_with_denominator(n_entries: usize) -> DataQualityStats {
196    #[allow(clippy::field_reassign_with_default)]
197    {
198        let mut s = DataQualityStats::default();
199        s.total_records = n_entries;
200        s.missing_values.total_records = n_entries;
201        s.format_variations.total_processed = n_entries;
202        s.duplicates.total_processed = n_entries;
203        s
204    }
205}
206
207fn convert_p2p_config(schema_config: &P2PFlowConfig) -> P2PGeneratorConfig {
208    let payment_behavior = &schema_config.payment_behavior;
209    let late_dist = &payment_behavior.late_payment_days_distribution;
210
211    P2PGeneratorConfig {
212        three_way_match_rate: schema_config.three_way_match_rate,
213        partial_delivery_rate: schema_config.partial_delivery_rate,
214        over_delivery_rate: schema_config.over_delivery_rate.unwrap_or(0.02),
215        price_variance_rate: schema_config.price_variance_rate,
216        max_price_variance_percent: schema_config.max_price_variance_percent,
217        avg_days_po_to_gr: schema_config.average_po_to_gr_days,
218        avg_days_gr_to_invoice: schema_config.average_gr_to_invoice_days,
219        avg_days_invoice_to_payment: schema_config.average_invoice_to_payment_days,
220        payment_method_distribution: vec![
221            (PaymentMethod::BankTransfer, 0.60),
222            (PaymentMethod::Check, 0.25),
223            (PaymentMethod::Wire, 0.10),
224            (PaymentMethod::CreditCard, 0.05),
225        ],
226        early_payment_discount_rate: schema_config.early_payment_discount_rate.unwrap_or(0.30),
227        payment_behavior: P2PPaymentBehavior {
228            late_payment_rate: payment_behavior.late_payment_rate,
229            late_payment_distribution: LatePaymentDistribution {
230                slightly_late_1_to_7: late_dist.slightly_late_1_to_7,
231                late_8_to_14: late_dist.late_8_to_14,
232                very_late_15_to_30: late_dist.very_late_15_to_30,
233                severely_late_31_to_60: late_dist.severely_late_31_to_60,
234                extremely_late_over_60: late_dist.extremely_late_over_60,
235            },
236            partial_payment_rate: payment_behavior.partial_payment_rate,
237            payment_correction_rate: payment_behavior.payment_correction_rate,
238            avg_days_until_remainder: payment_behavior.avg_days_until_remainder,
239        },
240    }
241}
242
243/// Convert O2C flow config from schema to generator config.
244fn convert_o2c_config(schema_config: &O2CFlowConfig) -> O2CGeneratorConfig {
245    let payment_behavior = &schema_config.payment_behavior;
246
247    O2CGeneratorConfig {
248        credit_check_failure_rate: schema_config.credit_check_failure_rate,
249        partial_shipment_rate: schema_config.partial_shipment_rate,
250        avg_days_so_to_delivery: schema_config.average_so_to_delivery_days,
251        avg_days_delivery_to_invoice: schema_config.average_delivery_to_invoice_days,
252        avg_days_invoice_to_payment: schema_config.average_invoice_to_receipt_days,
253        late_payment_rate: schema_config.late_payment_rate.unwrap_or(0.15),
254        bad_debt_rate: schema_config.bad_debt_rate,
255        returns_rate: schema_config.return_rate,
256        cash_discount_take_rate: schema_config.cash_discount.taken_rate,
257        payment_method_distribution: vec![
258            (PaymentMethod::BankTransfer, 0.50),
259            (PaymentMethod::Check, 0.30),
260            (PaymentMethod::Wire, 0.15),
261            (PaymentMethod::CreditCard, 0.05),
262        ],
263        payment_behavior: O2CPaymentBehavior {
264            partial_payment_rate: payment_behavior.partial_payments.rate,
265            short_payment_rate: payment_behavior.short_payments.rate,
266            max_short_percent: payment_behavior.short_payments.max_short_percent,
267            on_account_rate: payment_behavior.on_account_payments.rate,
268            payment_correction_rate: payment_behavior.payment_corrections.rate,
269            avg_days_until_remainder: payment_behavior.partial_payments.avg_days_until_remainder,
270        },
271    }
272}
273
/// Switches and volume settings that decide which generation phases run.
///
/// Each `bool` field turns one phase (or one behaviour of the run) on or
/// off; each `usize` field sets how many records a phase produces.
#[derive(Debug, Clone)]
pub struct PhaseConfig {
    /// Run the master data phase (vendors, customers, materials, assets, employees).
    pub generate_master_data: bool,
    /// Run the document flow phase (P2P, O2C).
    pub generate_document_flows: bool,
    /// Derive OCPM events from the generated document flows.
    pub generate_ocpm_events: bool,
    /// Run the journal entry phase.
    pub generate_journal_entries: bool,
    /// Run anomaly injection.
    pub inject_anomalies: bool,
    /// Run data quality injection (typos, missing values, format variations).
    pub inject_data_quality: bool,
    /// Check the balance sheet equation once generation has finished.
    pub validate_balances: bool,
    /// Strict chart-of-accounts coverage check: when `true`, the run fails
    /// if any `gl_account` referenced in generated JEs is missing from the
    /// chart of accounts. When `false` (the default) only a soft warning
    /// is emitted.
    pub validate_coa_coverage_strict: bool,
    /// Display progress bars.
    pub show_progress: bool,
    /// Vendors generated per company.
    pub vendors_per_company: usize,
    /// Customers generated per company.
    pub customers_per_company: usize,
    /// Materials generated per company.
    pub materials_per_company: usize,
    /// Assets generated per company.
    pub assets_per_company: usize,
    /// Employees generated per company.
    pub employees_per_company: usize,
    /// P2P chains to generate.
    pub p2p_chains: usize,
    /// O2C chains to generate.
    pub o2c_chains: usize,
    /// Run the audit phase (engagements, workpapers, evidence, risks, findings, judgments).
    pub generate_audit: bool,
    /// Audit engagements to generate.
    pub audit_engagements: usize,
    /// Workpapers generated per engagement.
    pub workpapers_per_engagement: usize,
    /// Evidence items generated per workpaper.
    pub evidence_per_workpaper: usize,
    /// Risk assessments generated per engagement.
    pub risks_per_engagement: usize,
    /// Findings generated per engagement.
    pub findings_per_engagement: usize,
    /// Professional judgments generated per engagement.
    pub judgments_per_engagement: usize,
    /// Run the banking KYC/AML phase (customers, accounts, transactions, typologies).
    pub generate_banking: bool,
    /// Produce graph exports (accounting network for ML training).
    pub generate_graph_export: bool,
    /// Run the S2C sourcing phase (spend analysis, RFx, bids, contracts, catalogs, scorecards).
    pub generate_sourcing: bool,
    /// Build bank reconciliations from payments.
    pub generate_bank_reconciliation: bool,
    /// Build financial statements from trial balances.
    pub generate_financial_statements: bool,
    /// Run the accounting standards phase (revenue recognition, impairment).
    pub generate_accounting_standards: bool,
    /// Run the manufacturing phase (production orders, quality inspections, cycle counts).
    pub generate_manufacturing: bool,
    /// Produce sales quotes, management KPIs, and budgets.
    pub generate_sales_kpi_budgets: bool,
    /// Produce tax jurisdictions and tax codes.
    pub generate_tax: bool,
    /// Run the ESG phase (emissions, energy, water, waste, social, governance).
    pub generate_esg: bool,
    /// Produce intercompany transactions and eliminations.
    pub generate_intercompany: bool,
    /// Produce process evolution and organizational events.
    pub generate_evolution_events: bool,
    /// Produce counterfactual (original, mutated) JE pairs for ML training.
    pub generate_counterfactuals: bool,
    /// Run the compliance regulations phase (standards registry, procedures, findings, filings).
    pub generate_compliance_regulations: bool,
    /// Produce period-close journal entries (tax provision, income statement close).
    pub generate_period_close: bool,
    /// Run the HR phase (payroll, time entries, expenses, pensions, stock comp).
    pub generate_hr: bool,
    /// Run the treasury phase (cash management, hedging, debt, pooling).
    pub generate_treasury: bool,
    /// Run the project accounting phase (projects, costs, revenue, EVM, milestones).
    pub generate_project_accounting: bool,
    /// v3.3.0: produce legal documents per engagement (engagement letters,
    /// management rep letters, legal opinions, regulatory filings, board
    /// resolutions). Gated by `compliance_regulations.legal_documents.enabled`.
    pub generate_legal_documents: bool,
    /// v3.3.0: produce IT general controls (access logs, change management
    /// records) per audit engagement. Gated by `audit.it_controls.enabled`.
    pub generate_it_controls: bool,
    /// v3.3.0: run the analytics-metadata phase after all JE-adding phases.
    /// Wires PriorYearGenerator / IndustryBenchmarkGenerator /
    /// ManagementReportGenerator / DriftEventGenerator. Gated by the
    /// top-level `analytics_metadata.enabled` config flag.
    pub generate_analytics_metadata: bool,
}
375
376impl Default for PhaseConfig {
377    fn default() -> Self {
378        Self {
379            generate_master_data: true,
380            generate_document_flows: true,
381            generate_ocpm_events: false, // Off by default
382            generate_journal_entries: true,
383            inject_anomalies: false,
384            inject_data_quality: false, // Off by default (to preserve clean test data)
385            validate_balances: true,
386            validate_coa_coverage_strict: false,
387            show_progress: true,
388            vendors_per_company: 50,
389            customers_per_company: 100,
390            materials_per_company: 200,
391            assets_per_company: 50,
392            employees_per_company: 100,
393            p2p_chains: 100,
394            o2c_chains: 100,
395            generate_audit: false, // Off by default
396            audit_engagements: 5,
397            workpapers_per_engagement: 20,
398            evidence_per_workpaper: 5,
399            risks_per_engagement: 15,
400            findings_per_engagement: 8,
401            judgments_per_engagement: 10,
402            generate_banking: false,                // Off by default
403            generate_graph_export: false,           // Off by default
404            generate_sourcing: false,               // Off by default
405            generate_bank_reconciliation: false,    // Off by default
406            generate_financial_statements: false,   // Off by default
407            generate_accounting_standards: false,   // Off by default
408            generate_manufacturing: false,          // Off by default
409            generate_sales_kpi_budgets: false,      // Off by default
410            generate_tax: false,                    // Off by default
411            generate_esg: false,                    // Off by default
412            generate_intercompany: false,           // Off by default
413            generate_evolution_events: true,        // On by default
414            generate_counterfactuals: false,        // Off by default (opt-in for ML workloads)
415            generate_compliance_regulations: false, // Off by default
416            generate_period_close: true,            // On by default
417            generate_hr: false,                     // Off by default
418            generate_treasury: false,               // Off by default
419            generate_project_accounting: false,     // Off by default
420            generate_legal_documents: false,        // v3.3.0 — off by default
421            generate_it_controls: false,            // v3.3.0 — off by default
422            generate_analytics_metadata: false,     // v3.3.0 — off by default
423        }
424    }
425}
426
427impl PhaseConfig {
428    /// Derive phase flags from [`GeneratorConfig`].
429    ///
430    /// This is the canonical way to create a [`PhaseConfig`] from a YAML config file.
431    /// CLI flags can override individual fields after calling this method.
432    pub fn from_config(cfg: &datasynth_config::GeneratorConfig) -> Self {
433        Self {
434            // Always-on phases
435            generate_master_data: true,
436            generate_document_flows: true,
437            generate_journal_entries: true,
438            validate_balances: true,
439            validate_coa_coverage_strict: false,
440            generate_period_close: true,
441            generate_evolution_events: true,
442            show_progress: true,
443
444            // Feature-gated phases — derived from config sections
445            generate_audit: cfg.audit.enabled,
446            generate_banking: cfg.banking.enabled,
447            generate_graph_export: cfg.graph_export.enabled,
448            generate_sourcing: cfg.source_to_pay.enabled,
449            generate_intercompany: cfg.intercompany.enabled,
450            generate_financial_statements: cfg.financial_reporting.enabled,
451            generate_bank_reconciliation: cfg.financial_reporting.enabled,
452            generate_accounting_standards: cfg.accounting_standards.enabled,
453            generate_manufacturing: cfg.manufacturing.enabled,
454            generate_sales_kpi_budgets: cfg.sales_quotes.enabled
455                || cfg.financial_reporting.management_kpis.enabled
456                || cfg.financial_reporting.budgets.enabled
457                || cfg.financial_reporting.external_expectations.enabled
458                || cfg.financial_reporting.evidence_anchors.enabled,
459            generate_tax: cfg.tax.enabled,
460            generate_esg: cfg.esg.enabled,
461            generate_ocpm_events: cfg.ocpm.enabled,
462            generate_compliance_regulations: cfg.compliance_regulations.enabled,
463            generate_hr: cfg.hr.enabled,
464            generate_treasury: cfg.treasury.enabled,
465            generate_project_accounting: cfg.project_accounting.enabled,
466
467            // v3.3.0: L1 generator wiring
468            // Legal documents emitted when compliance_regulations is enabled
469            // and the nested legal_documents.enabled flag is set.
470            generate_legal_documents: cfg.compliance_regulations.enabled
471                && cfg.compliance_regulations.legal_documents.enabled,
472            // IT general controls emitted when audit is enabled and the
473            // nested it_controls.enabled flag is set.
474            generate_it_controls: cfg.audit.enabled && cfg.audit.it_controls.enabled,
475            // Analytics metadata phase (prior-year, industry benchmarks,
476            // management reports, drift events).
477            generate_analytics_metadata: cfg.analytics_metadata.enabled,
478
479            // Opt-in for ML workloads — driven by scenarios.generate_counterfactuals config field
480            generate_counterfactuals: cfg.scenarios.generate_counterfactuals,
481
482            inject_anomalies: cfg.fraud.enabled || cfg.anomaly_injection.enabled,
483            inject_data_quality: cfg.data_quality.enabled,
484
485            // Count defaults (CLI can override after calling this method)
486            vendors_per_company: 50,
487            customers_per_company: 100,
488            materials_per_company: 200,
489            assets_per_company: 50,
490            employees_per_company: 100,
491            p2p_chains: 100,
492            o2c_chains: 100,
493            audit_engagements: 5,
494            workpapers_per_engagement: 20,
495            evidence_per_workpaper: 5,
496            risks_per_engagement: 15,
497            findings_per_engagement: 8,
498            judgments_per_engagement: 10,
499        }
500    }
501}
502
503/// Master data snapshot containing all generated entities.
504#[derive(Debug, Clone, Default)]
505pub struct MasterDataSnapshot {
506    /// Generated vendors.
507    pub vendors: Vec<Vendor>,
508    /// Generated customers.
509    pub customers: Vec<Customer>,
510    /// Generated materials.
511    pub materials: Vec<Material>,
512    /// Generated fixed assets.
513    pub assets: Vec<FixedAsset>,
514    /// Generated employees.
515    pub employees: Vec<Employee>,
516    /// Generated cost center hierarchy (two-level: departments + sub-departments).
517    pub cost_centers: Vec<datasynth_core::models::CostCenter>,
518    /// v5.1: Generated profit centre hierarchy (two-level: top-level
519    /// segment / region / product-group nodes + sub-units).  Emits to
520    /// SAP CEPC alongside `cost_centers` → CSKS.
521    pub profit_centers: Vec<datasynth_core::models::ProfitCenter>,
522    /// Employee lifecycle change history (hired, promoted, salary adjustments, transfers, terminated).
523    pub employee_change_history: Vec<datasynth_core::models::EmployeeChangeEvent>,
524    /// v3.3.0+: organizational profiles (one per company) with
525    /// industry / geography / structure / complexity metadata. Emitted
526    /// alongside master data when `generate_master_data = true`.
527    pub organizational_profiles: Vec<datasynth_core::models::OrganizationalProfile>,
528}
529
/// Summary of a finished hypergraph export.
#[derive(Debug, Clone)]
pub struct HypergraphExportInfo {
    /// How many nodes were exported.
    pub node_count: usize,
    /// How many pairwise edges were exported.
    pub edge_count: usize,
    /// How many hyperedges were exported.
    pub hyperedge_count: usize,
    /// Directory the export was written to.
    pub output_path: PathBuf,
}
542
543/// Document flow snapshot containing all generated document chains.
544#[derive(Debug, Clone, Default)]
545pub struct DocumentFlowSnapshot {
546    /// P2P document chains.
547    pub p2p_chains: Vec<P2PDocumentChain>,
548    /// O2C document chains.
549    pub o2c_chains: Vec<O2CDocumentChain>,
550    /// All purchase orders (flattened).
551    pub purchase_orders: Vec<documents::PurchaseOrder>,
552    /// All goods receipts (flattened).
553    pub goods_receipts: Vec<documents::GoodsReceipt>,
554    /// All vendor invoices (flattened).
555    pub vendor_invoices: Vec<documents::VendorInvoice>,
556    /// All sales orders (flattened).
557    pub sales_orders: Vec<documents::SalesOrder>,
558    /// All deliveries (flattened).
559    pub deliveries: Vec<documents::Delivery>,
560    /// All customer invoices (flattened).
561    pub customer_invoices: Vec<documents::CustomerInvoice>,
562    /// All payments (flattened).
563    pub payments: Vec<documents::Payment>,
564    /// Cross-document references collected from all document headers
565    /// (PO→GR, GR→Invoice, Invoice→Payment, SO→Delivery, etc.)
566    pub document_references: Vec<documents::DocumentReference>,
567}
568
569/// Subledger snapshot containing generated subledger records.
570#[derive(Debug, Clone, Default)]
571pub struct SubledgerSnapshot {
572    /// AP invoices linked from document flow vendor invoices.
573    pub ap_invoices: Vec<APInvoice>,
574    /// AR invoices linked from document flow customer invoices.
575    pub ar_invoices: Vec<ARInvoice>,
576    /// FA subledger records (asset acquisitions from FA generator).
577    pub fa_records: Vec<datasynth_core::models::subledger::fa::FixedAssetRecord>,
578    /// Inventory positions from inventory generator.
579    pub inventory_positions: Vec<datasynth_core::models::subledger::inventory::InventoryPosition>,
580    /// Inventory movements from inventory generator.
581    pub inventory_movements: Vec<datasynth_core::models::subledger::inventory::InventoryMovement>,
582    /// AR aging reports, one per company, computed after payment settlement.
583    pub ar_aging_reports: Vec<ARAgingReport>,
584    /// AP aging reports, one per company, computed after payment settlement.
585    pub ap_aging_reports: Vec<APAgingReport>,
586    /// Depreciation runs — one per fiscal period per company (from DepreciationRunGenerator).
587    pub depreciation_runs: Vec<datasynth_core::models::subledger::fa::DepreciationRun>,
588    /// Inventory valuation results — one per company (lower-of-cost-or-NRV, IAS 2 / ASC 330).
589    pub inventory_valuations: Vec<datasynth_generators::InventoryValuationResult>,
590    /// Dunning runs executed after AR aging (one per company per dunning cycle).
591    pub dunning_runs: Vec<datasynth_core::models::subledger::ar::DunningRun>,
592    /// Dunning letters generated across all dunning runs.
593    pub dunning_letters: Vec<datasynth_core::models::subledger::ar::DunningLetter>,
594}
595
596/// OCPM snapshot containing generated OCPM event log data.
597#[derive(Debug, Clone, Default)]
598pub struct OcpmSnapshot {
599    /// OCPM event log (if generated)
600    pub event_log: Option<OcpmEventLog>,
601    /// Number of events generated
602    pub event_count: usize,
603    /// Number of objects generated
604    pub object_count: usize,
605    /// Number of cases generated
606    pub case_count: usize,
607}
608
609/// Audit data snapshot containing all generated audit-related entities.
610#[derive(Debug, Clone, Default)]
611pub struct AuditSnapshot {
612    /// Audit engagements per ISA 210/220.
613    pub engagements: Vec<AuditEngagement>,
614    /// Workpapers per ISA 230.
615    pub workpapers: Vec<Workpaper>,
616    /// Audit evidence per ISA 500.
617    pub evidence: Vec<AuditEvidence>,
618    /// Risk assessments per ISA 315/330.
619    pub risk_assessments: Vec<RiskAssessment>,
620    /// Audit findings per ISA 265.
621    pub findings: Vec<AuditFinding>,
622    /// Professional judgments per ISA 200.
623    pub judgments: Vec<ProfessionalJudgment>,
624    /// External confirmations per ISA 505.
625    pub confirmations: Vec<ExternalConfirmation>,
626    /// Confirmation responses per ISA 505.
627    pub confirmation_responses: Vec<ConfirmationResponse>,
628    /// Audit procedure steps per ISA 330/530.
629    pub procedure_steps: Vec<AuditProcedureStep>,
630    /// Audit samples per ISA 530.
631    pub samples: Vec<AuditSample>,
632    /// Analytical procedure results per ISA 520.
633    pub analytical_results: Vec<AnalyticalProcedureResult>,
634    /// Internal audit functions per ISA 610.
635    pub ia_functions: Vec<InternalAuditFunction>,
636    /// Internal audit reports per ISA 610.
637    pub ia_reports: Vec<InternalAuditReport>,
638    /// Related parties per ISA 550.
639    pub related_parties: Vec<RelatedParty>,
640    /// Related party transactions per ISA 550.
641    pub related_party_transactions: Vec<RelatedPartyTransaction>,
642    // ---- ISA 600: Group Audits ----
643    /// Component auditors assigned by jurisdiction (ISA 600).
644    pub component_auditors: Vec<ComponentAuditor>,
645    /// Group audit plan with materiality allocations (ISA 600).
646    pub group_audit_plan: Option<GroupAuditPlan>,
647    /// Component instructions issued to component auditors (ISA 600).
648    pub component_instructions: Vec<ComponentInstruction>,
649    /// Reports received from component auditors (ISA 600).
650    pub component_reports: Vec<ComponentAuditorReport>,
651    // ---- ISA 210: Engagement Letters ----
652    /// Engagement letters per ISA 210.
653    pub engagement_letters: Vec<EngagementLetter>,
654    // ---- ISA 560 / IAS 10: Subsequent Events ----
655    /// Subsequent events per ISA 560 / IAS 10.
656    pub subsequent_events: Vec<SubsequentEvent>,
657    // ---- ISA 402: Service Organization Controls ----
658    /// Service organizations identified per ISA 402.
659    pub service_organizations: Vec<ServiceOrganization>,
660    /// SOC reports obtained per ISA 402.
661    pub soc_reports: Vec<SocReport>,
662    /// User entity controls documented per ISA 402.
663    pub user_entity_controls: Vec<UserEntityControl>,
664    // ---- ISA 570: Going Concern ----
665    /// Going concern assessments per ISA 570 / ASC 205-40 (one per entity per period).
666    pub going_concern_assessments:
667        Vec<datasynth_core::models::audit::going_concern::GoingConcernAssessment>,
668    // ---- ISA 540: Accounting Estimates ----
669    /// Accounting estimates reviewed per ISA 540 (5–8 per entity).
670    pub accounting_estimates:
671        Vec<datasynth_core::models::audit::accounting_estimates::AccountingEstimate>,
672    // ---- ISA 700/701/705/706: Audit Opinions ----
673    /// Formed audit opinions per ISA 700 / 705 / 706 (one per engagement).
674    pub audit_opinions: Vec<datasynth_standards::audit::opinion::AuditOpinion>,
675    /// Key Audit Matters per ISA 701 (flattened across all opinions).
676    pub key_audit_matters: Vec<datasynth_standards::audit::opinion::KeyAuditMatter>,
677    // ---- SOX 302 / 404 ----
678    /// SOX Section 302 CEO/CFO certifications (one pair per US-listed entity per year).
679    pub sox_302_certifications: Vec<datasynth_standards::regulatory::sox::Sox302Certification>,
680    /// SOX Section 404 ICFR assessments (one per entity per year).
681    pub sox_404_assessments: Vec<datasynth_standards::regulatory::sox::Sox404Assessment>,
682    // ---- ISA 320: Materiality ----
683    /// Materiality calculations per entity per period (ISA 320).
684    pub materiality_calculations:
685        Vec<datasynth_core::models::audit::materiality_calculation::MaterialityCalculation>,
686    // ---- ISA 315: Combined Risk Assessments ----
687    /// Combined Risk Assessments per account area / assertion (ISA 315).
688    pub combined_risk_assessments:
689        Vec<datasynth_core::models::audit::risk_assessment_cra::CombinedRiskAssessment>,
690    // ---- ISA 530: Sampling Plans ----
691    /// Sampling plans per CRA at Moderate or higher (ISA 530).
692    pub sampling_plans: Vec<datasynth_core::models::audit::sampling_plan::SamplingPlan>,
693    /// Individual sampled items (key items + representative items) per ISA 530.
694    pub sampled_items: Vec<datasynth_core::models::audit::sampling_plan::SampledItem>,
695    // ---- ISA 315: Significant Classes of Transactions (SCOTS) ----
696    /// Significant classes of transactions per ISA 315 (one set per entity).
697    pub significant_transaction_classes:
698        Vec<datasynth_core::models::audit::scots::SignificantClassOfTransactions>,
699    // ---- ISA 520: Unusual Item Markers ----
700    /// Unusual item flags raised across all journal entries (5–10% flagging rate).
701    pub unusual_items: Vec<datasynth_core::models::audit::unusual_items::UnusualItemFlag>,
702    // ---- ISA 520: Analytical Relationships ----
703    /// Analytical relationships (ratios, trends, correlations) per entity.
704    pub analytical_relationships:
705        Vec<datasynth_core::models::audit::analytical_relationships::AnalyticalRelationship>,
706    // ---- PCAOB-ISA Cross-Reference ----
707    /// PCAOB-to-ISA standard mappings (key differences, similarities, application notes).
708    pub isa_pcaob_mappings: Vec<datasynth_standards::audit::pcaob::PcaobIsaMapping>,
709    // ---- ISA Standard Reference ----
710    /// Flat ISA standard reference entries (number, title, series) for `audit/isa_mappings.json`.
711    pub isa_mappings: Vec<datasynth_standards::audit::isa_reference::IsaStandardEntry>,
712    // ---- ISA 220 / ISA 300: Audit Scopes ----
713    /// Audit scope records (one per engagement) describing the audit boundary.
714    pub audit_scopes: Vec<datasynth_core::models::audit::AuditScope>,
715    // ---- FSM Event Trail ----
716    /// Optional FSM event trail produced when `audit.fsm.enabled: true`.
717    /// Contains the ordered sequence of state-transition and procedure-step events
718    /// generated by the audit FSM engine.
719    pub fsm_event_trail: Option<Vec<datasynth_audit_fsm::event::AuditEvent>>,
720    // ---- v3.3.0: L1 generator wiring ----
721    /// Legal documents (engagement letters, management reps, legal
722    /// opinions, regulatory filings, board resolutions) per entity.
723    /// Emitted by `LegalDocumentGenerator` when
724    /// `compliance_regulations.legal_documents.enabled = true`.
725    pub legal_documents: Vec<datasynth_core::models::LegalDocument>,
726    /// IT general controls — access logs (login/privileged action
727    /// audit trail). Emitted by `ItControlsGenerator` when
728    /// `audit.it_controls.enabled = true`.
729    pub it_controls_access_logs: Vec<datasynth_core::models::AccessLog>,
730    /// IT general controls — change management records (code deploys,
731    /// config changes, patches). Emitted by `ItControlsGenerator`.
732    pub it_controls_change_records: Vec<datasynth_core::models::ChangeManagementRecord>,
733}
734
735/// Banking KYC/AML data snapshot containing all generated banking entities.
736#[derive(Debug, Clone, Default)]
737pub struct BankingSnapshot {
738    /// Banking customers (retail, business, trust).
739    pub customers: Vec<BankingCustomer>,
740    /// Bank accounts.
741    pub accounts: Vec<BankAccount>,
742    /// Bank transactions with AML labels.
743    pub transactions: Vec<BankTransaction>,
744    /// Transaction-level AML labels with features.
745    pub transaction_labels: Vec<datasynth_banking::labels::TransactionLabel>,
746    /// Customer-level AML labels.
747    pub customer_labels: Vec<datasynth_banking::labels::CustomerLabel>,
748    /// Account-level AML labels.
749    pub account_labels: Vec<datasynth_banking::labels::AccountLabel>,
750    /// Relationship-level AML labels.
751    pub relationship_labels: Vec<datasynth_banking::labels::RelationshipLabel>,
752    /// Case narratives for AML scenarios.
753    pub narratives: Vec<datasynth_banking::labels::ExportedNarrative>,
754    /// Number of suspicious transactions.
755    pub suspicious_count: usize,
756    /// Number of AML scenarios generated.
757    pub scenario_count: usize,
758}
759
760/// Graph export snapshot containing exported graph metadata.
761#[derive(Debug, Clone, Default, Serialize)]
762pub struct GraphExportSnapshot {
763    /// Whether graph export was performed.
764    pub exported: bool,
765    /// Number of graphs exported.
766    pub graph_count: usize,
767    /// Exported graph metadata (by format name).
768    pub exports: HashMap<String, GraphExportInfo>,
769}
770
771/// Information about an exported graph.
772#[derive(Debug, Clone, Serialize)]
773pub struct GraphExportInfo {
774    /// Graph name.
775    pub name: String,
776    /// Export format (pytorch_geometric, neo4j, dgl).
777    pub format: String,
778    /// Output directory path.
779    pub output_path: PathBuf,
780    /// Number of nodes.
781    pub node_count: usize,
782    /// Number of edges.
783    pub edge_count: usize,
784}
785
786/// S2C sourcing data snapshot.
787#[derive(Debug, Clone, Default)]
788pub struct SourcingSnapshot {
789    /// Spend analyses.
790    pub spend_analyses: Vec<SpendAnalysis>,
791    /// Sourcing projects.
792    pub sourcing_projects: Vec<SourcingProject>,
793    /// Supplier qualifications.
794    pub qualifications: Vec<SupplierQualification>,
795    /// RFx events (RFI, RFP, RFQ).
796    pub rfx_events: Vec<RfxEvent>,
797    /// Supplier bids.
798    pub bids: Vec<SupplierBid>,
799    /// Bid evaluations.
800    pub bid_evaluations: Vec<BidEvaluation>,
801    /// Procurement contracts.
802    pub contracts: Vec<ProcurementContract>,
803    /// Catalog items.
804    pub catalog_items: Vec<CatalogItem>,
805    /// Supplier scorecards.
806    pub scorecards: Vec<SupplierScorecard>,
807}
808
809/// A single period's trial balance with metadata.
810///
811/// Used as the orchestrator's in-memory representation while it
812/// builds per-period FS / CF artefacts.  At write time the runtime
813/// converts each `PeriodTrialBalance` to the canonical
814/// [`datasynth_core::models::balance::TrialBalance`] shape via
815/// [`PeriodTrialBalance::into_canonical`] so the on-disk
816/// `period_close/trial_balances.json` matches what the group
817/// aggregate phase loads — see
818/// `crate::output_writer::write_outputs`.
819#[derive(Debug, Clone, Serialize, Deserialize)]
820pub struct PeriodTrialBalance {
821    /// Fiscal year.
822    pub fiscal_year: u16,
823    /// Fiscal period (1-12).
824    pub fiscal_period: u8,
825    /// Period start date.
826    pub period_start: NaiveDate,
827    /// Period end date.
828    pub period_end: NaiveDate,
829    /// Trial balance entries for this period.
830    pub entries: Vec<datasynth_generators::TrialBalanceEntry>,
831    /// Framework string for classifier dispatch in
832    /// [`PeriodTrialBalance::into_canonical`] (`"us_gaap"` / `"ifrs"` /
833    /// `"french_gaap"` / `"german_gaap"` / `"dual_reporting"`). Set by
834    /// the orchestrator at TB-emit time; defaults to `"us_gaap"` when
835    /// constructed by ad-hoc callers (e.g. test fixtures).
836    #[serde(default = "default_framework")]
837    pub framework: String,
838}
839
/// Fallback framework identifier (`"us_gaap"`), referenced by the
/// `#[serde(default = "default_framework")]` attribute on
/// `PeriodTrialBalance::framework`.
fn default_framework() -> String {
    String::from("us_gaap")
}
843
844impl PeriodTrialBalance {
845    /// Convert this in-memory period TB into the canonical
846    /// [`datasynth_core::models::balance::TrialBalance`] shape used
847    /// for the on-disk artefact.
848    ///
849    /// v5.1: the on-disk shape is now canonical end-to-end.  Group
850    /// aggregate's `tb_loader` consumes the canonical type directly,
851    /// dropping the v5.0 dual-shape detection that converted from
852    /// `PeriodTrialBalance` JSON on the fly.
853    ///
854    /// v5.33: framework-aware classification — `category` and
855    /// `account_type` are now resolved via
856    /// [`datasynth_core::framework_accounts::FrameworkAccounts`] for the
857    /// framework recorded on `self.framework`, fixing the v5.32-and-prior
858    /// regression where every line was stamped `AccountType::Asset`
859    /// regardless of code (Defect C in the 3-year medium-chain
860    /// FINDINGS doc).
861    ///
862    /// The `is_balanced` / `is_equation_valid` flags are now set to
863    /// `true` with `out_of_balance` / `equation_difference` clamped to
864    /// zero. The interim-TB shape this writer produces is "cumulative
865    /// BS positions + period-only P&L", which is the standard adjusted
866    /// TB layout but has no `Σ debits == Σ credits` invariant — that
867    /// comparison is meaningful only for a gross-flow TB built from
868    /// fully-balanced JEs over a single time window. The integrity that
869    /// IS guaranteed is the underlying per-JE balance invariant
870    /// enforced by [`datasynth_core::models::journal_entry::JournalEntry::new`].
871    /// Downstream consumers that need a real signed-equation check
872    /// (`Σ A = Σ L + Σ E + NI`) should derive it from opening balances
873    /// plus the period-only P&L lines, not from the raw debit/credit
874    /// totals stamped here.
875    pub fn into_canonical(self, company_code: &str, currency: &str) -> TrialBalance {
876        let framework = &self.framework;
877        let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
878        let mut total_debits = Decimal::ZERO;
879        let mut total_credits = Decimal::ZERO;
880        let lines: Vec<TrialBalanceLine> = self
881            .entries
882            .into_iter()
883            .map(|e| {
884                total_debits += e.debit_balance;
885                total_credits += e.credit_balance;
886                let category =
887                    AccountCategory::from_account_code_with_framework(&e.account_code, framework);
888                let account_type = fa.classify_account_type(&e.account_code);
889                TrialBalanceLine {
890                    account_code: e.account_code,
891                    account_description: e.account_name,
892                    category,
893                    account_type,
894                    opening_balance: Decimal::ZERO,
895                    period_debits: e.debit_balance,
896                    period_credits: e.credit_balance,
897                    closing_balance: e.debit_balance - e.credit_balance,
898                    debit_balance: e.debit_balance,
899                    credit_balance: e.credit_balance,
900                    cost_center: None,
901                    profit_center: None,
902                }
903            })
904            .collect();
905        TrialBalance {
906            trial_balance_id: format!(
907                "{company_code}-{:04}{:02}",
908                self.fiscal_year, self.fiscal_period
909            ),
910            company_code: company_code.to_string(),
911            company_name: None,
912            as_of_date: self.period_end,
913            fiscal_year: self.fiscal_year as i32,
914            fiscal_period: self.fiscal_period as u32,
915            currency: currency.to_string(),
916            balance_type: TrialBalanceType::Adjusted,
917            lines,
918            total_debits,
919            total_credits,
920            is_balanced: true,
921            out_of_balance: Decimal::ZERO,
922            is_equation_valid: true,
923            equation_difference: Decimal::ZERO,
924            category_summary: std::collections::HashMap::new(),
925            created_at: self
926                .period_start
927                .and_hms_opt(0, 0, 0)
928                .expect("midnight is a valid time"),
929            created_by: "ORCHESTRATOR".to_string(),
930            approved_by: None,
931            approved_at: None,
932            status: TrialBalanceStatus::Final,
933        }
934    }
935}
936
937/// Financial reporting snapshot (financial statements + bank reconciliations).
938#[derive(Debug, Clone, Default)]
939pub struct FinancialReportingSnapshot {
940    /// Financial statements (balance sheet, income statement, cash flow).
941    /// For multi-entity configs this includes all standalone statements.
942    pub financial_statements: Vec<FinancialStatement>,
943    /// Standalone financial statements keyed by entity code.
944    /// Each entity has its own slice of statements.
945    pub standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>>,
946    /// Consolidated financial statements for the group (one per period, is_consolidated=true).
947    pub consolidated_statements: Vec<FinancialStatement>,
948    /// Consolidation schedules (one per period) showing pre/post elimination detail.
949    pub consolidation_schedules: Vec<ConsolidationSchedule>,
950    /// Bank reconciliations.
951    pub bank_reconciliations: Vec<BankReconciliation>,
952    /// Period-close trial balances (one per period).
953    pub trial_balances: Vec<PeriodTrialBalance>,
954    /// IFRS 8 / ASC 280 operating segment reports (one per segment per period).
955    pub segment_reports: Vec<datasynth_core::models::OperatingSegment>,
956    /// IFRS 8 / ASC 280 segment reconciliations (one per period tying segments to consolidated FS).
957    pub segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation>,
958    /// Notes to the financial statements (IAS 1 / ASC 235) — one set per entity.
959    pub notes_to_financial_statements: Vec<datasynth_core::models::FinancialStatementNote>,
960}
961
962/// HR data snapshot (payroll runs, time entries, expense reports, benefit enrollments, pensions).
963#[derive(Debug, Clone, Default)]
964pub struct HrSnapshot {
965    /// Payroll runs (actual data).
966    pub payroll_runs: Vec<PayrollRun>,
967    /// Payroll line items (actual data).
968    pub payroll_line_items: Vec<PayrollLineItem>,
969    /// Time entries (actual data).
970    pub time_entries: Vec<TimeEntry>,
971    /// Expense reports (actual data).
972    pub expense_reports: Vec<ExpenseReport>,
973    /// Benefit enrollments (actual data).
974    pub benefit_enrollments: Vec<BenefitEnrollment>,
975    /// Defined benefit pension plans (IAS 19 / ASC 715).
976    pub pension_plans: Vec<datasynth_core::models::pension::DefinedBenefitPlan>,
977    /// Pension obligation (DBO) roll-forwards.
978    pub pension_obligations: Vec<datasynth_core::models::pension::PensionObligation>,
979    /// Plan asset roll-forwards.
980    pub pension_plan_assets: Vec<datasynth_core::models::pension::PlanAssets>,
981    /// Pension disclosures.
982    pub pension_disclosures: Vec<datasynth_core::models::pension::PensionDisclosure>,
983    /// Journal entries generated from pension expense and OCI remeasurements.
984    pub pension_journal_entries: Vec<JournalEntry>,
985    /// Stock grants (ASC 718 / IFRS 2).
986    pub stock_grants: Vec<datasynth_core::models::stock_compensation::StockGrant>,
987    /// Stock-based compensation period expense records.
988    pub stock_comp_expenses: Vec<datasynth_core::models::stock_compensation::StockCompExpense>,
989    /// Journal entries generated from stock-based compensation expense.
990    pub stock_comp_journal_entries: Vec<JournalEntry>,
991    /// Payroll runs.
992    pub payroll_run_count: usize,
993    /// Payroll line item count.
994    pub payroll_line_item_count: usize,
995    /// Time entry count.
996    pub time_entry_count: usize,
997    /// Expense report count.
998    pub expense_report_count: usize,
999    /// Benefit enrollment count.
1000    pub benefit_enrollment_count: usize,
1001    /// Pension plan count.
1002    pub pension_plan_count: usize,
1003    /// Stock grant count.
1004    pub stock_grant_count: usize,
1005}
1006
1007/// Accounting standards data snapshot (revenue recognition, impairment, business combinations).
1008#[derive(Debug, Clone, Default)]
1009pub struct AccountingStandardsSnapshot {
1010    /// Revenue recognition contracts (actual data).
1011    pub contracts: Vec<datasynth_standards::accounting::revenue::CustomerContract>,
1012    /// Impairment tests (actual data).
1013    pub impairment_tests: Vec<datasynth_standards::accounting::impairment::ImpairmentTest>,
1014    /// Business combinations (IFRS 3 / ASC 805).
1015    pub business_combinations:
1016        Vec<datasynth_core::models::business_combination::BusinessCombination>,
1017    /// Journal entries generated from business combinations (Day 1 + amortization).
1018    pub business_combination_journal_entries: Vec<JournalEntry>,
1019    /// ECL models (IFRS 9 / ASC 326).
1020    pub ecl_models: Vec<datasynth_core::models::expected_credit_loss::EclModel>,
1021    /// ECL provision movements.
1022    pub ecl_provision_movements:
1023        Vec<datasynth_core::models::expected_credit_loss::EclProvisionMovement>,
1024    /// Journal entries from ECL provision.
1025    pub ecl_journal_entries: Vec<JournalEntry>,
1026    /// Provisions (IAS 37 / ASC 450).
1027    pub provisions: Vec<datasynth_core::models::provision::Provision>,
1028    /// Provision movement roll-forwards (IAS 37 / ASC 450).
1029    pub provision_movements: Vec<datasynth_core::models::provision::ProvisionMovement>,
1030    /// Contingent liabilities (IAS 37 / ASC 450).
1031    pub contingent_liabilities: Vec<datasynth_core::models::provision::ContingentLiability>,
1032    /// Journal entries from provisions.
1033    pub provision_journal_entries: Vec<JournalEntry>,
1034    /// IAS 21 functional currency translation results (one per entity per period).
1035    pub currency_translation_results:
1036        Vec<datasynth_core::models::currency_translation_result::CurrencyTranslationResult>,
1037    /// Revenue recognition contract count.
1038    pub revenue_contract_count: usize,
1039    /// Impairment test count.
1040    pub impairment_test_count: usize,
1041    /// Business combination count.
1042    pub business_combination_count: usize,
1043    /// ECL model count.
1044    pub ecl_model_count: usize,
1045    /// Provision count.
1046    pub provision_count: usize,
1047    /// Currency translation result count (IAS 21).
1048    pub currency_translation_count: usize,
1049    // ---- v3.3.1: Lease / FairValue / FrameworkReconciliation ----
1050    /// Lease contracts (IFRS 16 / ASC 842). Each entry carries its own
1051    /// ROU asset + lease liability details.
1052    pub leases: Vec<datasynth_standards::accounting::leases::Lease>,
1053    /// Fair value measurements (IFRS 13 / ASC 820) across Level 1/2/3.
1054    pub fair_value_measurements:
1055        Vec<datasynth_standards::accounting::fair_value::FairValueMeasurement>,
1056    /// Framework difference records (dual-reporting only).
1057    pub framework_differences:
1058        Vec<datasynth_standards::accounting::differences::FrameworkDifferenceRecord>,
1059    /// Per-entity framework reconciliation (dual-reporting only).
1060    pub framework_reconciliations:
1061        Vec<datasynth_standards::accounting::differences::FrameworkReconciliation>,
1062    /// Counts for stats logging.
1063    pub lease_count: usize,
1064    pub fair_value_measurement_count: usize,
1065    pub framework_difference_count: usize,
1066}
1067
1068/// Compliance regulations framework snapshot (standards, procedures, findings, filings, graph).
1069#[derive(Debug, Clone, Default)]
1070pub struct ComplianceRegulationsSnapshot {
1071    /// Flattened standard records for output.
1072    pub standard_records: Vec<datasynth_generators::compliance::ComplianceStandardRecord>,
1073    /// Cross-reference records.
1074    pub cross_reference_records: Vec<datasynth_generators::compliance::CrossReferenceRecord>,
1075    /// Jurisdiction profile records.
1076    pub jurisdiction_records: Vec<datasynth_generators::compliance::JurisdictionRecord>,
1077    /// Generated audit procedures.
1078    pub audit_procedures: Vec<datasynth_generators::compliance::AuditProcedureRecord>,
1079    /// Generated compliance findings.
1080    pub findings: Vec<datasynth_core::models::compliance::ComplianceFinding>,
1081    /// Generated regulatory filings.
1082    pub filings: Vec<datasynth_core::models::compliance::RegulatoryFiling>,
1083    /// Compliance graph (if graph integration enabled).
1084    pub compliance_graph: Option<datasynth_graph::Graph>,
1085}
1086
1087/// Manufacturing data snapshot (production orders, quality inspections, cycle counts, BOMs, inventory movements).
1088#[derive(Debug, Clone, Default)]
1089pub struct ManufacturingSnapshot {
1090    /// Production orders (actual data).
1091    pub production_orders: Vec<ProductionOrder>,
1092    /// Quality inspections (actual data).
1093    pub quality_inspections: Vec<QualityInspection>,
1094    /// Cycle counts (actual data).
1095    pub cycle_counts: Vec<CycleCount>,
1096    /// BOM components (actual data).
1097    pub bom_components: Vec<BomComponent>,
1098    /// Inventory movements (actual data).
1099    pub inventory_movements: Vec<InventoryMovement>,
1100    /// Production order count.
1101    pub production_order_count: usize,
1102    /// Quality inspection count.
1103    pub quality_inspection_count: usize,
1104    /// Cycle count count.
1105    pub cycle_count_count: usize,
1106    /// BOM component count.
1107    pub bom_component_count: usize,
1108    /// Inventory movement count.
1109    pub inventory_movement_count: usize,
1110}
1111
1112/// Sales, KPI, and budget data snapshot.
1113#[derive(Debug, Clone, Default)]
1114pub struct SalesKpiBudgetsSnapshot {
1115    /// Sales quotes (actual data).
1116    pub sales_quotes: Vec<SalesQuote>,
1117    /// Management KPIs (actual data).
1118    pub kpis: Vec<ManagementKpi>,
1119    /// Budgets (actual data).
1120    pub budgets: Vec<Budget>,
1121    /// External expectations (ISA-520 substantive-analytics layer).
1122    pub external_expectations: Vec<ExternalExpectation>,
1123    /// Evidence anchors (ISA-505 external-corroboration layer).
1124    pub evidence_anchors: Vec<EvidenceAnchor>,
1125    /// Sales quote count.
1126    pub sales_quote_count: usize,
1127    /// Management KPI count.
1128    pub kpi_count: usize,
1129    /// Budget line count.
1130    pub budget_line_count: usize,
1131}
1132
1133/// Anomaly labels generated during injection.
1134#[derive(Debug, Clone, Default)]
1135pub struct AnomalyLabels {
1136    /// All anomaly labels.
1137    pub labels: Vec<LabeledAnomaly>,
1138    /// Summary statistics.
1139    pub summary: Option<AnomalySummary>,
1140    /// Count by anomaly type.
1141    pub by_type: HashMap<String, usize>,
1142    /// Synthetic prior-year carry-forward register (confirmed campaign counterparties) — the
1143    /// confirmation channel the memory arm consumes (§40/§59). Empty unless `fraud.campaigns
1144    /// .carry_forward` is enabled. Written to `labels/carry_forward.json`.
1145    pub carry_forward: Vec<datasynth_generators::anomaly::campaign::CarryForwardRecord>,
1146}
1147
1148/// Balance validation results from running balance tracker.
1149#[derive(Debug, Clone, Default)]
1150pub struct BalanceValidationResult {
1151    /// Whether validation was performed.
1152    pub validated: bool,
1153    /// Whether balance sheet equation is satisfied.
1154    pub is_balanced: bool,
1155    /// Number of entries processed.
1156    pub entries_processed: u64,
1157    /// Total debits across all entries.
1158    pub total_debits: rust_decimal::Decimal,
1159    /// Total credits across all entries.
1160    pub total_credits: rust_decimal::Decimal,
1161    /// Number of accounts tracked.
1162    pub accounts_tracked: usize,
1163    /// Number of companies tracked.
1164    pub companies_tracked: usize,
1165    /// Validation errors encountered.
1166    pub validation_errors: Vec<ValidationError>,
1167    /// Whether any unbalanced entries were found.
1168    pub has_unbalanced_entries: bool,
1169}
1170
1171/// Tax data snapshot (jurisdictions, codes, provisions, returns, withholding).
1172#[derive(Debug, Clone, Default)]
1173pub struct TaxSnapshot {
1174    /// Tax jurisdictions.
1175    pub jurisdictions: Vec<TaxJurisdiction>,
1176    /// Tax codes.
1177    pub codes: Vec<TaxCode>,
1178    /// Tax lines computed on documents.
1179    pub tax_lines: Vec<TaxLine>,
1180    /// Tax returns filed per period.
1181    pub tax_returns: Vec<TaxReturn>,
1182    /// Tax provisions.
1183    pub tax_provisions: Vec<TaxProvision>,
1184    /// Withholding tax records.
1185    pub withholding_records: Vec<WithholdingTaxRecord>,
1186    /// Tax anomaly labels.
1187    pub tax_anomaly_labels: Vec<datasynth_generators::TaxAnomalyLabel>,
1188    /// Jurisdiction count.
1189    pub jurisdiction_count: usize,
1190    /// Code count.
1191    pub code_count: usize,
1192    /// Deferred tax engine output (temporary differences, ETR reconciliation, rollforwards, JEs).
1193    pub deferred_tax: datasynth_generators::DeferredTaxSnapshot,
1194    /// Journal entries posting tax payable/receivable from computed tax lines.
1195    pub tax_posting_journal_entries: Vec<JournalEntry>,
1196}
1197
1198/// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
1199#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1200pub struct IntercompanySnapshot {
1201    /// Group ownership structure (parent/subsidiary/associate relationships).
1202    pub group_structure: Option<datasynth_core::models::intercompany::GroupStructure>,
1203    /// IC matched pairs (transaction pairs between related entities).
1204    pub matched_pairs: Vec<datasynth_core::models::intercompany::ICMatchedPair>,
1205    /// IC journal entries generated from matched pairs (seller side).
1206    pub seller_journal_entries: Vec<JournalEntry>,
1207    /// IC journal entries generated from matched pairs (buyer side).
1208    pub buyer_journal_entries: Vec<JournalEntry>,
1209    /// Elimination entries for consolidation.
1210    pub elimination_entries: Vec<datasynth_core::models::intercompany::EliminationEntry>,
1211    /// NCI measurements derived from group structure ownership percentages.
1212    pub nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement>,
1213    /// IC source document chains (seller invoices, buyer POs/GRs/VIs).
1214    #[serde(skip)]
1215    pub ic_document_chains: Option<datasynth_generators::ICDocumentChains>,
1216    /// IC matched pair count.
1217    pub matched_pair_count: usize,
1218    /// IC elimination entry count.
1219    pub elimination_entry_count: usize,
1220    /// IC matching rate (0.0 to 1.0).
1221    pub match_rate: f64,
1222}
1223
1224/// ESG data snapshot (emissions, energy, water, waste, social, governance, supply chain, disclosures).
1225#[derive(Debug, Clone, Default)]
1226pub struct EsgSnapshot {
1227    /// Emission records (scope 1, 2, 3).
1228    pub emissions: Vec<EmissionRecord>,
1229    /// Energy consumption records.
1230    pub energy: Vec<EnergyConsumption>,
1231    /// Water usage records.
1232    pub water: Vec<WaterUsage>,
1233    /// Waste records.
1234    pub waste: Vec<WasteRecord>,
1235    /// Workforce diversity metrics.
1236    pub diversity: Vec<WorkforceDiversityMetric>,
1237    /// Pay equity metrics.
1238    pub pay_equity: Vec<PayEquityMetric>,
1239    /// Safety incidents.
1240    pub safety_incidents: Vec<SafetyIncident>,
1241    /// Safety metrics.
1242    pub safety_metrics: Vec<SafetyMetric>,
1243    /// Governance metrics.
1244    pub governance: Vec<GovernanceMetric>,
1245    /// Supplier ESG assessments.
1246    pub supplier_assessments: Vec<SupplierEsgAssessment>,
1247    /// Materiality assessments.
1248    pub materiality: Vec<MaterialityAssessment>,
1249    /// ESG disclosures.
1250    pub disclosures: Vec<EsgDisclosure>,
1251    /// Climate scenarios.
1252    pub climate_scenarios: Vec<ClimateScenario>,
1253    /// ESG anomaly labels.
1254    pub anomaly_labels: Vec<EsgAnomalyLabel>,
1255    /// Total emission record count.
1256    pub emission_count: usize,
1257    /// Total disclosure count.
1258    pub disclosure_count: usize,
1259}
1260
1261/// Treasury data snapshot (cash management, hedging, debt, pooling).
1262#[derive(Debug, Clone, Default)]
1263pub struct TreasurySnapshot {
1264    /// Cash positions (daily balances per account).
1265    pub cash_positions: Vec<CashPosition>,
1266    /// Cash forecasts.
1267    pub cash_forecasts: Vec<CashForecast>,
1268    /// Cash pools.
1269    pub cash_pools: Vec<CashPool>,
1270    /// Cash pool sweep transactions.
1271    pub cash_pool_sweeps: Vec<CashPoolSweep>,
1272    /// Hedging instruments.
1273    pub hedging_instruments: Vec<HedgingInstrument>,
1274    /// Hedge relationships (ASC 815/IFRS 9 designations).
1275    pub hedge_relationships: Vec<HedgeRelationship>,
1276    /// Debt instruments.
1277    pub debt_instruments: Vec<DebtInstrument>,
1278    /// Bank guarantees and letters of credit.
1279    pub bank_guarantees: Vec<BankGuarantee>,
1280    /// Intercompany netting runs.
1281    pub netting_runs: Vec<NettingRun>,
1282    /// Treasury anomaly labels.
1283    pub treasury_anomaly_labels: Vec<datasynth_generators::treasury::TreasuryAnomalyLabel>,
1284    /// Journal entries generated from treasury instruments (debt interest accruals,
1285    /// hedge MTM, cash pool sweeps).
1286    pub journal_entries: Vec<JournalEntry>,
1287}
1288
/// Project accounting data snapshot (projects, costs, revenue, milestones, EVM).
///
/// Every field is a `Vec`, so the derived `Default` produces a snapshot in
/// which all collections are empty.
#[derive(Debug, Clone, Default)]
pub struct ProjectAccountingSnapshot {
    /// Projects with WBS hierarchies.
    pub projects: Vec<Project>,
    /// Project cost lines (linked from source documents).
    pub cost_lines: Vec<ProjectCostLine>,
    /// Revenue recognition records.
    pub revenue_records: Vec<ProjectRevenue>,
    /// Earned value metrics (the "EVM" part of the snapshot).
    pub earned_value_metrics: Vec<EarnedValueMetric>,
    /// Change orders.
    pub change_orders: Vec<ChangeOrder>,
    /// Project milestones.
    pub milestones: Vec<ProjectMilestone>,
}
1305
/// Complete result of enhanced generation run.
///
/// Aggregates the output of every generation phase into one value. The type
/// derives only `Debug` and `Default` (it is not `Clone`); the derived
/// `Default` leaves every collection empty and every `Option` field `None`.
/// Fields documented as "if … enabled" are expected to keep that default
/// when the corresponding phase does not run.
#[derive(Debug, Default)]
pub struct EnhancedGenerationResult {
    /// Generated chart of accounts.
    pub chart_of_accounts: ChartOfAccounts,
    /// Master data snapshot.
    pub master_data: MasterDataSnapshot,
    /// Document flow snapshot.
    pub document_flows: DocumentFlowSnapshot,
    /// Subledger snapshot (linked from document flows).
    pub subledger: SubledgerSnapshot,
    /// OCPM event log snapshot (if OCPM generation enabled).
    pub ocpm: OcpmSnapshot,
    /// Audit data snapshot (if audit generation enabled).
    pub audit: AuditSnapshot,
    /// Banking KYC/AML data snapshot (if banking generation enabled).
    pub banking: BankingSnapshot,
    /// Graph export snapshot (if graph export enabled).
    pub graph_export: GraphExportSnapshot,
    /// S2C sourcing data snapshot (if sourcing generation enabled).
    pub sourcing: SourcingSnapshot,
    /// Financial reporting snapshot (financial statements + bank reconciliations).
    pub financial_reporting: FinancialReportingSnapshot,
    /// HR data snapshot (payroll, time entries, expenses).
    pub hr: HrSnapshot,
    /// Accounting standards snapshot (revenue recognition, impairment).
    pub accounting_standards: AccountingStandardsSnapshot,
    /// Manufacturing snapshot (production orders, quality inspections, cycle counts).
    pub manufacturing: ManufacturingSnapshot,
    /// Sales, KPI, and budget snapshot.
    pub sales_kpi_budgets: SalesKpiBudgetsSnapshot,
    /// Tax data snapshot (jurisdictions, codes, provisions, returns).
    pub tax: TaxSnapshot,
    /// ESG data snapshot (emissions, energy, social, governance, disclosures).
    pub esg: EsgSnapshot,
    /// Treasury data snapshot (cash management, hedging, debt).
    pub treasury: TreasurySnapshot,
    /// Project accounting data snapshot (projects, costs, revenue, EVM, milestones).
    pub project_accounting: ProjectAccountingSnapshot,
    /// Process evolution events (workflow changes, automation, policy changes, control enhancements).
    pub process_evolution: Vec<ProcessEvolutionEvent>,
    /// Organizational events (acquisitions, divestitures, reorganizations, leadership changes).
    pub organizational_events: Vec<OrganizationalEvent>,
    /// Disruption events (outages, migrations, process changes, recoveries, regulatory).
    pub disruption_events: Vec<datasynth_generators::disruption::DisruptionEvent>,
    /// Intercompany data snapshot (IC transactions, matched pairs, eliminations).
    pub intercompany: IntercompanySnapshot,
    /// Generated journal entries.
    pub journal_entries: Vec<JournalEntry>,
    /// Anomaly labels (if injection enabled).
    pub anomaly_labels: AnomalyLabels,
    /// Balance validation results (if validation enabled).
    pub balance_validation: BalanceValidationResult,
    /// Data quality statistics (if injection enabled).
    pub data_quality_stats: DataQualityStats,
    /// Data quality issue records (if injection enabled).
    pub quality_issues: Vec<datasynth_generators::QualityIssue>,
    /// Generation statistics.
    pub statistics: EnhancedGenerationStatistics,
    /// Data lineage graph (if tracking enabled); `None` otherwise.
    pub lineage: Option<super::lineage::LineageGraph>,
    /// Quality gate evaluation result; `None` when no evaluation was recorded.
    pub gate_result: Option<datasynth_eval::gates::GateResult>,
    /// Internal controls (if controls generation enabled).
    pub internal_controls: Vec<InternalControl>,
    /// SoD (Segregation of Duties) violations identified during control application.
    ///
    /// Each record corresponds to a journal entry where `sod_violation == true`.
    pub sod_violations: Vec<datasynth_core::models::SodViolation>,
    /// Opening balances (if opening balance generation enabled).
    pub opening_balances: Vec<GeneratedOpeningBalance>,
    /// GL-to-subledger reconciliation results (if reconciliation enabled).
    pub subledger_reconciliation: Vec<datasynth_generators::ReconciliationResult>,
    /// Counterfactual (original, mutated) JE pairs for ML training.
    pub counterfactual_pairs: Vec<datasynth_generators::counterfactual::CounterfactualPair>,
    /// Fraud red-flag indicators on P2P/O2C documents.
    pub red_flags: Vec<datasynth_generators::fraud::RedFlag>,
    /// Collusion rings (coordinated fraud networks).
    pub collusion_rings: Vec<datasynth_generators::fraud::CollusionRing>,
    /// Bi-temporal version chains for vendor entities.
    pub temporal_vendor_chains:
        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
    /// Entity relationship graph (nodes + edges with strength scores).
    pub entity_relationship_graph: Option<datasynth_core::models::EntityGraph>,
    /// Cross-process links (P2P ↔ O2C via inventory movements).
    pub cross_process_links: Vec<datasynth_core::models::CrossProcessLink>,
    /// Industry-specific GL accounts and metadata.
    pub industry_output: Option<datasynth_generators::industry::factory::IndustryOutput>,
    /// SP5.2 — CoA semantic prior snapshot. When `Some`, `write_journal_entries_csv`
    /// builds a secondary lookup from the prior's 3,123 corpus accounts and uses
    /// it as a fallback when the synthetic CoA index misses a line's `gl_account`
    /// (common when SP3.7's per-source attribute conditional emits corpus account
    /// numbers that differ from the synthetic CoA master table's number set).
    pub coa_semantic_prior:
        Option<datasynth_core::distributions::behavioral_priors::CoaSemanticPrior>,
    /// Compliance regulations framework data (standards, procedures, findings, filings, graph).
    pub compliance_regulations: ComplianceRegulationsSnapshot,
    /// v3.3.0: analytics-metadata snapshot (prior-year comparatives,
    /// industry benchmarks, management reports, drift events). Empty
    /// when `analytics_metadata.enabled = false`.
    pub analytics_metadata: AnalyticsMetadataSnapshot,
    /// v3.5.1+: statistical validation report (Benford, chi-squared,
    /// KS) over the generated amount distribution.  `None` when
    /// `distributions.validation.enabled = false`.
    pub statistical_validation: Option<datasynth_core::distributions::StatisticalValidationReport>,
    /// v4.1.3+: interconnectivity snapshot — vendor tier assignments,
    /// customer value-segment labels, and industry-specific metadata
    /// populated from the previously-inert `vendor_network`,
    /// `customer_segmentation`, and `industry_specific` schema
    /// sections. Empty when those sections are disabled.
    pub interconnectivity: InterconnectivitySnapshot,
}
1418
/// v4.1.3+: interconnectivity snapshot. Populated when
/// `vendor_network.enabled` / `customer_segmentation.enabled` /
/// `industry_specific.enabled` are set. Holds tier / segment / industry
/// labels for generated entities so downstream tooling (graph export,
/// risk models) can consume them without re-deriving from scratch.
///
/// Labels are stored as plain `(id, label)` tuples rather than typed enums;
/// the derived `Default` leaves every list empty.
#[derive(Debug, Clone, Default)]
pub struct InterconnectivitySnapshot {
    /// `(vendor_id, tier)` pairs. Tier 1 = strategic / primary; Tier 2
    /// = sub-tier suppliers to tier 1; Tier 3 = sub-sub-tier.
    pub vendor_tiers: Vec<(String, u8)>,
    /// `(vendor_id, cluster_label)` pairs where cluster_label is one of
    /// `"reliable_strategic" / "standard_operational" / "transactional"
    /// / "problematic"`.
    pub vendor_clusters: Vec<(String, String)>,
    /// `(customer_id, value_segment)` pairs where value_segment is one
    /// of `"enterprise" / "mid_market" / "smb" / "consumer"`.
    pub customer_value_segments: Vec<(String, String)>,
    /// `(customer_id, lifecycle_stage)` pairs where stage is one of
    /// `"prospect" / "new" / "growth" / "mature" / "at_risk" /
    /// "churned" / "won_back"`.
    pub customer_lifecycle_stages: Vec<(String, String)>,
    /// Summary: industry-specific knob applied, if any (e.g.
    /// `"manufacturing.bom_depth=3"`).
    pub industry_metadata: Vec<String>,
}
1444
/// v3.3.0: snapshot for the analytics-metadata phase.
///
/// Every field is a `Vec`, so the derived `Default` produces a snapshot in
/// which all collections are empty.
#[derive(Debug, Clone, Default)]
pub struct AnalyticsMetadataSnapshot {
    /// Prior-year comparative balances per account, per entity.
    pub prior_year_comparatives: Vec<datasynth_core::models::PriorYearComparative>,
    /// Industry benchmarks for the configured industry.
    pub industry_benchmarks: Vec<datasynth_core::models::IndustryBenchmark>,
    /// Management-report artefacts (dashboards, MDA sections).
    pub management_reports: Vec<datasynth_core::models::ManagementReport>,
    /// Drift-event labels emitted from the post-generation sweep.
    pub drift_events: Vec<datasynth_core::models::LabeledDriftEvent>,
}
1457
/// Enhanced statistics about a generation run.
///
/// The struct is serializable. Fields annotated with `#[serde(default)]`
/// fall back to their `Default` value when missing from the input, while
/// the un-annotated (non-`Option`) fields must be present for
/// deserialization to succeed. The three `neural_*` fields are additionally
/// omitted from serialized output while they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnhancedGenerationStatistics {
    /// Total journal entries generated.
    pub total_entries: u64,
    /// Total line items generated.
    pub total_line_items: u64,
    /// Number of accounts in CoA.
    pub accounts_count: usize,
    /// Number of companies.
    pub companies_count: usize,
    /// Period in months.
    pub period_months: u32,
    /// Master data counts: vendors.
    pub vendor_count: usize,
    /// Master data counts: customers.
    pub customer_count: usize,
    /// Master data counts: materials.
    pub material_count: usize,
    /// Master data counts: assets.
    pub asset_count: usize,
    /// Master data counts: employees.
    pub employee_count: usize,
    /// Document flow counts: P2P chains.
    pub p2p_chain_count: usize,
    /// Document flow counts: O2C chains.
    pub o2c_chain_count: usize,
    /// Subledger counts: AP invoices.
    pub ap_invoice_count: usize,
    /// Subledger counts: AR invoices.
    pub ar_invoice_count: usize,
    /// OCPM counts: events.
    pub ocpm_event_count: usize,
    /// OCPM counts: objects.
    pub ocpm_object_count: usize,
    /// OCPM counts: cases.
    pub ocpm_case_count: usize,
    /// Audit counts: engagements.
    pub audit_engagement_count: usize,
    /// Audit counts: workpapers.
    pub audit_workpaper_count: usize,
    /// Audit counts: evidence items.
    pub audit_evidence_count: usize,
    /// Audit counts: risks.
    pub audit_risk_count: usize,
    /// Audit counts: findings.
    pub audit_finding_count: usize,
    /// Audit counts: judgments.
    pub audit_judgment_count: usize,
    /// ISA 505 confirmation counts: confirmations.
    #[serde(default)]
    pub audit_confirmation_count: usize,
    /// ISA 505 confirmation counts: confirmation responses.
    #[serde(default)]
    pub audit_confirmation_response_count: usize,
    /// ISA 330/530 procedure step and sample counts: procedure steps.
    #[serde(default)]
    pub audit_procedure_step_count: usize,
    /// ISA 330/530 procedure step and sample counts: samples.
    #[serde(default)]
    pub audit_sample_count: usize,
    /// ISA 520 analytical procedure counts.
    #[serde(default)]
    pub audit_analytical_result_count: usize,
    /// ISA 610 internal audit counts: internal audit functions.
    #[serde(default)]
    pub audit_ia_function_count: usize,
    /// ISA 610 internal audit counts: internal audit reports.
    #[serde(default)]
    pub audit_ia_report_count: usize,
    /// ISA 550 related party counts: related parties.
    #[serde(default)]
    pub audit_related_party_count: usize,
    /// ISA 550 related party counts: related party transactions.
    #[serde(default)]
    pub audit_related_party_transaction_count: usize,
    /// Anomaly counts.
    pub anomalies_injected: usize,
    /// Data quality issue counts.
    pub data_quality_issues: usize,
    /// Banking counts: customers.
    pub banking_customer_count: usize,
    /// Banking counts: accounts.
    pub banking_account_count: usize,
    /// Banking counts: transactions.
    pub banking_transaction_count: usize,
    /// Banking counts: items flagged suspicious (presumably transactions —
    /// confirm against the code that populates this field).
    pub banking_suspicious_count: usize,
    /// Graph export counts: exports produced.
    pub graph_export_count: usize,
    /// Graph export counts: nodes.
    pub graph_node_count: usize,
    /// Graph export counts: edges.
    pub graph_edge_count: usize,
    /// LLM enrichment timing (milliseconds).
    #[serde(default)]
    pub llm_enrichment_ms: u64,
    /// Number of vendor names enriched by LLM.
    #[serde(default)]
    pub llm_vendors_enriched: usize,
    /// v4.1.1+: number of customer names enriched by LLM.
    #[serde(default)]
    pub llm_customers_enriched: usize,
    /// v4.1.1+: number of material descriptions enriched by LLM.
    #[serde(default)]
    pub llm_materials_enriched: usize,
    /// v4.1.1+: number of audit finding titles enriched by LLM.
    #[serde(default)]
    pub llm_findings_enriched: usize,
    /// Diffusion enhancement timing (milliseconds).
    #[serde(default)]
    pub diffusion_enhancement_ms: u64,
    /// Number of diffusion samples generated.
    #[serde(default)]
    pub diffusion_samples_generated: usize,
    /// Hybrid-diffusion blend weight actually applied (after clamp to \[0,1\]).
    /// `None` when the neural/hybrid backend is not active.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_weight: Option<f64>,
    /// Hybrid-diffusion strategy applied (weighted_average / column_select / threshold).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_hybrid_strategy: Option<String>,
    /// How many columns were routed through the neural backend.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub neural_routed_column_count: Option<usize>,
    /// Causal generation timing (milliseconds).
    #[serde(default)]
    pub causal_generation_ms: u64,
    /// Number of causal samples generated.
    #[serde(default)]
    pub causal_samples_generated: usize,
    /// Whether causal validation passed.
    #[serde(default)]
    pub causal_validation_passed: Option<bool>,
    /// S2C sourcing counts: sourcing projects.
    #[serde(default)]
    pub sourcing_project_count: usize,
    /// S2C sourcing counts: RFx events.
    #[serde(default)]
    pub rfx_event_count: usize,
    /// S2C sourcing counts: bids.
    #[serde(default)]
    pub bid_count: usize,
    /// S2C sourcing counts: contracts.
    #[serde(default)]
    pub contract_count: usize,
    /// S2C sourcing counts: catalog items.
    #[serde(default)]
    pub catalog_item_count: usize,
    /// S2C sourcing counts: scorecards.
    #[serde(default)]
    pub scorecard_count: usize,
    /// Financial reporting counts: financial statements.
    #[serde(default)]
    pub financial_statement_count: usize,
    /// Financial reporting counts: bank reconciliations.
    #[serde(default)]
    pub bank_reconciliation_count: usize,
    /// HR counts: payroll runs.
    #[serde(default)]
    pub payroll_run_count: usize,
    /// HR counts: time entries.
    #[serde(default)]
    pub time_entry_count: usize,
    /// HR counts: expense reports.
    #[serde(default)]
    pub expense_report_count: usize,
    /// HR counts: benefit enrollments.
    #[serde(default)]
    pub benefit_enrollment_count: usize,
    /// HR counts: pension plans.
    #[serde(default)]
    pub pension_plan_count: usize,
    /// HR counts: stock grants.
    #[serde(default)]
    pub stock_grant_count: usize,
    /// Accounting standards counts: revenue contracts.
    #[serde(default)]
    pub revenue_contract_count: usize,
    /// Accounting standards counts: impairment tests.
    #[serde(default)]
    pub impairment_test_count: usize,
    /// Accounting standards counts: business combinations.
    #[serde(default)]
    pub business_combination_count: usize,
    /// Accounting standards counts: ECL models.
    #[serde(default)]
    pub ecl_model_count: usize,
    /// Accounting standards counts: provisions.
    #[serde(default)]
    pub provision_count: usize,
    /// Manufacturing counts: production orders.
    #[serde(default)]
    pub production_order_count: usize,
    /// Manufacturing counts: quality inspections.
    #[serde(default)]
    pub quality_inspection_count: usize,
    /// Manufacturing counts: cycle counts.
    #[serde(default)]
    pub cycle_count_count: usize,
    /// Manufacturing counts: BOM components.
    #[serde(default)]
    pub bom_component_count: usize,
    /// Manufacturing counts: inventory movements.
    #[serde(default)]
    pub inventory_movement_count: usize,
    /// Sales & reporting counts: sales quotes.
    #[serde(default)]
    pub sales_quote_count: usize,
    /// Sales & reporting counts: KPIs.
    #[serde(default)]
    pub kpi_count: usize,
    /// Sales & reporting counts: budget lines.
    #[serde(default)]
    pub budget_line_count: usize,
    /// Tax counts: jurisdictions.
    #[serde(default)]
    pub tax_jurisdiction_count: usize,
    /// Tax counts: tax codes.
    #[serde(default)]
    pub tax_code_count: usize,
    /// ESG counts: emission records.
    #[serde(default)]
    pub esg_emission_count: usize,
    /// ESG counts: disclosures.
    #[serde(default)]
    pub esg_disclosure_count: usize,
    /// Intercompany counts: matched pairs.
    #[serde(default)]
    pub ic_matched_pair_count: usize,
    /// Intercompany counts: eliminations.
    #[serde(default)]
    pub ic_elimination_count: usize,
    /// Number of intercompany journal entries (seller + buyer side).
    #[serde(default)]
    pub ic_transaction_count: usize,
    /// Number of fixed asset subledger records.
    #[serde(default)]
    pub fa_subledger_count: usize,
    /// Number of inventory subledger records.
    #[serde(default)]
    pub inventory_subledger_count: usize,
    /// Treasury debt instrument count.
    #[serde(default)]
    pub treasury_debt_instrument_count: usize,
    /// Treasury hedging instrument count.
    #[serde(default)]
    pub treasury_hedging_instrument_count: usize,
    /// Project accounting project count.
    #[serde(default)]
    pub project_count: usize,
    /// Project accounting change order count.
    #[serde(default)]
    pub project_change_order_count: usize,
    /// Tax provision count.
    #[serde(default)]
    pub tax_provision_count: usize,
    /// Opening balance count.
    #[serde(default)]
    pub opening_balance_count: usize,
    /// Subledger reconciliation count.
    #[serde(default)]
    pub subledger_reconciliation_count: usize,
    /// Tax line count.
    #[serde(default)]
    pub tax_line_count: usize,
    /// Project cost line count.
    #[serde(default)]
    pub project_cost_line_count: usize,
    /// Cash position count.
    #[serde(default)]
    pub cash_position_count: usize,
    /// Cash forecast count.
    #[serde(default)]
    pub cash_forecast_count: usize,
    /// Cash pool count.
    #[serde(default)]
    pub cash_pool_count: usize,
    /// Process evolution event count.
    #[serde(default)]
    pub process_evolution_event_count: usize,
    /// Organizational event count.
    #[serde(default)]
    pub organizational_event_count: usize,
    /// Counterfactual pair count.
    #[serde(default)]
    pub counterfactual_pair_count: usize,
    /// Number of fraud red-flag indicators generated.
    #[serde(default)]
    pub red_flag_count: usize,
    /// Number of collusion rings generated.
    #[serde(default)]
    pub collusion_ring_count: usize,
    /// Number of bi-temporal vendor version chains generated.
    #[serde(default)]
    pub temporal_version_chain_count: usize,
    /// Number of nodes in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_node_count: usize,
    /// Number of edges in the entity relationship graph.
    #[serde(default)]
    pub entity_relationship_edge_count: usize,
    /// Number of cross-process links generated.
    #[serde(default)]
    pub cross_process_link_count: usize,
    /// Number of disruption events generated.
    #[serde(default)]
    pub disruption_event_count: usize,
    /// Number of industry-specific GL accounts generated.
    #[serde(default)]
    pub industry_gl_account_count: usize,
    /// Number of period-close journal entries generated (tax provision + closing entries).
    #[serde(default)]
    pub period_close_je_count: usize,
}
1727
1728/// Enhanced orchestrator with full feature integration.
1729pub struct EnhancedOrchestrator {
1730    config: GeneratorConfig,
1731    phase_config: PhaseConfig,
1732    coa: Option<Arc<ChartOfAccounts>>,
1733    master_data: MasterDataSnapshot,
1734    seed: u64,
1735    multi_progress: Option<MultiProgress>,
1736    /// Resource guard for memory, disk, and CPU monitoring
1737    resource_guard: ResourceGuard,
1738    /// Output path for disk space monitoring
1739    output_path: Option<PathBuf>,
1740    /// Copula generators for preserving correlations (from fingerprint)
1741    copula_generators: Vec<CopulaGeneratorSpec>,
1742    /// Country pack registry for localized data generation
1743    country_pack_registry: datasynth_core::CountryPackRegistry,
1744    /// Optional streaming sink for phase-by-phase output
1745    phase_sink: Option<Box<dyn crate::stream_pipeline::PhaseSink>>,
1746    /// Shared template provider for user-supplied template packs.
1747    ///
1748    /// Constructed from `config.templates.path` at orchestrator creation
1749    /// time. When the path is `None`, this is still populated with an
1750    /// embedded-only provider so generators can always call trait methods
1751    /// without an `Option<…>` guard. v3.2.0+.
1752    template_provider: datasynth_core::templates::SharedTemplateProvider,
1753    /// v3.4.1+ temporal context for business-day / holiday awareness.
1754    ///
1755    /// Populated only when `temporal_patterns.business_days.enabled`. When
1756    /// `None`, document-flow / HR / treasury / period-close generators keep
1757    /// their legacy raw-RNG date-offset behaviour (byte-identical to v3.4.0
1758    /// for the same seed).
1759    temporal_context: Option<Arc<datasynth_core::distributions::TemporalContext>>,
1760    /// Optional shard-mode context (set by group-engine shard runners).
1761    /// `None` preserves byte-for-byte pre-v5.0 single-entity behavior.
1762    shard_context: Option<crate::shard_context::ShardContext>,
1763    /// SP3.12 — cached priors, shared between `generate_journal_entries` (which
1764    /// loads them) and `generate_jes_from_document_flows` (which applies padding).
1765    /// Set once after the SP3 opt-in block in `generate_journal_entries`.
1766    cached_priors: Option<std::sync::Arc<datasynth_generators::priors_loader::LoadedPriors>>,
1767}
1768
1769impl EnhancedOrchestrator {
1770    /// Create a new enhanced orchestrator.
1771    pub fn new(config: GeneratorConfig, phase_config: PhaseConfig) -> SynthResult<Self> {
1772        datasynth_config::validate_config(&config)?;
1773
1774        let seed = config.global.seed.unwrap_or_else(rand::random);
1775
1776        // Build resource guard from config
1777        let resource_guard = Self::build_resource_guard(&config, None);
1778
1779        // Build country pack registry from config
1780        let country_pack_registry = match &config.country_packs {
1781            Some(cp) => {
1782                datasynth_core::CountryPackRegistry::new(cp.external_dir.as_deref(), &cp.overrides)
1783                    .map_err(|e| SynthError::config(e.to_string()))?
1784            }
1785            None => datasynth_core::CountryPackRegistry::builtin_only()
1786                .map_err(|e| SynthError::config(e.to_string()))?,
1787        };
1788
1789        // Build the shared template provider from config.templates.path.
1790        // `None` → embedded-only provider (byte-identical pre-v3.2.0 output).
1791        // `Some(path)` → load file/dir and honour `merge_strategy`.
1792        let template_provider = Self::build_template_provider(&config)?;
1793
1794        // v3.4.1: build a shared temporal context when
1795        // `temporal_patterns.business_days.enabled`. `None` preserves the
1796        // raw-RNG date-offset behaviour per-generator.
1797        let temporal_context = Self::build_temporal_context(&config)?;
1798
1799        Ok(Self {
1800            config,
1801            phase_config,
1802            coa: None,
1803            master_data: MasterDataSnapshot::default(),
1804            seed,
1805            multi_progress: None,
1806            resource_guard,
1807            output_path: None,
1808            copula_generators: Vec::new(),
1809            country_pack_registry,
1810            phase_sink: None,
1811            template_provider,
1812            temporal_context,
1813            shard_context: None,
1814            cached_priors: None,
1815        })
1816    }
1817
1818    /// Install shard-mode context.  Called by the group shard runner
1819    /// before [`EnhancedOrchestrator::generate`] (or the equivalent
1820    /// entry point).  Has no effect on single-entity runs.
1821    ///
1822    /// See [`crate::shard_context::ShardContext`] for rationale.
1823    pub fn set_shard_context(&mut self, ctx: crate::shard_context::ShardContext) {
1824        self.shard_context = Some(ctx);
1825    }
1826
1827    /// Build the shared [`TemporalContext`] from `config.temporal_patterns`.
1828    ///
1829    /// Returns `Ok(None)` when temporal-pattern features are disabled — the
1830    /// caller keeps its legacy raw-RNG path. Returns `Ok(Some(arc))` when
1831    /// enabled. Returns `Err` only for unrecoverable config errors.
1832    fn build_temporal_context(
1833        config: &GeneratorConfig,
1834    ) -> SynthResult<Option<Arc<datasynth_core::distributions::TemporalContext>>> {
1835        use datasynth_core::distributions::{parse_region_code, TemporalContext};
1836
1837        let tp = &config.temporal_patterns;
1838        if !tp.enabled || !tp.business_days.enabled {
1839            return Ok(None);
1840        }
1841
1842        let start_date = NaiveDate::parse_from_str(&config.global.start_date, "%Y-%m-%d")
1843            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
1844        let end_date = start_date + chrono::Months::new(config.global.period_months);
1845
1846        let region_code = tp
1847            .calendars
1848            .regions
1849            .first()
1850            .cloned()
1851            .unwrap_or_else(|| "US".to_string());
1852        let region = parse_region_code(&region_code);
1853
1854        Ok(Some(TemporalContext::shared(region, start_date, end_date)))
1855    }
1856
1857    /// Build the shared template provider from `config.templates`.
1858    ///
1859    /// Always returns a provider — falls back to embedded-only when
1860    /// `config.templates.path` is `None`. The merge-strategy from config
1861    /// maps onto the loader's [`MergeStrategy`] enum. Load failures at
1862    /// orchestrator-construction time are fatal (preferable to silently
1863    /// using embedded pools when the user supplied a bad path).
1864    fn build_template_provider(
1865        config: &GeneratorConfig,
1866    ) -> SynthResult<datasynth_core::templates::SharedTemplateProvider> {
1867        use datasynth_core::templates::{
1868            loader::{MergeStrategy, TemplateLoader},
1869            DefaultTemplateProvider,
1870        };
1871        use std::sync::Arc;
1872
1873        let provider = match &config.templates.path {
1874            None => DefaultTemplateProvider::new(),
1875            Some(path) => {
1876                let data = if path.is_dir() {
1877                    TemplateLoader::load_from_directory(path)
1878                } else {
1879                    TemplateLoader::load_from_file(path)
1880                }
1881                .map_err(|e| {
1882                    SynthError::config(format!(
1883                        "Failed to load templates from {}: {e}",
1884                        path.display()
1885                    ))
1886                })?;
1887                let strategy = match config.templates.merge_strategy {
1888                    datasynth_config::TemplateMergeStrategy::Extend => MergeStrategy::Extend,
1889                    datasynth_config::TemplateMergeStrategy::Replace => MergeStrategy::Replace,
1890                    datasynth_config::TemplateMergeStrategy::MergePreferFile => {
1891                        MergeStrategy::MergePreferFile
1892                    }
1893                };
1894                DefaultTemplateProvider::with_templates(data, strategy)
1895            }
1896        };
1897        Ok(Arc::new(provider))
1898    }
1899
1900    /// Create with default phase config.
1901    pub fn with_defaults(config: GeneratorConfig) -> SynthResult<Self> {
1902        Self::new(config, PhaseConfig::default())
1903    }
1904
1905    /// Set a streaming phase sink for real-time output (builder pattern).
1906    pub fn with_phase_sink(mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) -> Self {
1907        self.phase_sink = Some(sink);
1908        self
1909    }
1910
1911    /// Set a streaming phase sink on an existing orchestrator.
1912    pub fn set_phase_sink(&mut self, sink: Box<dyn crate::stream_pipeline::PhaseSink>) {
1913        self.phase_sink = Some(sink);
1914    }
1915
1916    /// Emit a batch of items to the phase sink (if configured).
1917    fn emit_phase_items<T: serde::Serialize>(&self, phase: &str, type_name: &str, items: &[T]) {
1918        if let Some(ref sink) = self.phase_sink {
1919            for item in items {
1920                if let Ok(value) = serde_json::to_value(item) {
1921                    if let Err(e) = sink.emit(phase, type_name, &value) {
1922                        warn!(
1923                            "Stream sink emit failed for phase '{phase}', type '{type_name}': {e}"
1924                        );
1925                    }
1926                }
1927            }
1928            if let Err(e) = sink.phase_complete(phase) {
1929                warn!("Stream sink phase_complete failed for phase '{phase}': {e}");
1930            }
1931        }
1932    }
1933
1934    /// Enable/disable progress bars.
1935    pub fn with_progress(mut self, show: bool) -> Self {
1936        self.phase_config.show_progress = show;
1937        if show {
1938            self.multi_progress = Some(MultiProgress::new());
1939        }
1940        self
1941    }
1942
1943    /// Set the output path for disk space monitoring.
1944    pub fn with_output_path<P: Into<PathBuf>>(mut self, path: P) -> Self {
1945        let path = path.into();
1946        self.output_path = Some(path.clone());
1947        // Rebuild resource guard with the output path
1948        self.resource_guard = Self::build_resource_guard(&self.config, Some(path));
1949        self
1950    }
1951
1952    /// Access the country pack registry.
1953    pub fn country_pack_registry(&self) -> &datasynth_core::CountryPackRegistry {
1954        &self.country_pack_registry
1955    }
1956
1957    /// Look up a country pack by country code string.
1958    pub fn country_pack_for(&self, country: &str) -> &datasynth_core::CountryPack {
1959        self.country_pack_registry.get_by_str(country)
1960    }
1961
1962    /// Returns the ISO 3166-1 alpha-2 country code for the primary (first)
1963    /// company, defaulting to `"US"` if no companies are configured.
1964    fn primary_country_code(&self) -> &str {
1965        self.config
1966            .companies
1967            .first()
1968            .map(|c| c.country.as_str())
1969            .unwrap_or("US")
1970    }
1971
1972    /// Resolve the country pack for the primary (first) company.
1973    fn primary_pack(&self) -> &datasynth_core::CountryPack {
1974        self.country_pack_for(self.primary_country_code())
1975    }
1976
1977    /// Resolve the CoA framework from config/country-pack.
1978    fn resolve_coa_framework(&self) -> CoAFramework {
1979        if self.config.accounting_standards.enabled {
1980            match self.config.accounting_standards.framework {
1981                Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
1982                    return CoAFramework::FrenchPcg;
1983                }
1984                Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
1985                    return CoAFramework::GermanSkr04;
1986                }
1987                _ => {}
1988            }
1989        }
1990        // Fallback: derive from country pack
1991        let pack = self.primary_pack();
1992        match pack.accounting.framework.as_str() {
1993            "french_gaap" => CoAFramework::FrenchPcg,
1994            "german_gaap" | "hgb" => CoAFramework::GermanSkr04,
1995            _ => CoAFramework::UsGaap,
1996        }
1997    }
1998
1999    /// Resolve the framework string consumed by
2000    /// [`datasynth_core::framework_accounts::FrameworkAccounts::for_framework`].
2001    ///
2002    /// Mirrors [`Self::resolve_coa_framework`] but returns the snake_case
2003    /// label (`"us_gaap"`, `"ifrs"`, `"french_gaap"`, `"german_gaap"`,
2004    /// `"dual_reporting"`) that the framework-aware account classifier
2005    /// expects. Country drives selection because the country pack's CoA
2006    /// loader is what actually picks the numbering convention (SKR04 for
2007    /// DE, PCG for FR) — the entity's `accounting_framework` label can
2008    /// disagree with the chart it's posted against (e.g. a DE entity
2009    /// flagged `accounting_framework: ifrs` still gets SKR04 codes from
2010    /// its country pack).
2011    fn resolve_framework_str(&self) -> &'static str {
2012        // Country first — the chart of accounts loaded for this company
2013        // is keyed by country pack, so the code numbering convention
2014        // follows country, not the framework label.
2015        match self.primary_country_code().to_ascii_uppercase().as_str() {
2016            "DE" | "AT" => "german_gaap",
2017            "FR" | "BE" | "LU" => "french_gaap",
2018            _ => {
2019                // No country override → take the framework label.
2020                if self.config.accounting_standards.enabled {
2021                    match self.config.accounting_standards.framework {
2022                        Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
2023                            return "french_gaap";
2024                        }
2025                        Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
2026                            return "german_gaap";
2027                        }
2028                        Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
2029                            return "ifrs";
2030                        }
2031                        Some(
2032                            datasynth_config::schema::AccountingFrameworkConfig::DualReporting,
2033                        ) => {
2034                            return "dual_reporting";
2035                        }
2036                        Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap)
2037                        | None => {}
2038                    }
2039                }
2040                "us_gaap"
2041            }
2042        }
2043    }
2044
2045    /// Check if copula generators are available.
2046    ///
2047    /// Returns true if the orchestrator has copula generators for preserving
2048    /// correlations (typically from fingerprint-based generation).
2049    pub fn has_copulas(&self) -> bool {
2050        !self.copula_generators.is_empty()
2051    }
2052
2053    /// Get the copula generators.
2054    ///
2055    /// Returns a reference to the copula generators for use during generation.
2056    /// These can be used to generate correlated samples that preserve the
2057    /// statistical relationships from the source data.
2058    pub fn copulas(&self) -> &[CopulaGeneratorSpec] {
2059        &self.copula_generators
2060    }
2061
2062    /// Get a mutable reference to the copula generators.
2063    ///
2064    /// Allows generators to sample from copulas during data generation.
2065    pub fn copulas_mut(&mut self) -> &mut [CopulaGeneratorSpec] {
2066        &mut self.copula_generators
2067    }
2068
2069    /// Sample correlated values from a named copula.
2070    ///
2071    /// Returns None if the copula doesn't exist.
2072    pub fn sample_from_copula(&mut self, copula_name: &str) -> Option<Vec<f64>> {
2073        self.copula_generators
2074            .iter_mut()
2075            .find(|c| c.name == copula_name)
2076            .map(|c| c.generator.sample())
2077    }
2078
2079    /// Create an orchestrator from a fingerprint file.
2080    ///
2081    /// This reads the fingerprint, synthesizes a GeneratorConfig from it,
2082    /// and creates an orchestrator configured to generate data matching
2083    /// the statistical properties of the original data.
2084    ///
2085    /// # Arguments
2086    /// * `fingerprint_path` - Path to the .dsf fingerprint file
2087    /// * `phase_config` - Phase configuration for generation
2088    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2089    ///
2090    /// # Example
2091    /// ```no_run
2092    /// use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
2093    /// use std::path::Path;
2094    ///
2095    /// let orchestrator = EnhancedOrchestrator::from_fingerprint(
2096    ///     Path::new("fingerprint.dsf"),
2097    ///     PhaseConfig::default(),
2098    ///     1.0,
2099    /// ).unwrap();
2100    /// ```
2101    pub fn from_fingerprint(
2102        fingerprint_path: &std::path::Path,
2103        phase_config: PhaseConfig,
2104        scale: f64,
2105    ) -> SynthResult<Self> {
2106        info!("Loading fingerprint from: {}", fingerprint_path.display());
2107
2108        // Read the fingerprint
2109        let reader = FingerprintReader::new();
2110        let fingerprint = reader
2111            .read_from_file(fingerprint_path)
2112            .map_err(|e| SynthError::config(format!("Failed to read fingerprint: {e}")))?;
2113
2114        Self::from_fingerprint_data(fingerprint, phase_config, scale)
2115    }
2116
2117    /// Create an orchestrator from a loaded fingerprint.
2118    ///
2119    /// # Arguments
2120    /// * `fingerprint` - The loaded fingerprint
2121    /// * `phase_config` - Phase configuration for generation
2122    /// * `scale` - Scale factor for row counts (1.0 = same as original)
2123    pub fn from_fingerprint_data(
2124        fingerprint: Fingerprint,
2125        phase_config: PhaseConfig,
2126        scale: f64,
2127    ) -> SynthResult<Self> {
2128        info!(
2129            "Synthesizing config from fingerprint (version: {}, tables: {})",
2130            fingerprint.manifest.version,
2131            fingerprint.schema.tables.len()
2132        );
2133
2134        // Generate a seed for the synthesis
2135        let seed: u64 = rand::random();
2136        info!("Fingerprint synthesis seed: {}", seed);
2137
2138        // Use ConfigSynthesizer with scale option to convert fingerprint to GeneratorConfig
2139        let options = SynthesisOptions {
2140            scale,
2141            seed: Some(seed),
2142            preserve_correlations: true,
2143            inject_anomalies: true,
2144        };
2145        let synthesizer = ConfigSynthesizer::with_options(options);
2146
2147        // Synthesize full result including copula generators
2148        let synthesis_result = synthesizer
2149            .synthesize_full(&fingerprint, seed)
2150            .map_err(|e| {
2151                SynthError::config(format!("Failed to synthesize config from fingerprint: {e}"))
2152            })?;
2153
2154        // Start with a base config from the fingerprint's industry if available
2155        let mut config = if let Some(ref industry) = fingerprint.manifest.source.industry {
2156            Self::base_config_for_industry(industry)
2157        } else {
2158            Self::base_config_for_industry("manufacturing")
2159        };
2160
2161        // Apply the synthesized patches
2162        config = Self::apply_config_patch(config, &synthesis_result.config_patch);
2163
2164        // Log synthesis results
2165        info!(
2166            "Config synthesized: {} tables, scale={:.2}, copula generators: {}",
2167            fingerprint.schema.tables.len(),
2168            scale,
2169            synthesis_result.copula_generators.len()
2170        );
2171
2172        if !synthesis_result.copula_generators.is_empty() {
2173            for spec in &synthesis_result.copula_generators {
2174                info!(
2175                    "  Copula '{}' for table '{}': {} columns",
2176                    spec.name,
2177                    spec.table,
2178                    spec.columns.len()
2179                );
2180            }
2181        }
2182
2183        // Create the orchestrator with the synthesized config
2184        let mut orchestrator = Self::new(config, phase_config)?;
2185
2186        // Store copula generators for use during generation
2187        orchestrator.copula_generators = synthesis_result.copula_generators;
2188
2189        Ok(orchestrator)
2190    }
2191
2192    /// Create a base config for a given industry.
2193    fn base_config_for_industry(industry: &str) -> GeneratorConfig {
2194        use datasynth_config::presets::create_preset;
2195        use datasynth_config::TransactionVolume;
2196        use datasynth_core::models::{CoAComplexity, IndustrySector};
2197
2198        let sector = match industry.to_lowercase().as_str() {
2199            "manufacturing" => IndustrySector::Manufacturing,
2200            "retail" => IndustrySector::Retail,
2201            "financial" | "financial_services" => IndustrySector::FinancialServices,
2202            "healthcare" => IndustrySector::Healthcare,
2203            "technology" | "tech" => IndustrySector::Technology,
2204            _ => IndustrySector::Manufacturing,
2205        };
2206
2207        // Create a preset with reasonable defaults
2208        create_preset(
2209            sector,
2210            1,  // company count
2211            12, // period months
2212            CoAComplexity::Medium,
2213            TransactionVolume::TenK,
2214        )
2215    }
2216
2217    /// Apply a config patch to a GeneratorConfig.
2218    fn apply_config_patch(
2219        mut config: GeneratorConfig,
2220        patch: &datasynth_fingerprint::synthesis::ConfigPatch,
2221    ) -> GeneratorConfig {
2222        use datasynth_fingerprint::synthesis::ConfigValue;
2223
2224        for (key, value) in patch.values() {
2225            match (key.as_str(), value) {
2226                // Transaction count is handled via TransactionVolume enum on companies
2227                // Log it but cannot directly set it (would need to modify company volumes)
2228                ("transactions.count", ConfigValue::Integer(n)) => {
2229                    info!(
2230                        "Fingerprint suggests {} transactions (apply via company volumes)",
2231                        n
2232                    );
2233                }
2234                ("global.period_months", ConfigValue::Integer(n)) => {
2235                    config.global.period_months = (*n).clamp(1, 120) as u32;
2236                }
2237                ("global.start_date", ConfigValue::String(s)) => {
2238                    config.global.start_date = s.clone();
2239                }
2240                ("global.seed", ConfigValue::Integer(n)) => {
2241                    config.global.seed = Some(*n as u64);
2242                }
2243                ("fraud.enabled", ConfigValue::Bool(b)) => {
2244                    config.fraud.enabled = *b;
2245                }
2246                ("fraud.fraud_rate", ConfigValue::Float(f)) => {
2247                    config.fraud.fraud_rate = *f;
2248                }
2249                ("data_quality.enabled", ConfigValue::Bool(b)) => {
2250                    config.data_quality.enabled = *b;
2251                }
2252                // Handle anomaly injection paths (mapped to fraud config)
2253                ("anomaly_injection.enabled", ConfigValue::Bool(b)) => {
2254                    config.fraud.enabled = *b;
2255                }
2256                ("anomaly_injection.overall_rate", ConfigValue::Float(f)) => {
2257                    config.fraud.fraud_rate = *f;
2258                }
2259                _ => {
2260                    debug!("Ignoring unknown config patch key: {}", key);
2261                }
2262            }
2263        }
2264
2265        config
2266    }
2267
2268    /// Build a resource guard from the configuration.
2269    fn build_resource_guard(
2270        config: &GeneratorConfig,
2271        output_path: Option<PathBuf>,
2272    ) -> ResourceGuard {
2273        let mut builder = ResourceGuardBuilder::new();
2274
2275        // Configure memory limit if set
2276        if config.global.memory_limit_mb > 0 {
2277            builder = builder.memory_limit(config.global.memory_limit_mb);
2278        }
2279
2280        // Configure disk monitoring for output path
2281        if let Some(path) = output_path {
2282            builder = builder.output_path(path).min_free_disk(100); // Require at least 100 MB free
2283        }
2284
2285        // Use conservative degradation settings for production safety
2286        builder = builder.conservative();
2287
2288        builder.build()
2289    }
2290
2291    /// Check resources (memory, disk, CPU) and return degradation level.
2292    ///
2293    /// Returns an error if hard limits are exceeded.
2294    /// Returns Ok(DegradationLevel) indicating current resource state.
2295    fn check_resources(&self) -> SynthResult<DegradationLevel> {
2296        self.resource_guard.check()
2297    }
2298
2299    /// Check resources with logging.
2300    fn check_resources_with_log(&self, phase: &str) -> SynthResult<DegradationLevel> {
2301        let level = self.resource_guard.check()?;
2302
2303        if level != DegradationLevel::Normal {
2304            warn!(
2305                "Resource degradation at {}: level={}, memory={}MB, disk={}MB",
2306                phase,
2307                level,
2308                self.resource_guard.current_memory_mb(),
2309                self.resource_guard.available_disk_mb()
2310            );
2311        }
2312
2313        Ok(level)
2314    }
2315
2316    /// Get current degradation actions based on resource state.
2317    fn get_degradation_actions(&self) -> DegradationActions {
2318        self.resource_guard.get_actions()
2319    }
2320
2321    /// Legacy method for backwards compatibility - now uses ResourceGuard.
2322    fn check_memory_limit(&self) -> SynthResult<()> {
2323        self.check_resources()?;
2324        Ok(())
2325    }
2326
2327    /// Run the complete generation workflow.
2328    pub fn generate(&mut self) -> SynthResult<EnhancedGenerationResult> {
2329        info!("Starting enhanced generation workflow");
2330        info!(
2331            "Config: industry={:?}, period_months={}, companies={}",
2332            self.config.global.industry,
2333            self.config.global.period_months,
2334            self.config.companies.len()
2335        );
2336
2337        // Set decimal serialization mode (thread-local, affects JSON output).
2338        // Use a scope guard to reset on drop (prevents leaking across spawn_blocking reuse).
2339        let is_native = self.config.output.numeric_mode == datasynth_config::NumericMode::Native;
2340        datasynth_core::serde_decimal::set_numeric_native(is_native);
2341        struct NumericModeGuard;
2342        impl Drop for NumericModeGuard {
2343            fn drop(&mut self) {
2344                datasynth_core::serde_decimal::set_numeric_native(false);
2345            }
2346        }
2347        let _numeric_guard = if is_native {
2348            Some(NumericModeGuard)
2349        } else {
2350            None
2351        };
2352
2353        // Initial resource check before starting
2354        let initial_level = self.check_resources_with_log("initial")?;
2355        if initial_level == DegradationLevel::Emergency {
2356            return Err(SynthError::resource(
2357                "Insufficient resources to start generation",
2358            ));
2359        }
2360
2361        let mut stats = EnhancedGenerationStatistics {
2362            companies_count: self.config.companies.len(),
2363            period_months: self.config.global.period_months,
2364            ..Default::default()
2365        };
2366
2367        // Phase 1: Chart of Accounts
2368        let coa = self.phase_chart_of_accounts(&mut stats)?;
2369
2370        // Phase 2: Master Data
2371        self.phase_master_data(&mut stats)?;
2372
2373        // Emit master data to stream sink
2374        self.emit_phase_items("master_data", "Vendor", &self.master_data.vendors);
2375        self.emit_phase_items("master_data", "Customer", &self.master_data.customers);
2376        self.emit_phase_items("master_data", "Material", &self.master_data.materials);
2377
2378        // Phase 3: Document Flows + Subledger Linking
2379        let (mut document_flows, mut subledger, fa_journal_entries) =
2380            self.phase_document_flows(&mut stats)?;
2381
2382        // Emit document flows to stream sink
2383        self.emit_phase_items(
2384            "document_flows",
2385            "PurchaseOrder",
2386            &document_flows.purchase_orders,
2387        );
2388        self.emit_phase_items(
2389            "document_flows",
2390            "GoodsReceipt",
2391            &document_flows.goods_receipts,
2392        );
2393        self.emit_phase_items(
2394            "document_flows",
2395            "VendorInvoice",
2396            &document_flows.vendor_invoices,
2397        );
2398        self.emit_phase_items("document_flows", "SalesOrder", &document_flows.sales_orders);
2399        self.emit_phase_items("document_flows", "Delivery", &document_flows.deliveries);
2400
2401        // Phase 3b: Opening Balances (before JE generation)
2402        let opening_balances = self.phase_opening_balances(&coa, &mut stats)?;
2403
2404        // Phase 3c: Convert opening balances to journal entries and prepend them.
2405        // The CoA lookup resolves each account's normal_debit_balance flag, solving the
2406        // contra-asset problem (e.g., Accumulated Depreciation) without requiring a richer
2407        // balance map type.
2408        let opening_balance_jes: Vec<JournalEntry> = opening_balances
2409            .iter()
2410            .flat_map(|ob| opening_balance_to_jes(ob, &coa))
2411            .collect();
2412        if !opening_balance_jes.is_empty() {
2413            debug!(
2414                "Prepending {} opening balance JEs to entries",
2415                opening_balance_jes.len()
2416            );
2417        }
2418
2419        // Phase 4: Journal Entries
2420        let mut entries = self.phase_journal_entries(&coa, &document_flows, &mut stats)?;
2421
2422        // Phase 4b: Prepend opening balance JEs so the RunningBalanceTracker
2423        // starts from the correct initial state.
2424        if !opening_balance_jes.is_empty() {
2425            let mut combined = opening_balance_jes;
2426            combined.extend(entries);
2427            entries = combined;
2428        }
2429
2430        // Phase 4c: Append FA acquisition journal entries to main entries
2431        if !fa_journal_entries.is_empty() {
2432            debug!(
2433                "Appending {} FA acquisition JEs to main entries",
2434                fa_journal_entries.len()
2435            );
2436            entries.extend(fa_journal_entries);
2437        }
2438
2439        // Phase 25: Counterfactual Pairs (before anomaly injection, using clean JEs)
2440        let counterfactual_pairs = self.phase_counterfactuals(&entries, &mut stats)?;
2441
2442        // Get current degradation actions for optional phases
2443        let actions = self.get_degradation_actions();
2444
2445        // Phase 5: S2C Sourcing Data (before anomaly injection, since it's standalone)
2446        let mut sourcing = self.phase_sourcing_data(&mut stats)?;
2447
2448        // Phase 5a: Link S2C contracts to P2P purchase orders by matching vendor IDs.
2449        // Also populate the reverse FK: ProcurementContract.purchase_order_ids.
2450        if !sourcing.contracts.is_empty() {
2451            let mut linked_count = 0usize;
2452            // Collect (vendor_id, po_id) pairs from P2P chains
2453            let po_vendor_pairs: Vec<(String, String)> = document_flows
2454                .p2p_chains
2455                .iter()
2456                .map(|chain| {
2457                    (
2458                        chain.purchase_order.vendor_id.clone(),
2459                        chain.purchase_order.header.document_id.clone(),
2460                    )
2461                })
2462                .collect();
2463
2464            for chain in &mut document_flows.p2p_chains {
2465                if chain.purchase_order.contract_id.is_none() {
2466                    if let Some(contract) = sourcing
2467                        .contracts
2468                        .iter()
2469                        .find(|c| c.vendor_id == chain.purchase_order.vendor_id)
2470                    {
2471                        chain.purchase_order.contract_id = Some(contract.contract_id.clone());
2472                        linked_count += 1;
2473                    }
2474                }
2475            }
2476
2477            // Populate reverse FK: purchase_order_ids on each contract
2478            for contract in &mut sourcing.contracts {
2479                let po_ids: Vec<String> = po_vendor_pairs
2480                    .iter()
2481                    .filter(|(vendor_id, _)| *vendor_id == contract.vendor_id)
2482                    .map(|(_, po_id)| po_id.clone())
2483                    .collect();
2484                if !po_ids.is_empty() {
2485                    contract.purchase_order_ids = po_ids;
2486                }
2487            }
2488
2489            if linked_count > 0 {
2490                debug!(
2491                    "Linked {} purchase orders to S2C contracts by vendor match",
2492                    linked_count
2493                );
2494            }
2495        }
2496
2497        // Phase 5b: Intercompany Transactions + Matching + Eliminations
2498        let intercompany = self.phase_intercompany(&entries, &mut stats)?;
2499
2500        // Phase 5c: Append IC journal entries to main entries
2501        if !intercompany.seller_journal_entries.is_empty()
2502            || !intercompany.buyer_journal_entries.is_empty()
2503        {
2504            let ic_je_count = intercompany.seller_journal_entries.len()
2505                + intercompany.buyer_journal_entries.len();
2506            entries.extend(intercompany.seller_journal_entries.iter().cloned());
2507            entries.extend(intercompany.buyer_journal_entries.iter().cloned());
2508            debug!(
2509                "Appended {} IC journal entries to main entries",
2510                ic_je_count
2511            );
2512        }
2513
2514        // Phase 5d: Convert IC elimination entries to GL journal entries and append
2515        if !intercompany.elimination_entries.is_empty() {
2516            let elim_jes = datasynth_generators::elimination_to_journal_entries(
2517                &intercompany.elimination_entries,
2518            );
2519            if !elim_jes.is_empty() {
2520                debug!(
2521                    "Appended {} elimination journal entries to main entries",
2522                    elim_jes.len()
2523                );
2524                // IC elimination net-zero assertion (v2.5 hardening)
2525                let elim_debit: rust_decimal::Decimal =
2526                    elim_jes.iter().map(|je| je.total_debit()).sum();
2527                let elim_credit: rust_decimal::Decimal =
2528                    elim_jes.iter().map(|je| je.total_credit()).sum();
2529                let elim_diff = (elim_debit - elim_credit).abs();
2530                let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
2531                if elim_diff > tolerance {
2532                    return Err(datasynth_core::error::SynthError::generation(format!(
2533                        "IC elimination entries not balanced: debits={}, credits={}, diff={} (tolerance={})",
2534                        elim_debit, elim_credit, elim_diff, tolerance
2535                    )));
2536                }
2537                debug!(
2538                    "IC elimination balance verified: debits={}, credits={} (diff={})",
2539                    elim_debit, elim_credit, elim_diff
2540                );
2541                entries.extend(elim_jes);
2542            }
2543        }
2544
2545        // Phase 5e: Wire IC source documents into document flow snapshot
2546        if let Some(ic_docs) = intercompany.ic_document_chains.as_ref() {
2547            if !ic_docs.seller_invoices.is_empty() || !ic_docs.buyer_orders.is_empty() {
2548                document_flows
2549                    .customer_invoices
2550                    .extend(ic_docs.seller_invoices.iter().cloned());
2551                document_flows
2552                    .purchase_orders
2553                    .extend(ic_docs.buyer_orders.iter().cloned());
2554                document_flows
2555                    .goods_receipts
2556                    .extend(ic_docs.buyer_goods_receipts.iter().cloned());
2557                document_flows
2558                    .vendor_invoices
2559                    .extend(ic_docs.buyer_invoices.iter().cloned());
2560                debug!(
2561                    "Appended IC source documents to document flows: {} CIs, {} POs, {} GRs, {} VIs",
2562                    ic_docs.seller_invoices.len(),
2563                    ic_docs.buyer_orders.len(),
2564                    ic_docs.buyer_goods_receipts.len(),
2565                    ic_docs.buyer_invoices.len(),
2566                );
2567            }
2568        }
2569
2570        // Phase 6: HR Data (Payroll, Time Entries, Expenses)
2571        let hr = self.phase_hr_data(&mut stats)?;
2572
2573        // Phase 6b: Generate JEs from payroll runs
2574        if !hr.payroll_runs.is_empty() {
2575            let payroll_jes = Self::generate_payroll_jes(&hr.payroll_runs);
2576            debug!("Generated {} JEs from payroll runs", payroll_jes.len());
2577            entries.extend(payroll_jes);
2578        }
2579
2580        // Phase 6c: Pension expense + OCI JEs (IAS 19 / ASC 715)
2581        if !hr.pension_journal_entries.is_empty() {
2582            debug!(
2583                "Generated {} JEs from pension plans",
2584                hr.pension_journal_entries.len()
2585            );
2586            entries.extend(hr.pension_journal_entries.iter().cloned());
2587        }
2588
2589        // Phase 6d: Stock-based compensation JEs (ASC 718 / IFRS 2)
2590        if !hr.stock_comp_journal_entries.is_empty() {
2591            debug!(
2592                "Generated {} JEs from stock-based compensation",
2593                hr.stock_comp_journal_entries.len()
2594            );
2595            entries.extend(hr.stock_comp_journal_entries.iter().cloned());
2596        }
2597
2598        // Phase 7: Manufacturing (Production Orders, Quality Inspections, Cycle Counts)
2599        let manufacturing_snap = self.phase_manufacturing(&mut stats)?;
2600
2601        // Phase 7a: Generate manufacturing cost flow JEs (WIP, overhead, FG, scrap, rework, QC hold)
2602        if !manufacturing_snap.production_orders.is_empty() {
2603            let currency = self
2604                .config
2605                .companies
2606                .first()
2607                .map(|c| c.currency.as_str())
2608                .unwrap_or("USD");
2609            let mfg_jes = ManufacturingCostAccounting::generate_all_jes(
2610                &manufacturing_snap.production_orders,
2611                &manufacturing_snap.quality_inspections,
2612                currency,
2613            );
2614            debug!("Generated {} manufacturing cost flow JEs", mfg_jes.len());
2615            entries.extend(mfg_jes);
2616        }
2617
2618        // Phase 7a-warranty: Generate warranty provisions per company
2619        if !manufacturing_snap.quality_inspections.is_empty() {
2620            let framework = match self.config.accounting_standards.framework {
2621                Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => "IFRS",
2622                _ => "US_GAAP",
2623            };
2624            for company in &self.config.companies {
2625                let company_orders: Vec<_> = manufacturing_snap
2626                    .production_orders
2627                    .iter()
2628                    .filter(|o| o.company_code == company.code)
2629                    .cloned()
2630                    .collect();
2631                let company_inspections: Vec<_> = manufacturing_snap
2632                    .quality_inspections
2633                    .iter()
2634                    .filter(|i| company_orders.iter().any(|o| o.order_id == i.reference_id))
2635                    .cloned()
2636                    .collect();
2637                if company_inspections.is_empty() {
2638                    continue;
2639                }
2640                let mut warranty_gen = WarrantyProvisionGenerator::new(self.seed + 355);
2641                let warranty_result = warranty_gen.generate(
2642                    &company.code,
2643                    &company_orders,
2644                    &company_inspections,
2645                    &company.currency,
2646                    framework,
2647                );
2648                if !warranty_result.journal_entries.is_empty() {
2649                    debug!(
2650                        "Generated {} warranty provision JEs for {}",
2651                        warranty_result.journal_entries.len(),
2652                        company.code
2653                    );
2654                    entries.extend(warranty_result.journal_entries);
2655                }
2656            }
2657        }
2658
2659        // Phase 7a-cogs: Generate COGS JEs from deliveries x production orders
2660        if !manufacturing_snap.production_orders.is_empty() && !document_flows.deliveries.is_empty()
2661        {
2662            let cogs_currency = self
2663                .config
2664                .companies
2665                .first()
2666                .map(|c| c.currency.as_str())
2667                .unwrap_or("USD");
2668            let cogs_jes = ManufacturingCostAccounting::generate_cogs_on_sale(
2669                &document_flows.deliveries,
2670                &manufacturing_snap.production_orders,
2671                cogs_currency,
2672            );
2673            if !cogs_jes.is_empty() {
2674                debug!("Generated {} COGS JEs from deliveries", cogs_jes.len());
2675                entries.extend(cogs_jes);
2676            }
2677        }
2678
2679        // Phase 7a-inv: Apply manufacturing inventory movements to subledger positions (B.3).
2680        //
2681        // Manufacturing movements (GoodsReceipt / GoodsIssue) are generated independently of
2682        // subledger inventory positions.  Here we reconcile them so that position balances
2683        // reflect the actual stock movements within the generation period.
2684        if !manufacturing_snap.inventory_movements.is_empty()
2685            && !subledger.inventory_positions.is_empty()
2686        {
2687            use datasynth_core::models::MovementType as MfgMovementType;
2688            let mut receipt_count = 0usize;
2689            let mut issue_count = 0usize;
2690            for movement in &manufacturing_snap.inventory_movements {
2691                // Find a matching position by material code and company
2692                if let Some(pos) = subledger.inventory_positions.iter_mut().find(|p| {
2693                    p.material_id == movement.material_code
2694                        && p.company_code == movement.entity_code
2695                }) {
2696                    match movement.movement_type {
2697                        MfgMovementType::GoodsReceipt => {
2698                            // Increase stock and update weighted-average cost
2699                            pos.add_quantity(
2700                                movement.quantity,
2701                                movement.value,
2702                                movement.movement_date,
2703                            );
2704                            receipt_count += 1;
2705                        }
2706                        MfgMovementType::GoodsIssue | MfgMovementType::Scrap => {
2707                            // Decrease stock (best-effort; silently skip if insufficient)
2708                            let _ = pos.remove_quantity(movement.quantity, movement.movement_date);
2709                            issue_count += 1;
2710                        }
2711                        _ => {}
2712                    }
2713                }
2714            }
2715            debug!(
2716                "Phase 7a-inv: Applied {} inventory movements to subledger positions ({} receipts, {} issues/scraps)",
2717                manufacturing_snap.inventory_movements.len(),
2718                receipt_count,
2719                issue_count,
2720            );
2721        }
2722
2723        // Update final entry/line-item stats after all JE-generating phases
2724        // (FA acquisition, IC, payroll, manufacturing JEs have all been appended)
2725        if !entries.is_empty() {
2726            stats.total_entries = entries.len() as u64;
2727            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
2728            debug!(
2729                "Final entry count: {}, line items: {} (after all JE-generating phases)",
2730                stats.total_entries, stats.total_line_items
2731            );
2732        }
2733
2734        // Phase 7b: Apply internal controls to journal entries
2735        if self.config.internal_controls.enabled && !entries.is_empty() {
2736            info!("Phase 7b: Applying internal controls to journal entries");
2737            let control_config = ControlGeneratorConfig {
2738                exception_rate: self.config.internal_controls.exception_rate,
2739                sod_violation_rate: self.config.internal_controls.sod_violation_rate,
2740                enable_sox_marking: true,
2741                sox_materiality_threshold: rust_decimal::Decimal::from_f64_retain(
2742                    self.config.internal_controls.sox_materiality_threshold,
2743                )
2744                .unwrap_or_else(|| rust_decimal::Decimal::from(10000)),
2745                ..Default::default()
2746            };
2747            let mut control_gen = ControlGenerator::with_config(self.seed + 399, control_config);
2748            for entry in &mut entries {
2749                control_gen.apply_controls(entry, &coa);
2750            }
2751            let with_controls = entries
2752                .iter()
2753                .filter(|e| !e.header.control_ids.is_empty())
2754                .count();
2755            info!(
2756                "Applied controls to {} entries ({} with control IDs assigned)",
2757                entries.len(),
2758                with_controls
2759            );
2760        }
2761
2762        // Phase 7c: Extract SoD violations from annotated journal entries.
2763        // The ControlGenerator marks entries with sod_violation=true and a conflict_type.
2764        // Here we materialise those flags into standalone SodViolation records.
2765        let sod_violations: Vec<datasynth_core::models::SodViolation> = entries
2766            .iter()
2767            .filter(|e| e.header.sod_violation)
2768            .filter_map(|e| {
2769                e.header.sod_conflict_type.map(|ct| {
2770                    use datasynth_core::models::{RiskLevel, SodViolation};
2771                    let severity = match ct {
2772                        datasynth_core::models::SodConflictType::PaymentReleaser
2773                        | datasynth_core::models::SodConflictType::RequesterApprover => {
2774                            RiskLevel::Critical
2775                        }
2776                        datasynth_core::models::SodConflictType::PreparerApprover
2777                        | datasynth_core::models::SodConflictType::MasterDataMaintainer
2778                        | datasynth_core::models::SodConflictType::JournalEntryPoster
2779                        | datasynth_core::models::SodConflictType::SystemAccessConflict => {
2780                            RiskLevel::High
2781                        }
2782                        datasynth_core::models::SodConflictType::ReconcilerPoster => {
2783                            RiskLevel::Medium
2784                        }
2785                    };
2786                    let action = format!(
2787                        "SoD conflict {:?} on entry {} ({})",
2788                        ct, e.header.document_id, e.header.company_code
2789                    );
2790                    SodViolation::new(ct, e.header.created_by.clone(), action, severity)
2791                })
2792            })
2793            .collect();
2794        if !sod_violations.is_empty() {
2795            info!(
2796                "Phase 7c: Extracted {} SoD violations from {} entries",
2797                sod_violations.len(),
2798                entries.len()
2799            );
2800        }
2801
2802        // Emit journal entries to stream sink (after all JE-generating phases)
2803        self.emit_phase_items("journal_entries", "JournalEntry", &entries);
2804
2805        // Phase 7d: Document-level fraud injection + propagation to derived JEs.
2806        //
2807        // This runs BEFORE line-level anomaly injection so that JEs tagged by
2808        // document-level fraud are exempt from subsequent line-level flag
2809        // overwrites, and so downstream consumers see a coherent picture.
2810        //
2811        // Gated by `fraud.document_fraud_rate` — `None` or `0.0` is a no-op.
2812        {
2813            let doc_rate = self.config.fraud.document_fraud_rate.unwrap_or(0.0);
2814            if self.config.fraud.enabled && doc_rate > 0.0 {
2815                use datasynth_core::fraud_propagation::{
2816                    inject_document_fraud, propagate_documents_to_entries,
2817                };
2818                use datasynth_core::utils::weighted_select;
2819                use datasynth_core::FraudType;
2820                use rand_chacha::rand_core::SeedableRng;
2821
2822                let dist = &self.config.fraud.fraud_type_distribution;
2823                let fraud_type_weights: [(FraudType, f64); 8] = [
2824                    (FraudType::SuspenseAccountAbuse, dist.suspense_account_abuse),
2825                    (FraudType::FictitiousEntry, dist.fictitious_transaction),
2826                    (FraudType::RevenueManipulation, dist.revenue_manipulation),
2827                    (
2828                        FraudType::ImproperCapitalization,
2829                        dist.expense_capitalization,
2830                    ),
2831                    (FraudType::SplitTransaction, dist.split_transaction),
2832                    (FraudType::TimingAnomaly, dist.timing_anomaly),
2833                    (FraudType::UnauthorizedAccess, dist.unauthorized_access),
2834                    (FraudType::DuplicatePayment, dist.duplicate_payment),
2835                ];
2836                let weights_sum: f64 = fraud_type_weights.iter().map(|(_, w)| *w).sum();
2837                let pick = |rng: &mut rand_chacha::ChaCha8Rng| -> FraudType {
2838                    if weights_sum <= 0.0 {
2839                        FraudType::FictitiousEntry
2840                    } else {
2841                        *weighted_select(rng, &fraud_type_weights)
2842                    }
2843                };
2844
2845                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5100);
2846                let mut doc_tagged = 0usize;
2847                macro_rules! inject_into {
2848                    ($collection:expr) => {{
2849                        let mut hs: Vec<&mut datasynth_core::models::documents::DocumentHeader> =
2850                            $collection.iter_mut().map(|d| &mut d.header).collect();
2851                        doc_tagged += inject_document_fraud(&mut hs, doc_rate, &mut rng, pick);
2852                    }};
2853                }
2854                inject_into!(document_flows.purchase_orders);
2855                inject_into!(document_flows.goods_receipts);
2856                inject_into!(document_flows.vendor_invoices);
2857                inject_into!(document_flows.payments);
2858                inject_into!(document_flows.sales_orders);
2859                inject_into!(document_flows.deliveries);
2860                inject_into!(document_flows.customer_invoices);
2861                if doc_tagged > 0 {
2862                    info!(
2863                        "Injected document-level fraud on {doc_tagged} documents at rate {doc_rate}"
2864                    );
2865                }
2866
2867                if self.config.fraud.propagate_to_lines && doc_tagged > 0 {
2868                    let mut headers: Vec<datasynth_core::models::documents::DocumentHeader> =
2869                        Vec::new();
2870                    headers.extend(
2871                        document_flows
2872                            .purchase_orders
2873                            .iter()
2874                            .map(|d| d.header.clone()),
2875                    );
2876                    headers.extend(
2877                        document_flows
2878                            .goods_receipts
2879                            .iter()
2880                            .map(|d| d.header.clone()),
2881                    );
2882                    headers.extend(
2883                        document_flows
2884                            .vendor_invoices
2885                            .iter()
2886                            .map(|d| d.header.clone()),
2887                    );
2888                    headers.extend(document_flows.payments.iter().map(|d| d.header.clone()));
2889                    headers.extend(document_flows.sales_orders.iter().map(|d| d.header.clone()));
2890                    headers.extend(document_flows.deliveries.iter().map(|d| d.header.clone()));
2891                    headers.extend(
2892                        document_flows
2893                            .customer_invoices
2894                            .iter()
2895                            .map(|d| d.header.clone()),
2896                    );
2897                    let propagated = propagate_documents_to_entries(&headers, &mut entries);
2898                    if propagated > 0 {
2899                        info!(
2900                            "Propagated document-level fraud to {propagated} derived journal entries"
2901                        );
2902                    }
2903                }
2904            }
2905        }
2906
2907        // Phase 8: Anomaly Injection (after all JE-generating phases)
2908        let anomaly_labels = self.phase_anomaly_injection(&mut entries, &actions, &mut stats)?;
2909
2910        // Phase 8b: Apply behavioral biases to fraud entries that did NOT go
2911        // through the anomaly injector.
2912        //
2913        // Three paths set `is_fraud = true` without touching `is_anomaly`:
2914        //   - je_generator::determine_fraud (intrinsic fraud during JE generation)
2915        //   - fraud_propagation::propagate_documents_to_entries (doc-level cascade)
2916        //   - Any external mutation that sets is_fraud after the fact
2917        //
2918        // The anomaly injector already applies the same bias inline when it
2919        // tags an entry as fraud (and sets is_anomaly=true in the same step),
2920        // so gating this sweep on `!is_anomaly` avoids double-application.
2921        //
2922        // Without this sweep, fraud entries from these paths show 0 lift on
2923        // the canonical forensic signals (is_round_1000, is_off_hours,
2924        // is_weekend, is_post_close), which is exactly what the SDK-side
2925        // evaluator caught in v3.1 — fraud features had worse lift than
2926        // baseline. See DS-3.1 post-deploy feedback.
2927        {
2928            use datasynth_core::fraud_bias::apply_fraud_behavioral_bias;
2929            use rand_chacha::rand_core::SeedableRng;
2930            let cfg = self.config.fraud.effective_bias().to_core();
2931            if cfg.enabled {
2932                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8100);
2933                let mut swept = 0usize;
2934                for entry in entries.iter_mut() {
2935                    if entry.header.is_fraud && !entry.header.is_anomaly {
2936                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
2937                        swept += 1;
2938                    }
2939                }
2940                if swept > 0 {
2941                    info!(
2942                        "Applied behavioral biases to {swept} non-anomaly fraud entries \
2943                         (doc-propagated + je_generator intrinsic fraud)"
2944                    );
2945                }
2946            }
2947        }
2948
2949        // Emit anomaly labels to stream sink
2950        self.emit_phase_items(
2951            "anomaly_injection",
2952            "LabeledAnomaly",
2953            &anomaly_labels.labels,
2954        );
2955
2956        // Propagate fraud labels from journal entries to source documents.
2957        // This allows consumers to identify fraudulent POs, invoices, etc. directly
2958        // instead of tracing through document_references.json.
2959        //
2960        // Gated by `fraud.propagate_to_document` (default true) — disable when
2961        // downstream consumers want document fraud flags to reflect only
2962        // document-level injection, not line-level.
2963        if self.config.fraud.propagate_to_document {
2964            use std::collections::HashMap;
2965            // Build a map from document_id -> (is_fraud, fraud_type) from fraudulent JEs.
2966            //
2967            // Document-flow JE generators write `je.header.reference` as "PREFIX:DOC_ID"
2968            // (e.g., "GR:PO-2024-000001", "VI:INV-xyz", "PAY:PAY-abc") — see
2969            // `document_flow_je_generator.rs` lines 454/519/591/660/724/794. The
2970            // `DocumentHeader::propagate_fraud` lookup uses the bare document_id, so
2971            // we register BOTH the prefixed form (raw reference) AND the bare form
2972            // (post-colon portion) in the map. Also register the JE's document_id
2973            // UUID so documents that set `journal_entry_id` match via that path.
2974            //
2975            // Fix for issue #104 — fraud was registered only as "GR:foo" but documents
2976            // looked up "foo", silently producing 0 propagations.
2977            let mut fraud_map: HashMap<String, datasynth_core::FraudType> = HashMap::new();
2978            for je in &entries {
2979                if je.header.is_fraud {
2980                    if let Some(ref fraud_type) = je.header.fraud_type {
2981                        if let Some(ref reference) = je.header.reference {
2982                            // Register the full reference ("GR:PO-2024-000001")
2983                            fraud_map.insert(reference.clone(), *fraud_type);
2984                            // Also register the bare document ID ("PO-2024-000001")
2985                            // by stripping the "PREFIX:" if present.
2986                            if let Some(bare) = reference.split_once(':').map(|(_, rest)| rest) {
2987                                if !bare.is_empty() {
2988                                    fraud_map.insert(bare.to_string(), *fraud_type);
2989                                }
2990                            }
2991                        }
2992                        // Also tag via journal_entry_id on document headers
2993                        fraud_map.insert(je.header.document_id.to_string(), *fraud_type);
2994                    }
2995                }
2996            }
2997            if !fraud_map.is_empty() {
2998                let mut propagated = 0usize;
2999                // Use DocumentHeader::propagate_fraud method for each doc type
3000                macro_rules! propagate_to {
3001                    ($collection:expr) => {
3002                        for doc in &mut $collection {
3003                            if doc.header.propagate_fraud(&fraud_map) {
3004                                propagated += 1;
3005                            }
3006                        }
3007                    };
3008                }
3009                propagate_to!(document_flows.purchase_orders);
3010                propagate_to!(document_flows.goods_receipts);
3011                propagate_to!(document_flows.vendor_invoices);
3012                propagate_to!(document_flows.payments);
3013                propagate_to!(document_flows.sales_orders);
3014                propagate_to!(document_flows.deliveries);
3015                propagate_to!(document_flows.customer_invoices);
3016                if propagated > 0 {
3017                    info!(
3018                        "Propagated fraud labels to {} document flow records",
3019                        propagated
3020                    );
3021                }
3022            }
3023        }
3024
3025        // Phase 26: Red Flag Indicators (after anomaly injection so fraud labels are available)
3026        let red_flags = self.phase_red_flags(&anomaly_labels, &document_flows, &mut stats)?;
3027
3028        // Emit red flags to stream sink
3029        self.emit_phase_items("red_flags", "RedFlag", &red_flags);
3030
3031        // Phase 26b: Collusion Ring Generation (after red flags)
3032        let collusion_rings = self.phase_collusion_rings(&mut stats)?;
3033
3034        // Emit collusion rings to stream sink
3035        self.emit_phase_items("collusion_rings", "CollusionRing", &collusion_rings);
3036
3037        // Phase 8d: W8.1 — TB drift-correction pass.  When a TB anchor prior is
3038        // loaded (industry bundle with real per-account targets), emit balanced
3039        // "SA" adjustment JEs to nudge the synthetic balance sheet toward the
3040        // corpus-median shape before final balance validation runs.
3041        self.phase_tb_drift_correction(&mut entries)?;
3042
3043        // Phase 9: Balance Validation (after all JEs including payroll, manufacturing, IC)
3044        let balance_validation = self.phase_balance_validation(&entries)?;
3045
3046        // Phase 9a: COA coverage — every gl_account in JEs must exist in the
3047        // chart of accounts. Soft warning by default; hard fail when the
3048        // user passes --validate-coa-coverage / sets the strict flag.
3049        self.validate_coa_coverage(&entries, coa.as_ref())?;
3050
3051        // Phase 9b: GL-to-Subledger Reconciliation
3052        let subledger_reconciliation =
3053            self.phase_subledger_reconciliation(&subledger, &entries, &mut stats)?;
3054
3055        // Phase 10: Data Quality Injection
3056        let (data_quality_stats, quality_issues) =
3057            self.phase_data_quality_injection(&mut entries, &actions, &mut stats)?;
3058
3059        // Phase 10b: Period Close (tax provision + income statement closing entries + depreciation)
3060        self.phase_period_close(&mut entries, &subledger, &mut stats)?;
3061
3062        // Phase 10c: Hard accounting equation assertions (v2.5 — generation-time integrity)
3063        {
3064            let tolerance = rust_decimal::Decimal::new(1, 2); // 0.01
3065
3066            // Assert 1: Every non-anomaly JE must individually balance (debits = credits).
3067            // Anomaly-injected JEs are excluded since they are intentionally unbalanced (fraud).
3068            let mut unbalanced_clean = 0usize;
3069            for je in &entries {
3070                if je.header.is_fraud || je.header.is_anomaly {
3071                    continue;
3072                }
3073                let diff = (je.total_debit() - je.total_credit()).abs();
3074                if diff > tolerance {
3075                    unbalanced_clean += 1;
3076                    if unbalanced_clean <= 3 {
3077                        warn!(
3078                            "Unbalanced non-anomaly JE {}: debit={}, credit={}, diff={}",
3079                            je.header.document_id,
3080                            je.total_debit(),
3081                            je.total_credit(),
3082                            diff
3083                        );
3084                    }
3085                }
3086            }
3087            if unbalanced_clean > 0 {
3088                return Err(datasynth_core::error::SynthError::generation(format!(
3089                    "{} non-anomaly JEs are unbalanced (debits != credits). \
3090                     First few logged above. Tolerance={}",
3091                    unbalanced_clean, tolerance
3092                )));
3093            }
3094            debug!(
3095                "Phase 10c: All {} non-anomaly JEs individually balanced",
3096                entries
3097                    .iter()
3098                    .filter(|je| !je.header.is_fraud && !je.header.is_anomaly)
3099                    .count()
3100            );
3101
3102            // Assert 2: Balance sheet equation per company: Assets = Liabilities + Equity
3103            let company_codes: Vec<String> = self
3104                .config
3105                .companies
3106                .iter()
3107                .map(|c| c.code.clone())
3108                .collect();
3109            for company_code in &company_codes {
3110                let mut assets = rust_decimal::Decimal::ZERO;
3111                let mut liab_equity = rust_decimal::Decimal::ZERO;
3112
3113                for entry in &entries {
3114                    if entry.header.company_code != *company_code {
3115                        continue;
3116                    }
3117                    for line in &entry.lines {
3118                        let acct = &line.gl_account;
3119                        let net = line.debit_amount - line.credit_amount;
3120                        // Asset accounts (1xxx): normal debit balance
3121                        if acct.starts_with('1') {
3122                            assets += net;
3123                        }
3124                        // Liability (2xxx) + Equity (3xxx): normal credit balance
3125                        else if acct.starts_with('2') || acct.starts_with('3') {
3126                            liab_equity -= net; // credit-normal, so negate debit-net
3127                        }
3128                        // Revenue/expense/tax (4-8xxx) are closed to RE in period-close,
3129                        // so they net to zero after closing entries
3130                    }
3131                }
3132
3133                let bs_diff = (assets - liab_equity).abs();
3134                if bs_diff > tolerance {
3135                    warn!(
3136                        "Balance sheet equation gap for {}: A={}, L+E={}, diff={} — \
3137                         revenue/expense closing entries may not fully offset",
3138                        company_code, assets, liab_equity, bs_diff
3139                    );
3140                    // Warn rather than error: multi-period datasets may have timing
3141                    // differences from accruals/deferrals that resolve in later periods.
3142                    // The TB footing check (Assert 1) is the hard gate.
3143                } else {
3144                    debug!(
3145                        "Phase 10c: Balance sheet validated for {} — A={}, L+E={} (diff={})",
3146                        company_code, assets, liab_equity, bs_diff
3147                    );
3148                }
3149            }
3150
3151            info!("Phase 10c: All generation-time accounting assertions passed");
3152        }
3153
3154        // Phase 11: Audit Data
3155        let audit = self.phase_audit_data(&entries, &mut stats)?;
3156
3157        // Phase 12: Banking KYC/AML Data
3158        let mut banking = self.phase_banking_data(&mut stats)?;
3159
3160        // Phase 12.5: Bridge document-flow Payments → BankTransactions
3161        // Creates coherence between the accounting layer (payments, JEs) and the
3162        // banking layer (bank transactions). A vendor invoice payment now appears
3163        // on both sides with cross-references and fraud labels propagated.
3164        if self.phase_config.generate_banking
3165            && !document_flows.payments.is_empty()
3166            && !banking.accounts.is_empty()
3167        {
3168            let bridge_rate = self.config.banking.typologies.payment_bridge_rate;
3169            if bridge_rate > 0.0 {
3170                let mut bridge =
3171                    datasynth_banking::generators::payment_bridge::PaymentBridgeGenerator::new(
3172                        self.seed,
3173                    );
3174                let (bridged_txns, bridge_stats) = bridge.bridge_payments(
3175                    &document_flows.payments,
3176                    &banking.customers,
3177                    &banking.accounts,
3178                    bridge_rate,
3179                );
3180                info!(
3181                    "Payment bridge: {} payments bridged, {} bank txns emitted, {} fraud propagated",
3182                    bridge_stats.bridged_count,
3183                    bridge_stats.transactions_emitted,
3184                    bridge_stats.fraud_propagated,
3185                );
3186                let bridged_count = bridged_txns.len();
3187                banking.transactions.extend(bridged_txns);
3188
3189                // Re-run velocity computation so bridged txns also get features
3190                // (otherwise ML pipelines see a split: native=with-velocity, bridged=without)
3191                if self.config.banking.temporal.enable_velocity_features && bridged_count > 0 {
3192                    datasynth_banking::generators::velocity_computer::compute_velocity_features(
3193                        &mut banking.transactions,
3194                    );
3195                }
3196
3197                // Recompute suspicious count after bridging
3198                banking.suspicious_count = banking
3199                    .transactions
3200                    .iter()
3201                    .filter(|t| t.is_suspicious)
3202                    .count();
3203                stats.banking_transaction_count = banking.transactions.len();
3204                stats.banking_suspicious_count = banking.suspicious_count;
3205            }
3206        }
3207
3208        // Phase 13: Graph Export
3209        let graph_export = self.phase_graph_export(&entries, &coa, &mut stats)?;
3210
3211        // Phase 14: LLM Enrichment
3212        self.phase_llm_enrichment(&mut stats);
3213
3214        // Phase 15: Diffusion Enhancement
3215        self.phase_diffusion_enhancement(&entries, &mut stats);
3216
3217        // Phase 16: Causal Overlay
3218        self.phase_causal_overlay(&mut stats);
3219
3220        // Phase 17: Bank Reconciliation + Financial Statements
3221        // Notes generation is deferred to after Phase 18 + 20 so that deferred-tax and
3222        // provision data (from accounting_standards / tax snapshots) can be wired in.
3223        let mut financial_reporting = self.phase_financial_reporting(
3224            &document_flows,
3225            &entries,
3226            &coa,
3227            &hr,
3228            &audit,
3229            &mut stats,
3230        )?;
3231
3232        // BS coherence check: assets = liabilities + equity
3233        {
3234            use datasynth_core::models::StatementType;
3235            for stmt in &financial_reporting.consolidated_statements {
3236                if stmt.statement_type == StatementType::BalanceSheet {
3237                    let total_assets: rust_decimal::Decimal = stmt
3238                        .line_items
3239                        .iter()
3240                        .filter(|li| li.section.to_uppercase().contains("ASSET"))
3241                        .map(|li| li.amount)
3242                        .sum();
3243                    let total_le: rust_decimal::Decimal = stmt
3244                        .line_items
3245                        .iter()
3246                        .filter(|li| !li.section.to_uppercase().contains("ASSET"))
3247                        .map(|li| li.amount)
3248                        .sum();
3249                    if (total_assets - total_le).abs() > rust_decimal::Decimal::new(1, 0) {
3250                        warn!(
3251                            "BS equation imbalance: assets={}, L+E={}",
3252                            total_assets, total_le
3253                        );
3254                    }
3255                }
3256            }
3257        }
3258
3259        // Phase 18: Accounting Standards (Revenue Recognition, Impairment, ECL)
3260        let accounting_standards =
3261            self.phase_accounting_standards(&subledger.ar_aging_reports, &entries, &mut stats)?;
3262
3263        // Phase 18a: Merge ECL journal entries into main GL
3264        if !accounting_standards.ecl_journal_entries.is_empty() {
3265            debug!(
3266                "Generated {} JEs from ECL provision (IFRS 9 / ASC 326)",
3267                accounting_standards.ecl_journal_entries.len()
3268            );
3269            entries.extend(accounting_standards.ecl_journal_entries.iter().cloned());
3270        }
3271
3272        // Phase 18a: Merge provision journal entries into main GL
3273        if !accounting_standards.provision_journal_entries.is_empty() {
3274            debug!(
3275                "Generated {} JEs from provisions (IAS 37 / ASC 450)",
3276                accounting_standards.provision_journal_entries.len()
3277            );
3278            entries.extend(
3279                accounting_standards
3280                    .provision_journal_entries
3281                    .iter()
3282                    .cloned(),
3283            );
3284        }
3285
3286        // Phase 18b: OCPM Events (after all process data is available)
3287        let mut ocpm = self.phase_ocpm_events(
3288            &document_flows,
3289            &sourcing,
3290            &hr,
3291            &manufacturing_snap,
3292            &banking,
3293            &audit,
3294            &financial_reporting,
3295            &mut stats,
3296        )?;
3297
3298        // Emit OCPM events to stream sink
3299        if let Some(ref event_log) = ocpm.event_log {
3300            self.emit_phase_items("ocpm", "OcpmEvent", &event_log.events);
3301        }
3302
3303        // Phase 18c: Back-annotate OCPM event IDs onto JournalEntry headers (fixes #117)
3304        if let Some(ref event_log) = ocpm.event_log {
3305            // Build reverse index: document_ref → (event_id, case_id, object_ids)
3306            let mut doc_index: std::collections::HashMap<&str, Vec<usize>> =
3307                std::collections::HashMap::new();
3308            for (idx, event) in event_log.events.iter().enumerate() {
3309                if let Some(ref doc_ref) = event.document_ref {
3310                    doc_index.entry(doc_ref.as_str()).or_default().push(idx);
3311                }
3312            }
3313
3314            if !doc_index.is_empty() {
3315                let mut annotated = 0usize;
3316                for entry in &mut entries {
3317                    let doc_id_str = entry.header.document_id.to_string();
3318                    // Collect matching event indices from document_id and reference
3319                    let mut matched_indices: Vec<usize> = Vec::new();
3320                    if let Some(indices) = doc_index.get(doc_id_str.as_str()) {
3321                        matched_indices.extend(indices);
3322                    }
3323                    if let Some(ref reference) = entry.header.reference {
3324                        let bare_ref = reference
3325                            .find(':')
3326                            .map(|i| &reference[i + 1..])
3327                            .unwrap_or(reference.as_str());
3328                        if let Some(indices) = doc_index.get(bare_ref) {
3329                            for &idx in indices {
3330                                if !matched_indices.contains(&idx) {
3331                                    matched_indices.push(idx);
3332                                }
3333                            }
3334                        }
3335                    }
3336                    // Apply matches to JE header
3337                    if !matched_indices.is_empty() {
3338                        for &idx in &matched_indices {
3339                            let event = &event_log.events[idx];
3340                            if !entry.header.ocpm_event_ids.contains(&event.event_id) {
3341                                entry.header.ocpm_event_ids.push(event.event_id);
3342                            }
3343                            for obj_ref in &event.object_refs {
3344                                if !entry.header.ocpm_object_ids.contains(&obj_ref.object_id) {
3345                                    entry.header.ocpm_object_ids.push(obj_ref.object_id);
3346                                }
3347                            }
3348                            if entry.header.ocpm_case_id.is_none() {
3349                                entry.header.ocpm_case_id = event.case_id;
3350                            }
3351                        }
3352                        annotated += 1;
3353                    }
3354                }
3355                debug!(
3356                    "Phase 18c: Back-annotated {} JEs with OCPM event/object/case IDs",
3357                    annotated
3358                );
3359            }
3360        }
3361
3362        // Phase 18d: Synthesize OCPM events for orphan JEs (period-close,
3363        // IC eliminations, opening balances, standards-driven entries) so
3364        // every JournalEntry carries at least one `ocpm_event_ids` link.
3365        if let Some(ref mut event_log) = ocpm.event_log {
3366            let synthesized =
3367                datasynth_ocpm::synthesize_events_for_orphan_entries(&mut entries, event_log);
3368            if synthesized > 0 {
3369                info!(
3370                    "Phase 18d: Synthesized {synthesized} OCPM events for orphan journal entries"
3371                );
3372            }
3373
3374            // Phase 18e: Mirror JE anomaly / fraud flags onto the linked OCEL
3375            // events and their owning CaseTrace. Without this, every exported
3376            // OCEL event has `is_anomaly = false` even when the underlying JE
3377            // was flagged.
3378            let anomaly_events =
3379                datasynth_ocpm::propagate_je_anomalies_to_ocel(&entries, event_log);
3380            if anomaly_events > 0 {
3381                info!("Phase 18e: Propagated anomaly flags onto {anomaly_events} OCEL events");
3382            }
3383
3384            // Phase 18f: Inject process-variant imperfections (rework, skipped
3385            // steps, out-of-order events) so conformance checkers see
3386            // realistic variant counts and fitness < 1.0. Uses the P2P
3387            // process rates as the single source of truth.
3388            let p2p_cfg = &self.config.ocpm.p2p_process;
3389            let any_imperfection = p2p_cfg.rework_probability > 0.0
3390                || p2p_cfg.skip_step_probability > 0.0
3391                || p2p_cfg.out_of_order_probability > 0.0;
3392            if any_imperfection {
3393                use rand_chacha::rand_core::SeedableRng;
3394                let imp_cfg = datasynth_ocpm::ImperfectionConfig {
3395                    rework_rate: p2p_cfg.rework_probability,
3396                    skip_rate: p2p_cfg.skip_step_probability,
3397                    out_of_order_rate: p2p_cfg.out_of_order_probability,
3398                };
3399                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 5200);
3400                let stats =
3401                    datasynth_ocpm::inject_process_imperfections(event_log, &imp_cfg, &mut rng);
3402                if stats.rework + stats.skipped + stats.out_of_order > 0 {
3403                    info!(
3404                        "Phase 18f: Injected process imperfections — rework={} skipped={} out_of_order={}",
3405                        stats.rework, stats.skipped, stats.out_of_order
3406                    );
3407                }
3408            }
3409        }
3410
3411        // Phase 19: Sales Quotes, Management KPIs, Budgets
3412        let sales_kpi_budgets =
3413            self.phase_sales_kpi_budgets(&coa, &financial_reporting, &entries, &mut stats)?;
3414
3415        // Phase 22: Treasury Data Generation
3416        // Must run BEFORE tax so that interest expense (7100) and hedge ineffectiveness (7510)
3417        // are included in the pre-tax income used by phase_tax_generation.
3418        let treasury =
3419            self.phase_treasury_data(&document_flows, &subledger, &intercompany, &mut stats)?;
3420
3421        // Phase 22 JEs: Merge treasury journal entries into main GL (before tax phase)
3422        if !treasury.journal_entries.is_empty() {
3423            debug!(
3424                "Merging {} treasury JEs (debt interest, hedge MTM, sweeps) into GL",
3425                treasury.journal_entries.len()
3426            );
3427            entries.extend(treasury.journal_entries.iter().cloned());
3428        }
3429
3430        // Phase 20: Tax Generation
3431        let tax = self.phase_tax_generation(&document_flows, &entries, &mut stats)?;
3432
3433        // Phase 20 JEs: Merge tax posting journal entries into main GL
3434        if !tax.tax_posting_journal_entries.is_empty() {
3435            debug!(
3436                "Merging {} tax posting JEs into GL",
3437                tax.tax_posting_journal_entries.len()
3438            );
3439            entries.extend(tax.tax_posting_journal_entries.iter().cloned());
3440        }
3441
3442        // Phase 20b: FINAL fraud behavioral bias sweep.
3443        //
3444        // Many phases AFTER Phase 8b (ECL / provisions / treasury / tax /
3445        // period close) extend `entries` with new journal entries that may
3446        // carry `is_fraud = true` (e.g. tax-provision entries derived from
3447        // already-fraudulent transactions). Those late additions miss the
3448        // Phase 8b sweep and ship without bias applied — which is exactly
3449        // why SDK-team production jobs kept reporting `off_hours 0× lift`
3450        // even after v3.1.1 closed the per-phase gap for early-added JEs.
3451        //
3452        // Running the sweep one more time here guarantees every is_fraud
3453        // entry — regardless of which phase added it — has bias applied.
3454        // `!is_anomaly` gates out anomaly-injector entries (which already
3455        // got biased inline); the sweep is otherwise idempotent-ish:
3456        // weekend / off_hours re-fire to another valid weekend / off-hour,
3457        // post_close is guarded by `!is_post_close`, and round-dollar
3458        // rescaling on an already-round amount is a no-op (ratio = 1).
3459        {
3460            use datasynth_core::fraud_bias::apply_fraud_behavioral_bias;
3461            use rand_chacha::rand_core::SeedableRng;
3462            let cfg = self.config.fraud.effective_bias().to_core();
3463            if cfg.enabled {
3464                let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 8200);
3465                let mut swept = 0usize;
3466                for entry in entries.iter_mut() {
3467                    if entry.header.is_fraud && !entry.header.is_anomaly {
3468                        apply_fraud_behavioral_bias(entry, &cfg, &mut rng);
3469                        swept += 1;
3470                    }
3471                }
3472                if swept > 0 {
3473                    info!(
3474                        "Phase 20b: final behavioral-bias sweep applied to {swept} \
3475                         non-anomaly fraud entries (covers late-added JEs from \
3476                         ECL / provisions / treasury / tax / period-close)"
3477                    );
3478                }
3479            }
3480        }
3481
3482        // Phase 20a-cf: Enhanced Cash Flow (v2.4)
3483        // Build supplementary cash flow items from upstream JE data (depreciation,
3484        // interest, tax, dividends, working-capital deltas) and merge into CF statements.
3485        {
3486            use datasynth_generators::{CashFlowEnhancer, CashFlowSourceData};
3487
3488            let framework_str = {
3489                use datasynth_config::schema::AccountingFrameworkConfig;
3490                match self
3491                    .config
3492                    .accounting_standards
3493                    .framework
3494                    .unwrap_or_default()
3495                {
3496                    AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
3497                        "IFRS"
3498                    }
3499                    _ => "US_GAAP",
3500                }
3501            };
3502
3503            // Sum depreciation debits (account 6000) from close JEs
3504            let depreciation_total: rust_decimal::Decimal = entries
3505                .iter()
3506                .filter(|je| je.header.document_type == "CL")
3507                .flat_map(|je| je.lines.iter())
3508                .filter(|l| l.gl_account.starts_with("6000"))
3509                .map(|l| l.debit_amount)
3510                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3511
3512            // Sum interest expense debits (account 7100)
3513            let interest_paid: rust_decimal::Decimal = entries
3514                .iter()
3515                .flat_map(|je| je.lines.iter())
3516                .filter(|l| l.gl_account.starts_with("7100"))
3517                .map(|l| l.debit_amount)
3518                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3519
3520            // Sum tax expense debits (account 8000)
3521            let tax_paid: rust_decimal::Decimal = entries
3522                .iter()
3523                .flat_map(|je| je.lines.iter())
3524                .filter(|l| l.gl_account.starts_with("8000"))
3525                .map(|l| l.debit_amount)
3526                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3527
3528            // Sum capex debits on fixed assets (account 1500)
3529            let capex: rust_decimal::Decimal = entries
3530                .iter()
3531                .flat_map(|je| je.lines.iter())
3532                .filter(|l| l.gl_account.starts_with("1500"))
3533                .map(|l| l.debit_amount)
3534                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3535
3536            // Dividends paid: sum every debit posted to dividends payable (exact account 2170)
3537            let dividends_paid: rust_decimal::Decimal = entries
3538                .iter()
3539                .flat_map(|je| je.lines.iter())
3540                .filter(|l| l.gl_account == "2170")
3541                .map(|l| l.debit_amount)
3542                .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
3543
3544            let cf_data = CashFlowSourceData {
3545                depreciation_total,
3546                provision_movements_net: rust_decimal::Decimal::ZERO, // best-effort: zero
3547                delta_ar: rust_decimal::Decimal::ZERO,
3548                delta_ap: rust_decimal::Decimal::ZERO,
3549                delta_inventory: rust_decimal::Decimal::ZERO,
3550                capex,
3551                debt_issuance: rust_decimal::Decimal::ZERO,
3552                debt_repayment: rust_decimal::Decimal::ZERO,
3553                interest_paid,
3554                tax_paid,
3555                dividends_paid,
3556                framework: framework_str.to_string(),
3557            };
3558
3559            let enhanced_cf_items = CashFlowEnhancer::generate(&cf_data);
3560            if !enhanced_cf_items.is_empty() {
3561                // Merge into ALL cash flow statements (standalone + consolidated)
3562                use datasynth_core::models::StatementType;
3563                let merge_count = enhanced_cf_items.len();
3564                for stmt in financial_reporting
3565                    .financial_statements
3566                    .iter_mut()
3567                    .chain(financial_reporting.consolidated_statements.iter_mut())
3568                    .chain(
3569                        financial_reporting
3570                            .standalone_statements
3571                            .values_mut()
3572                            .flat_map(|v| v.iter_mut()),
3573                    )
3574                {
3575                    if stmt.statement_type == StatementType::CashFlowStatement {
3576                        stmt.cash_flow_items.extend(enhanced_cf_items.clone());
3577                    }
3578                }
3579                info!(
3580                    "Enhanced cash flow: {} supplementary items merged into CF statements",
3581                    merge_count
3582                );
3583            }
3584        }
3585
3586        // Phase 20a: Notes to Financial Statements (IAS 1 / ASC 235)
3587        // Runs here so deferred-tax (Phase 20) and provision data (Phase 18) are available.
3588        self.generate_notes_to_financial_statements(
3589            &mut financial_reporting,
3590            &accounting_standards,
3591            &tax,
3592            &hr,
3593            &audit,
3594            &treasury,
3595        );
3596
3597        // Phase 20b-seg: Supplement segment reports from real JEs (v2.4)
3598        // When we have 2+ companies, derive segment data from actual journal entries
3599        // to complement or replace the FS-generator-based segments.
3600        if self.config.companies.len() >= 2 && !entries.is_empty() {
3601            let companies: Vec<(String, String)> = self
3602                .config
3603                .companies
3604                .iter()
3605                .map(|c| (c.code.clone(), c.name.clone()))
3606                .collect();
3607            let ic_elim: rust_decimal::Decimal =
3608                intercompany.matched_pairs.iter().map(|p| p.amount).sum();
3609            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
3610                .unwrap_or(NaiveDate::MIN);
3611            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
3612            let period_label = format!(
3613                "{}-{:02}",
3614                end_date.year(),
3615                (end_date - chrono::Days::new(1)).month()
3616            );
3617
3618            let mut seg_gen = SegmentGenerator::new(self.seed + 31);
3619            let (je_segments, je_recon) =
3620                seg_gen.generate_from_journal_entries(&entries, &companies, &period_label, ic_elim);
3621            if !je_segments.is_empty() {
3622                info!(
3623                    "Segment reports (v2.4): {} JE-derived segments with IC elimination {}",
3624                    je_segments.len(),
3625                    ic_elim,
3626                );
3627                // Replace if existing segment_reports were empty; otherwise supplement
3628                if financial_reporting.segment_reports.is_empty() {
3629                    financial_reporting.segment_reports = je_segments;
3630                    financial_reporting.segment_reconciliations = vec![je_recon];
3631                } else {
3632                    financial_reporting.segment_reports.extend(je_segments);
3633                    financial_reporting.segment_reconciliations.push(je_recon);
3634                }
3635            }
3636        }
3637
3638        // Phase 21: ESG Data Generation
3639        let esg_snap =
3640            self.phase_esg_generation(&document_flows, &manufacturing_snap, &mut stats)?;
3641
3642        // Phase 23: Project Accounting Data Generation
3643        let project_accounting = self.phase_project_accounting(&document_flows, &hr, &mut stats)?;
3644
3645        // Phase 24: Process Evolution + Organizational Events
3646        let (process_evolution, organizational_events) = self.phase_evolution_events(&mut stats)?;
3647
3648        // Phase 24b: Disruption Events
3649        let disruption_events = self.phase_disruption_events(&mut stats)?;
3650
3651        // Phase 27: Bi-Temporal Vendor Version Chains
3652        let temporal_vendor_chains = self.phase_temporal_attributes(&mut stats)?;
3653
3654        // Phase 28: Entity Relationship Graph + Cross-Process Links
3655        let (entity_relationship_graph, cross_process_links) =
3656            self.phase_entity_relationships(&entries, &document_flows, &mut stats)?;
3657
3658        // Phase 29: Industry-specific GL accounts
3659        let industry_output = self.phase_industry_data(&mut stats);
3660
3661        // Phase: Compliance regulations (must run before hypergraph so it can be included)
3662        let compliance_regulations = self.phase_compliance_regulations(&mut stats)?;
3663
3664        // Phase: Neural enhancement (config-acknowledged-only in v4.0).
3665        //
3666        // The neural / hybrid diffusion path was a documented L2 stub
3667        // in v3.x; actual neural-network training requires ML
3668        // infrastructure (PyTorch / candle bindings, GPU access,
3669        // training loops) that was never wired through the
3670        // orchestrator. Rather than keep a silently-no-op block that
3671        // misleads users into thinking neural training happens, v4.0
3672        // acknowledges the config — exposing stats so downstream
3673        // tooling can see the request — but emits a clear warning
3674        // when a non-statistical backend is requested. The statistical
3675        // diffusion backend continues to run via
3676        // `phase_diffusion_enhancement`.
3677        //
3678        // Users who need real neural diffusion: track the roadmap item
3679        // in the v4.x backlog and consider contributing the backend
3680        // (the `DiffusionBackend` trait is the integration point).
3681        if self.config.diffusion.enabled
3682            && (self.config.diffusion.backend == "neural"
3683                || self.config.diffusion.backend == "hybrid")
3684        {
3685            let neural = &self.config.diffusion.neural;
3686            let weight = neural.hybrid_weight.clamp(0.0, 1.0);
3687            stats.neural_hybrid_weight = Some(weight);
3688            stats.neural_hybrid_strategy = Some(neural.hybrid_strategy.clone());
3689            stats.neural_routed_column_count = Some(neural.neural_columns.len());
3690            warn!(
3691                "diffusion.backend='{}' is config-acknowledged only in v4.0 — \
3692                 the neural/hybrid training path is not yet shipped. Config \
3693                 is captured in stats (weight={weight:.2}, strategy={}, \
3694                 columns={}) but no neural training runs. Statistical \
3695                 diffusion (backend='statistical') continues to work.",
3696                self.config.diffusion.backend,
3697                neural.hybrid_strategy,
3698                neural.neural_columns.len(),
3699            );
3700        }
3701
3702        // Phase 19b: Hypergraph Export (after all data is available)
3703        self.phase_hypergraph_export(
3704            &coa,
3705            &entries,
3706            &document_flows,
3707            &sourcing,
3708            &hr,
3709            &manufacturing_snap,
3710            &banking,
3711            &audit,
3712            &financial_reporting,
3713            &ocpm,
3714            &compliance_regulations,
3715            &mut stats,
3716        )?;
3717
3718        // Phase 10c: Additional graph builders (approval, entity, banking)
3719        // These run after all data is available since they need banking/IC data.
3720        if self.phase_config.generate_graph_export {
3721            self.build_additional_graphs(&banking, &intercompany, &entries, &mut stats);
3722        }
3723
3724        // Log informational messages for config sections not yet fully wired
3725        if self.config.streaming.enabled {
3726            info!("Note: streaming config is enabled but batch mode does not use it");
3727        }
3728        if self.config.vendor_network.enabled {
3729            debug!("Vendor network config available; relationship graph generation is partial");
3730        }
3731        if self.config.customer_segmentation.enabled {
3732            debug!("Customer segmentation config available; segment-aware generation is partial");
3733        }
3734
3735        // Log final resource statistics
3736        let resource_stats = self.resource_guard.stats();
3737        info!(
3738            "Generation workflow complete. Resource stats: memory_peak={}MB, disk_written={}bytes, degradation_level={}",
3739            resource_stats.memory.peak_resident_bytes / (1024 * 1024),
3740            resource_stats.disk.estimated_bytes_written,
3741            resource_stats.degradation_level
3742        );
3743
3744        // Flush any remaining stream sink data
3745        if let Some(ref sink) = self.phase_sink {
3746            if let Err(e) = sink.flush() {
3747                warn!("Stream sink flush failed: {e}");
3748            }
3749        }
3750
3751        // Build data lineage graph
3752        let lineage = self.build_lineage_graph();
3753
3754        // Evaluate quality gates if enabled in config
3755        let gate_result = if self.config.quality_gates.enabled {
3756            let profile_name = &self.config.quality_gates.profile;
3757            match datasynth_eval::gates::get_profile(profile_name) {
3758                Some(profile) => {
3759                    // Build an evaluation populated with actual generation metrics.
3760                    let mut eval = datasynth_eval::ComprehensiveEvaluation::new();
3761
3762                    // Populate balance sheet evaluation from balance validation results
3763                    if balance_validation.validated {
3764                        eval.coherence.balance =
3765                            Some(datasynth_eval::coherence::BalanceSheetEvaluation {
3766                                equation_balanced: balance_validation.is_balanced,
3767                                max_imbalance: (balance_validation.total_debits
3768                                    - balance_validation.total_credits)
3769                                    .abs(),
3770                                periods_evaluated: 1,
3771                                periods_imbalanced: if balance_validation.is_balanced {
3772                                    0
3773                                } else {
3774                                    1
3775                                },
3776                                period_results: Vec::new(),
3777                                companies_evaluated: self.config.companies.len(),
3778                            });
3779                    }
3780
3781                    // Set coherence passes based on balance validation
3782                    eval.coherence.passes = balance_validation.is_balanced;
3783                    if !balance_validation.is_balanced {
3784                        eval.coherence
3785                            .failures
3786                            .push("Balance sheet equation not satisfied".to_string());
3787                    }
3788
3789                    // Set statistical score based on entry count (basic sanity)
3790                    eval.statistical.overall_score = if entries.len() > 10 { 0.9 } else { 0.5 };
3791                    eval.statistical.passes = !entries.is_empty();
3792
3793                    // Quality score is a fixed placeholder (data quality stats are not read here)
3794                    eval.quality.overall_score = 0.9; // Default high for generated data
3795                    eval.quality.passes = true;
3796
3797                    let result = datasynth_eval::gates::GateEngine::evaluate(&eval, &profile);
3798                    info!(
3799                        "Quality gates evaluated (profile '{}'): {}/{} passed — {}",
3800                        profile_name, result.gates_passed, result.gates_total, result.summary
3801                    );
3802                    Some(result)
3803                }
3804                None => {
3805                    warn!(
3806                        "Quality gates enabled but profile '{}' not found; skipping gate evaluation",
3807                        profile_name
3808                    );
3809                    None
3810                }
3811            }
3812        } else {
3813            None
3814        };
3815
3816        // Generate internal controls if enabled
3817        let internal_controls = if self.config.internal_controls.enabled {
3818            InternalControl::standard_controls()
3819        } else {
3820            Vec::new()
3821        };
3822
3823        // v3.3.0: analytics-metadata phase. Runs AFTER all JE-adding
3824        // phases (including fraud-bias sweep at Phase 20b) so derived
3825        // outputs reflect final data.
3826        let analytics_metadata = self.phase_analytics_metadata(&entries)?;
3827
3828        // v3.5.1: statistical validation over the final amount
3829        // distribution. Runs *after* all JE-adding phases so the report
3830        // reflects everything the user will see in the output. Returns
3831        // `None` unless `distributions.validation.enabled = true`.
3832        let statistical_validation = self.phase_statistical_validation(&entries)?;
3833
3834        // v4.1.3+: interconnectivity snapshot — tier assignments,
3835        // value-segment labels, industry-specific metadata. Runs after
3836        // master data is settled so it can index stable IDs.
3837        let interconnectivity = self.phase_interconnectivity();
3838
3839        // SP5.2 — snapshot the CoA semantic prior (if any) into the result so
3840        // output_writer can use it as a fallback index for account_description
3841        // resolution when the synthetic CoA index misses.
3842        let coa_semantic_prior = self
3843            .cached_priors
3844            .as_ref()
3845            .and_then(|p| p.coa_semantic.clone());
3846
3847        Ok(EnhancedGenerationResult {
3848            chart_of_accounts: Arc::try_unwrap(coa).unwrap_or_else(|arc| (*arc).clone()),
3849            master_data: std::mem::take(&mut self.master_data),
3850            document_flows,
3851            subledger,
3852            ocpm,
3853            audit,
3854            banking,
3855            graph_export,
3856            sourcing,
3857            financial_reporting,
3858            hr,
3859            accounting_standards,
3860            manufacturing: manufacturing_snap,
3861            sales_kpi_budgets,
3862            tax,
3863            esg: esg_snap,
3864            treasury,
3865            project_accounting,
3866            process_evolution,
3867            organizational_events,
3868            disruption_events,
3869            intercompany,
3870            journal_entries: entries,
3871            anomaly_labels,
3872            balance_validation,
3873            data_quality_stats,
3874            quality_issues,
3875            statistics: stats,
3876            lineage: Some(lineage),
3877            gate_result,
3878            internal_controls,
3879            sod_violations,
3880            opening_balances,
3881            subledger_reconciliation,
3882            counterfactual_pairs,
3883            red_flags,
3884            collusion_rings,
3885            temporal_vendor_chains,
3886            entity_relationship_graph,
3887            cross_process_links,
3888            industry_output,
3889            coa_semantic_prior,
3890            compliance_regulations,
3891            analytics_metadata,
3892            statistical_validation,
3893            interconnectivity,
3894        })
3895    }
3896
3897    /// v4.1.3+: populate the interconnectivity snapshot from
3898    /// previously-inert schema sections. Empty when all sections are
3899    /// disabled.
3900    fn phase_interconnectivity(&self) -> InterconnectivitySnapshot {
3901        use rand::{RngExt, SeedableRng};
3902        use rand_chacha::ChaCha8Rng;
3903
3904        let mut snap = InterconnectivitySnapshot::default();
3905        let mut rng = ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(91_001));
3906
3907        // --- Vendor network ---
3908        let vn = &self.config.vendor_network;
3909        if vn.enabled {
3910            let total = self.master_data.vendors.len();
3911            if total > 0 {
3912                let tier1_count = ((vn.tier1.min + vn.tier1.max) / 2).min(total).max(1);
3913                let remaining_after_t1 = total.saturating_sub(tier1_count);
3914                let depth = vn.depth.clamp(1, 3);
3915                let tier2_count = if depth >= 2 {
3916                    let avg = (vn.tier2_per_parent.min + vn.tier2_per_parent.max) / 2;
3917                    (tier1_count * avg).min(remaining_after_t1)
3918                } else {
3919                    0
3920                };
3921                let tier3_count = total
3922                    .saturating_sub(tier1_count)
3923                    .saturating_sub(tier2_count);
3924
3925                for (idx, vendor) in self.master_data.vendors.iter().enumerate() {
3926                    let tier = if idx < tier1_count {
3927                        1
3928                    } else if idx < tier1_count + tier2_count {
3929                        2
3930                    } else {
3931                        3
3932                    };
3933                    snap.vendor_tiers.push((vendor.vendor_id.clone(), tier));
3934
3935                    // Cluster assignment via configured ratios.
3936                    let cl = &vn.clusters;
3937                    let roll: f64 = rng.random();
3938                    let cluster = if roll < cl.reliable_strategic {
3939                        "reliable_strategic"
3940                    } else if roll < cl.reliable_strategic + cl.standard_operational {
3941                        "standard_operational"
3942                    } else if roll
3943                        < cl.reliable_strategic + cl.standard_operational + cl.transactional
3944                    {
3945                        "transactional"
3946                    } else {
3947                        "problematic"
3948                    };
3949                    snap.vendor_clusters
3950                        .push((vendor.vendor_id.clone(), cluster.to_string()));
3951                }
3952                let _ = tier3_count; // retained for clarity; tier 3 bucket is the remainder
3953            }
3954        }
3955
3956        // --- Customer segmentation ---
3957        let cs = &self.config.customer_segmentation;
3958        if cs.enabled {
3959            let seg = &cs.value_segments;
3960            for customer in &self.master_data.customers {
3961                let roll: f64 = rng.random();
3962                let value_segment = if roll < seg.enterprise.customer_share {
3963                    "enterprise"
3964                } else if roll < seg.enterprise.customer_share + seg.mid_market.customer_share {
3965                    "mid_market"
3966                } else if roll
3967                    < seg.enterprise.customer_share
3968                        + seg.mid_market.customer_share
3969                        + seg.smb.customer_share
3970                {
3971                    "smb"
3972                } else {
3973                    "consumer"
3974                };
3975                snap.customer_value_segments
3976                    .push((customer.customer_id.clone(), value_segment.to_string()));
3977
3978                let roll2: f64 = rng.random();
3979                let life = &cs.lifecycle;
3980                let lifecycle = if roll2 < life.prospect_rate {
3981                    "prospect"
3982                } else if roll2 < life.prospect_rate + life.new_rate {
3983                    "new"
3984                } else if roll2 < life.prospect_rate + life.new_rate + life.growth_rate {
3985                    "growth"
3986                } else if roll2
3987                    < life.prospect_rate + life.new_rate + life.growth_rate + life.mature_rate
3988                {
3989                    "mature"
3990                } else if roll2
3991                    < life.prospect_rate
3992                        + life.new_rate
3993                        + life.growth_rate
3994                        + life.mature_rate
3995                        + life.at_risk_rate
3996                {
3997                    "at_risk"
3998                } else if roll2
3999                    < life.prospect_rate
4000                        + life.new_rate
4001                        + life.growth_rate
4002                        + life.mature_rate
4003                        + life.at_risk_rate
4004                        + life.churned_rate
4005                {
4006                    "churned"
4007                } else {
4008                    "won_back"
4009                };
4010                snap.customer_lifecycle_stages
4011                    .push((customer.customer_id.clone(), lifecycle.to_string()));
4012            }
4013        }
4014
4015        // --- Industry-specific metadata (minimal) ---
4016        let is = &self.config.industry_specific;
4017        if is.enabled {
4018            snap.industry_metadata.push(format!(
4019                "industry_specific.enabled=true (industry={:?})",
4020                self.config.global.industry
4021            ));
4022        }
4023
4024        snap
4025    }
4026
4027    // ========================================================================
4028    // Generation Phase Methods
4029    // ========================================================================
4030
4031    /// Phase 1: Generate Chart of Accounts and update statistics.
4032    fn phase_chart_of_accounts(
4033        &mut self,
4034        stats: &mut EnhancedGenerationStatistics,
4035    ) -> SynthResult<Arc<ChartOfAccounts>> {
4036        info!("Phase 1: Generating Chart of Accounts");
4037        let coa = self.generate_coa()?;
4038        stats.accounts_count = coa.account_count();
4039        info!(
4040            "Chart of Accounts generated: {} accounts",
4041            stats.accounts_count
4042        );
4043        self.check_resources_with_log("post-coa")?;
4044        Ok(coa)
4045    }
4046
4047    /// Phase 2: Generate master data (vendors, customers, materials, assets, employees).
4048    fn phase_master_data(&mut self, stats: &mut EnhancedGenerationStatistics) -> SynthResult<()> {
4049        if self.phase_config.generate_master_data {
4050            info!("Phase 2: Generating Master Data");
4051            self.generate_master_data()?;
4052            stats.vendor_count = self.master_data.vendors.len();
4053            stats.customer_count = self.master_data.customers.len();
4054            stats.material_count = self.master_data.materials.len();
4055            stats.asset_count = self.master_data.assets.len();
4056            stats.employee_count = self.master_data.employees.len();
4057            info!(
4058                "Master data generated: {} vendors, {} customers, {} materials, {} assets, {} employees",
4059                stats.vendor_count, stats.customer_count, stats.material_count,
4060                stats.asset_count, stats.employee_count
4061            );
4062            self.check_resources_with_log("post-master-data")?;
4063        } else {
4064            debug!("Phase 2: Skipped (master data generation disabled)");
4065        }
4066        Ok(())
4067    }
4068
4069    /// Phase 3: Generate document flows (P2P and O2C) and link to subledgers.
4070    fn phase_document_flows(
4071        &mut self,
4072        stats: &mut EnhancedGenerationStatistics,
4073    ) -> SynthResult<(DocumentFlowSnapshot, SubledgerSnapshot, Vec<JournalEntry>)> {
4074        let mut document_flows = DocumentFlowSnapshot::default();
4075        let mut subledger = SubledgerSnapshot::default();
4076        // Dunning JEs (interest + charges) accumulated here and merged into the
4077        // main FA-JE list below so they appear in the GL.
4078        let mut dunning_journal_entries: Vec<JournalEntry> = Vec::new();
4079
4080        if self.phase_config.generate_document_flows && !self.master_data.vendors.is_empty() {
4081            info!("Phase 3: Generating Document Flows");
4082            self.generate_document_flows(&mut document_flows)?;
4083            stats.p2p_chain_count = document_flows.p2p_chains.len();
4084            stats.o2c_chain_count = document_flows.o2c_chains.len();
4085            info!(
4086                "Document flows generated: {} P2P chains, {} O2C chains",
4087                stats.p2p_chain_count, stats.o2c_chain_count
4088            );
4089
4090            // Phase 3b: Link document flows to subledgers (for data coherence)
4091            debug!("Phase 3b: Linking document flows to subledgers");
4092            subledger = self.link_document_flows_to_subledgers(&document_flows)?;
4093            stats.ap_invoice_count = subledger.ap_invoices.len();
4094            stats.ar_invoice_count = subledger.ar_invoices.len();
4095            debug!(
4096                "Subledgers linked: {} AP invoices, {} AR invoices",
4097                stats.ap_invoice_count, stats.ar_invoice_count
4098            );
4099
4100            // Phase 3b-settle: Apply payment settlements to reduce amount_remaining.
4101            // Without this step the subledger is systematically overstated because
4102            // amount_remaining is set at invoice creation and never reduced by
4103            // the payments that were generated in the document-flow phase.
4104            debug!("Phase 3b-settle: Applying payment settlements to subledgers");
4105            apply_ap_settlements(&mut subledger.ap_invoices, &document_flows.payments);
4106            apply_ar_settlements(&mut subledger.ar_invoices, &document_flows.payments);
4107            debug!("Payment settlements applied to AP and AR subledgers");
4108
4109            // Phase 3b-aging: Build AR/AP aging reports (one per company) after settlement.
4110            // The as-of date is the last day of the configured period.
4111            if let Ok(start_date) =
4112                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4113            {
4114                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4115                    - chrono::Days::new(1);
4116                debug!("Phase 3b-aging: Building AR/AP aging reports as of {as_of_date}");
4117                // Note: AR aging total_ar_balance reflects subledger ARInvoice records only
4118                // (created from O2C document flows). The Balance Sheet "Receivables" figure is
4119                // derived from JE-level aggregation and will typically differ. This is a known
4120                // data model gap: subledger AR (document-flow-driven) and GL AR (JE-driven) are
4121                // generated independently. A future reconciliation phase should align them by
4122                // using subledger totals as the authoritative source for BS Receivables.
4123                for company in &self.config.companies {
4124                    let ar_report = ARAgingReport::from_invoices(
4125                        company.code.clone(),
4126                        &subledger.ar_invoices,
4127                        as_of_date,
4128                    );
4129                    subledger.ar_aging_reports.push(ar_report);
4130
4131                    let ap_report = APAgingReport::from_invoices(
4132                        company.code.clone(),
4133                        &subledger.ap_invoices,
4134                        as_of_date,
4135                    );
4136                    subledger.ap_aging_reports.push(ap_report);
4137                }
4138                debug!(
4139                    "AR/AP aging reports built: {} AR, {} AP",
4140                    subledger.ar_aging_reports.len(),
4141                    subledger.ap_aging_reports.len()
4142                );
4143
4144                // Phase 3b-dunning: Run dunning process on overdue AR invoices.
4145                debug!("Phase 3b-dunning: Executing dunning runs for overdue AR invoices");
4146                {
4147                    use datasynth_generators::DunningGenerator;
4148                    let mut dunning_gen = DunningGenerator::new(self.seed + 2500);
4149                    for company in &self.config.companies {
4150                        let currency = company.currency.as_str();
4151                        // Collect mutable references to AR invoices for this company
4152                        // (dunning generator updates dunning_info on invoices in-place).
4153                        let mut company_invoices: Vec<
4154                            datasynth_core::models::subledger::ar::ARInvoice,
4155                        > = subledger
4156                            .ar_invoices
4157                            .iter()
4158                            .filter(|inv| inv.company_code == company.code)
4159                            .cloned()
4160                            .collect();
4161
4162                        if company_invoices.is_empty() {
4163                            continue;
4164                        }
4165
4166                        let result = dunning_gen.execute_dunning_run(
4167                            &company.code,
4168                            as_of_date,
4169                            &mut company_invoices,
4170                            currency,
4171                        );
4172
4173                        // Write back updated dunning info to the main AR invoice list
4174                        for updated in &company_invoices {
4175                            if let Some(orig) = subledger
4176                                .ar_invoices
4177                                .iter_mut()
4178                                .find(|i| i.invoice_number == updated.invoice_number)
4179                            {
4180                                orig.dunning_info = updated.dunning_info.clone();
4181                            }
4182                        }
4183
4184                        subledger.dunning_runs.push(result.dunning_run);
4185                        subledger.dunning_letters.extend(result.letters);
4186                        // Dunning JEs (interest + charges) collected into local buffer.
4187                        dunning_journal_entries.extend(result.journal_entries);
4188                    }
4189                    debug!(
4190                        "Dunning runs complete: {} runs, {} letters",
4191                        subledger.dunning_runs.len(),
4192                        subledger.dunning_letters.len()
4193                    );
4194                }
4195            }
4196
4197            self.check_resources_with_log("post-document-flows")?;
4198        } else {
4199            debug!("Phase 3: Skipped (document flow generation disabled or no master data)");
4200        }
4201
4202        // Generate FA subledger records (and acquisition JEs) from master data fixed assets
4203        let mut fa_journal_entries: Vec<JournalEntry> = dunning_journal_entries;
4204        if !self.master_data.assets.is_empty() {
4205            debug!("Generating FA subledger records");
4206            let company_code = self
4207                .config
4208                .companies
4209                .first()
4210                .map(|c| c.code.as_str())
4211                .unwrap_or("1000");
4212            let currency = self
4213                .config
4214                .companies
4215                .first()
4216                .map(|c| c.currency.as_str())
4217                .unwrap_or("USD");
4218
4219            let mut fa_gen = datasynth_generators::FAGenerator::new(
4220                datasynth_generators::FAGeneratorConfig::default(),
4221                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 70),
4222            );
4223
4224            for asset in &self.master_data.assets {
4225                let (record, je) = fa_gen.generate_asset_acquisition(
4226                    company_code,
4227                    &format!("{:?}", asset.asset_class),
4228                    &asset.description,
4229                    asset.acquisition_date,
4230                    currency,
4231                    asset.cost_center.as_deref(),
4232                );
4233                subledger.fa_records.push(record);
4234                fa_journal_entries.push(je);
4235            }
4236
4237            stats.fa_subledger_count = subledger.fa_records.len();
4238            debug!(
4239                "FA subledger records generated: {} (with {} acquisition JEs)",
4240                stats.fa_subledger_count,
4241                fa_journal_entries.len()
4242            );
4243        }
4244
4245        // Generate Inventory subledger records from master data materials
4246        if !self.master_data.materials.is_empty() {
4247            debug!("Generating Inventory subledger records");
4248            let first_company = self.config.companies.first();
4249            let company_code = first_company.map(|c| c.code.as_str()).unwrap_or("1000");
4250            let inv_currency = first_company
4251                .map(|c| c.currency.clone())
4252                .unwrap_or_else(|| "USD".to_string());
4253
4254            let mut inv_gen = datasynth_generators::InventoryGenerator::new_with_currency(
4255                datasynth_generators::InventoryGeneratorConfig::default(),
4256                rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 71),
4257                inv_currency.clone(),
4258            );
4259
4260            for (i, material) in self.master_data.materials.iter().enumerate() {
4261                let plant = format!("PLANT{:02}", (i % 3) + 1);
4262                let storage_loc = format!("SL-{:03}", (i % 10) + 1);
4263                let initial_qty = rust_decimal::Decimal::from(
4264                    material
4265                        .safety_stock
4266                        .to_string()
4267                        .parse::<i64>()
4268                        .unwrap_or(100),
4269                );
4270
4271                let position = inv_gen.generate_position(
4272                    company_code,
4273                    &plant,
4274                    &storage_loc,
4275                    &material.material_id,
4276                    &material.description,
4277                    initial_qty,
4278                    Some(material.standard_cost),
4279                    &inv_currency,
4280                );
4281                subledger.inventory_positions.push(position);
4282            }
4283
4284            stats.inventory_subledger_count = subledger.inventory_positions.len();
4285            debug!(
4286                "Inventory subledger records generated: {}",
4287                stats.inventory_subledger_count
4288            );
4289        }
4290
4291        // Phase 3-depr: Run depreciation for each fiscal period covered by the config.
4292        if !subledger.fa_records.is_empty() {
4293            if let Ok(start_date) =
4294                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4295            {
4296                let company_code = self
4297                    .config
4298                    .companies
4299                    .first()
4300                    .map(|c| c.code.as_str())
4301                    .unwrap_or("1000");
4302                let fiscal_year = start_date.year();
4303                let start_period = start_date.month();
4304                let end_period =
4305                    (start_period + self.config.global.period_months.saturating_sub(1)).min(12);
4306
4307                let depr_cfg = FaDepreciationScheduleConfig {
4308                    fiscal_year,
4309                    start_period,
4310                    end_period,
4311                    seed_offset: 800,
4312                };
4313                let depr_gen = FaDepreciationScheduleGenerator::new(depr_cfg, self.seed);
4314                let runs = depr_gen.generate(company_code, &subledger.fa_records);
4315                let run_count = runs.len();
4316                subledger.depreciation_runs = runs;
4317                debug!(
4318                    "Depreciation runs generated: {} runs for {} periods",
4319                    run_count, self.config.global.period_months
4320                );
4321            }
4322        }
4323
4324        // Phase 3-inv-val: Build inventory valuation report (lower-of-cost-or-NRV).
4325        if !subledger.inventory_positions.is_empty() {
4326            if let Ok(start_date) =
4327                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4328            {
4329                let as_of_date = start_date + chrono::Months::new(self.config.global.period_months)
4330                    - chrono::Days::new(1);
4331
4332                let inv_val_cfg = InventoryValuationGeneratorConfig::default();
4333                let inv_val_gen = InventoryValuationGenerator::new(inv_val_cfg, self.seed);
4334
4335                for company in &self.config.companies {
4336                    let result = inv_val_gen.generate(
4337                        &company.code,
4338                        &subledger.inventory_positions,
4339                        as_of_date,
4340                    );
4341                    subledger.inventory_valuations.push(result);
4342                }
4343                debug!(
4344                    "Inventory valuations generated: {} company reports",
4345                    subledger.inventory_valuations.len()
4346                );
4347            }
4348        }
4349
4350        Ok((document_flows, subledger, fa_journal_entries))
4351    }
4352
4353    /// Phase 3c: Generate OCPM events from document flows.
4354    #[allow(clippy::too_many_arguments)]
4355    fn phase_ocpm_events(
4356        &mut self,
4357        document_flows: &DocumentFlowSnapshot,
4358        sourcing: &SourcingSnapshot,
4359        hr: &HrSnapshot,
4360        manufacturing: &ManufacturingSnapshot,
4361        banking: &BankingSnapshot,
4362        audit: &AuditSnapshot,
4363        financial_reporting: &FinancialReportingSnapshot,
4364        stats: &mut EnhancedGenerationStatistics,
4365    ) -> SynthResult<OcpmSnapshot> {
4366        let degradation = self.check_resources()?;
4367        if degradation >= DegradationLevel::Reduced {
4368            debug!(
4369                "Phase skipped due to resource pressure (degradation: {:?})",
4370                degradation
4371            );
4372            return Ok(OcpmSnapshot::default());
4373        }
4374        if self.phase_config.generate_ocpm_events {
4375            info!("Phase 3c: Generating OCPM Events");
4376            let ocpm_snapshot = self.generate_ocpm_events(
4377                document_flows,
4378                sourcing,
4379                hr,
4380                manufacturing,
4381                banking,
4382                audit,
4383                financial_reporting,
4384            )?;
4385            stats.ocpm_event_count = ocpm_snapshot.event_count;
4386            stats.ocpm_object_count = ocpm_snapshot.object_count;
4387            stats.ocpm_case_count = ocpm_snapshot.case_count;
4388            info!(
4389                "OCPM events generated: {} events, {} objects, {} cases",
4390                stats.ocpm_event_count, stats.ocpm_object_count, stats.ocpm_case_count
4391            );
4392            self.check_resources_with_log("post-ocpm")?;
4393            Ok(ocpm_snapshot)
4394        } else {
4395            debug!("Phase 3c: Skipped (OCPM generation disabled or no document flows)");
4396            Ok(OcpmSnapshot::default())
4397        }
4398    }
4399
4400    /// Phase 4: Generate journal entries from document flows and standalone generation.
4401    fn phase_journal_entries(
4402        &mut self,
4403        coa: &Arc<ChartOfAccounts>,
4404        document_flows: &DocumentFlowSnapshot,
4405        _stats: &mut EnhancedGenerationStatistics,
4406    ) -> SynthResult<Vec<JournalEntry>> {
4407        let mut entries = Vec::new();
4408
4409        // Phase 4a: Generate JEs from document flows (for data coherence)
4410        if self.phase_config.generate_document_flows && !document_flows.p2p_chains.is_empty() {
4411            debug!("Phase 4a: Generating JEs from document flows");
4412            let flow_entries = self.generate_jes_from_document_flows(document_flows)?;
4413            debug!("Generated {} JEs from document flows", flow_entries.len());
4414            entries.extend(flow_entries);
4415        }
4416
4417        // Phase 4b: Generate standalone journal entries
4418        if self.phase_config.generate_journal_entries {
4419            info!("Phase 4: Generating Journal Entries");
4420            let je_entries = self.generate_journal_entries(coa)?;
4421            info!("Generated {} standalone journal entries", je_entries.len());
4422            entries.extend(je_entries);
4423        } else {
4424            debug!("Phase 4: Skipped (journal entry generation disabled)");
4425        }
4426
4427        // Phase 4c (shard mode): inject pre-built IC journal entries from
4428        // `ShardContext`. When running standalone (no group engine), this
4429        // is a no-op. See crate::shard_context::ShardContext for rationale.
4430        if let Some(ctx) = &self.shard_context {
4431            if !ctx.extra_journal_entries.is_empty() {
4432                debug!(
4433                    "Phase 4c: appending {} shard-mode IC journal entries",
4434                    ctx.extra_journal_entries.len()
4435                );
4436                entries.extend(ctx.extra_journal_entries.iter().cloned());
4437            }
4438        }
4439
4440        if !entries.is_empty() {
4441            // Note: stats.total_entries/total_line_items are set in generate()
4442            // after all JE-generating phases (FA, IC, payroll, mfg) complete.
4443            self.check_resources_with_log("post-journal-entries")?;
4444        }
4445
4446        Ok(entries)
4447    }
4448
4449    /// Phase 5: Inject anomalies into journal entries.
4450    fn phase_anomaly_injection(
4451        &mut self,
4452        entries: &mut [JournalEntry],
4453        actions: &DegradationActions,
4454        stats: &mut EnhancedGenerationStatistics,
4455    ) -> SynthResult<AnomalyLabels> {
4456        if self.phase_config.inject_anomalies
4457            && !entries.is_empty()
4458            && !actions.skip_anomaly_injection
4459        {
4460            info!("Phase 5: Injecting Anomalies");
4461            let result = self.inject_anomalies(entries)?;
4462            stats.anomalies_injected = result.labels.len();
4463            info!("Injected {} anomalies", stats.anomalies_injected);
4464            self.check_resources_with_log("post-anomaly-injection")?;
4465            Ok(result)
4466        } else if actions.skip_anomaly_injection {
4467            warn!("Phase 5: Skipped due to resource degradation");
4468            Ok(AnomalyLabels::default())
4469        } else {
4470            debug!("Phase 5: Skipped (anomaly injection disabled or no entries)");
4471            Ok(AnomalyLabels::default())
4472        }
4473    }
4474
4475    /// Phase 8d (W8.1): TB drift-correction pass.
4476    ///
4477    /// Builds a `RunningBalanceTracker` over all JEs assembled so far, attaches
4478    /// the TB anchor prior (when available), and — if `drift_correction_needed()`
4479    /// fires for any company — emits one balanced "SA" adjustment JE per company
4480    /// to pull the synthetic balances toward the corpus-median targets.
4481    ///
4482    /// No-op when no TB anchor is loaded (backwards-compatible).
4483    fn phase_tb_drift_correction(&mut self, entries: &mut Vec<JournalEntry>) -> SynthResult<()> {
4484        // Only proceed when priors with a TB anchor are loaded.
4485        let tb_anchor = match &self.cached_priors {
4486            Some(priors) => match &priors.tb_anchor {
4487                Some(anchor) => anchor.clone(),
4488                None => return Ok(()),
4489            },
4490            None => return Ok(()),
4491        };
4492
4493        if !tb_anchor.has_data() {
4494            return Ok(());
4495        }
4496
4497        tracing::info!(
4498            target: "datasynth_runtime::tb_anchor",
4499            accounts = tb_anchor.per_account.len(),
4500            total_assets = tb_anchor.total_assets,
4501            "W8.1 — TB anchor loaded; running drift-correction pass"
4502        );
4503
4504        // Build a tracker over all current JEs.
4505        let tracker_config = BalanceTrackerConfig {
4506            validate_on_each_entry: false,
4507            track_history: false,
4508            fail_on_validation_error: false,
4509            ..Default::default()
4510        };
4511        let currency = self
4512            .config
4513            .companies
4514            .first()
4515            .map(|c| c.currency.clone())
4516            .unwrap_or_else(|| "USD".to_string());
4517
4518        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, currency);
4519        tracker.set_tb_anchor(tb_anchor.clone());
4520        let _ = tracker.apply_entries(entries);
4521
4522        // SP5.1 — Diagnostic: log the number of accounts being tracked vs in the
4523        // anchor, plus the top-5 most-drifted accounts for each company so we
4524        // can distinguish "no drift" from "drift below threshold" at a glance.
4525        for company in &self.config.companies {
4526            let code = &company.code;
4527            let drifts = tracker.account_drift(code);
4528            let mut sorted_drifts = drifts.clone();
4529            sorted_drifts.sort_by(|a, b| {
4530                b.1.abs()
4531                    .partial_cmp(&a.1.abs())
4532                    .unwrap_or(std::cmp::Ordering::Equal)
4533            });
4534            let aggregate_drift: f64 = drifts.iter().map(|(_, d)| d.abs()).sum();
4535            let correction_needed = tracker.drift_correction_needed(code);
4536            tracing::info!(
4537                target: "datasynth_runtime::tb_anchor",
4538                company = %code,
4539                anchor_accounts = tb_anchor.per_account.len(),
4540                tracked_accounts = drifts.len(),
4541                aggregate_drift = aggregate_drift,
4542                correction_needed = correction_needed,
4543                "W8.1 SP5.1 — per-company drift summary before correction"
4544            );
4545            for (acc, drift) in sorted_drifts.iter().take(5) {
4546                tracing::info!(
4547                    target: "datasynth_runtime::tb_anchor",
4548                    company = %code,
4549                    account = %acc,
4550                    drift = drift,
4551                    "W8.1 SP5.1 — top-5 drifted accounts"
4552                );
4553            }
4554        }
4555
4556        // Derive the posting date: use the last day of the simulation period.
4557        let period_end = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4558            .map(|d| d + chrono::Months::new(self.config.global.period_months))
4559            .unwrap_or_else(|_| chrono::Utc::now().naive_utc().date());
4560
4561        // Distinct seed offset so drift-correction draws are independent of other phases.
4562        use rand_chacha::rand_core::SeedableRng as _;
4563        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(0xD81F_C0F3));
4564
4565        let mut correction_count = 0usize;
4566        for company in &self.config.companies {
4567            let code = &company.code;
4568            if !tracker.drift_correction_needed(code) {
4569                tracing::debug!(
4570                    target: "datasynth_runtime::tb_anchor",
4571                    company = %code,
4572                    "W8.1 — drift_correction_needed returned false; skipping company"
4573                );
4574                continue;
4575            }
4576            if let Some(je) = tracker.build_drift_correction_je(code, period_end, &mut rng) {
4577                tracing::debug!(
4578                    target: "datasynth_runtime::tb_anchor",
4579                    company = %code,
4580                    lines = je.lines.len(),
4581                    debit = %je.total_debit(),
4582                    credit = %je.total_credit(),
4583                    "W8.1 — emitting drift-correction JE"
4584                );
4585                // Apply the correction to the tracker so the running state is current.
4586                let _ = tracker.apply_entry(&je);
4587                entries.push(je);
4588                correction_count += 1;
4589            }
4590        }
4591
4592        if correction_count > 0 {
4593            tracing::info!(
4594                target: "datasynth_runtime::tb_anchor",
4595                correction_count,
4596                "W8.1 — drift-correction pass emitted {} JE(s)",
4597                correction_count
4598            );
4599        } else {
4600            tracing::debug!(
4601                target: "datasynth_runtime::tb_anchor",
4602                "W8.1 — drift-correction pass: no corrections needed"
4603            );
4604        }
4605
4606        Ok(())
4607    }
4608
4609    /// Phase 6: Validate balance sheet equation on journal entries.
4610    fn phase_balance_validation(
4611        &mut self,
4612        entries: &[JournalEntry],
4613    ) -> SynthResult<BalanceValidationResult> {
4614        if self.phase_config.validate_balances && !entries.is_empty() {
4615            debug!("Phase 6: Validating Balances");
4616            let balance_validation = self.validate_journal_entries(entries)?;
4617            if balance_validation.is_balanced {
4618                debug!("Balance validation passed");
4619            } else {
4620                warn!(
4621                    "Balance validation found {} errors",
4622                    balance_validation.validation_errors.len()
4623                );
4624            }
4625            Ok(balance_validation)
4626        } else {
4627            Ok(BalanceValidationResult::default())
4628        }
4629    }
4630
4631    /// Validate that every `gl_account` referenced in `entries` exists in the
4632    /// chart of accounts.
4633    ///
4634    /// Always emits a warn-level log when the COA is missing accounts; in
4635    /// strict mode (`phase_config.validate_coa_coverage_strict`) returns
4636    /// `SynthError::generation` so the caller can fail fast.
4637    fn validate_coa_coverage(
4638        &self,
4639        entries: &[JournalEntry],
4640        coa: &ChartOfAccounts,
4641    ) -> SynthResult<()> {
4642        if entries.is_empty() {
4643            return Ok(());
4644        }
4645        let coa_set: std::collections::HashSet<&str> = coa
4646            .accounts
4647            .iter()
4648            .map(|a| a.account_number.as_str())
4649            .collect();
4650        let mut missing: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
4651        for je in entries {
4652            for line in je.lines.iter() {
4653                if !coa_set.contains(line.gl_account.as_str()) {
4654                    missing.insert(line.gl_account.clone());
4655                }
4656            }
4657        }
4658        if missing.is_empty() {
4659            debug!("COA coverage validation passed");
4660            return Ok(());
4661        }
4662        let msg = format!(
4663            "JEs reference {} gl_account values not in the chart of accounts (sample: {:?})",
4664            missing.len(),
4665            missing.iter().take(10).collect::<Vec<_>>()
4666        );
4667        if self.phase_config.validate_coa_coverage_strict {
4668            Err(SynthError::generation(msg))
4669        } else {
4670            warn!("{} — pass --validate-coa-coverage to fail on this", msg);
4671            Ok(())
4672        }
4673    }
4674
4675    /// Phase 7: Inject data quality variations (typos, missing values, format issues).
4676    fn phase_data_quality_injection(
4677        &mut self,
4678        entries: &mut [JournalEntry],
4679        actions: &DegradationActions,
4680        stats: &mut EnhancedGenerationStatistics,
4681    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
4682        if self.phase_config.inject_data_quality
4683            && !entries.is_empty()
4684            && !actions.skip_data_quality
4685        {
4686            info!("Phase 7: Injecting Data Quality Variations");
4687            let (dq_stats, quality_issues) = self.inject_data_quality(entries)?;
4688            stats.data_quality_issues = dq_stats.records_with_issues;
4689            info!("Injected {} data quality issues", stats.data_quality_issues);
4690            self.check_resources_with_log("post-data-quality")?;
4691            Ok((dq_stats, quality_issues))
4692        } else if actions.skip_data_quality {
4693            warn!("Phase 7: Skipped due to resource degradation");
4694            // v4.4.1: report the denominator (entries seen) even when
4695            // injection is skipped, so downstream consumers can tell
4696            // "skipped, 0/N" apart from "ran but found nothing".
4697            Ok((stats_with_denominator(entries.len()), Vec::new()))
4698        } else {
4699            debug!("Phase 7: Skipped (data quality injection disabled or no entries)");
4700            Ok((stats_with_denominator(entries.len()), Vec::new()))
4701        }
4702    }
4703
4704    /// Phase 10b: Generate period-close journal entries.
4705    ///
4706    /// Generates:
4707    /// 1. Depreciation JEs per asset: DR Depreciation Expense (6000) / CR Accumulated
4708    ///    Depreciation (1510) based on FA subledger records and straight-line amortisation
4709    ///    for the configured period.
4710    /// 2. Tax provision JE per company: DR Tax Expense (8000) / CR Sales Tax Payable (2100)
4711    /// 3. Income statement closing JE per company: transfer net income after tax to retained
4712    ///    earnings via the Income Summary (3600) clearing account.
4713    fn phase_period_close(
4714        &mut self,
4715        entries: &mut Vec<JournalEntry>,
4716        subledger: &SubledgerSnapshot,
4717        stats: &mut EnhancedGenerationStatistics,
4718    ) -> SynthResult<()> {
4719        if !self.phase_config.generate_period_close || entries.is_empty() {
4720            debug!("Phase 10b: Skipped (period close disabled or no entries)");
4721            return Ok(());
4722        }
4723
4724        info!("Phase 10b: Generating period-close journal entries");
4725
4726        use datasynth_core::accounts::{
4727            control_accounts, equity_accounts, expense_accounts, tax_accounts, AccountCategory,
4728        };
4729        use rust_decimal::Decimal;
4730
4731        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
4732            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
4733        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
4734        // Posting date for close entries is the last day of the period
4735        let close_date = end_date - chrono::Days::new(1);
4736
4737        // Statutory tax rate (21% — configurable rates come in later tiers)
4738        let tax_rate = Decimal::new(21, 2); // 0.21
4739
4740        // Collect company codes from config
4741        let company_codes: Vec<String> = self
4742            .config
4743            .companies
4744            .iter()
4745            .map(|c| c.code.clone())
4746            .collect();
4747
4748        // Estimate capacity: one JE per active FA + 2 JEs per company (tax + close)
4749        let estimated_close_jes = subledger.fa_records.len() + company_codes.len() * 2;
4750        let mut close_jes: Vec<JournalEntry> = Vec::with_capacity(estimated_close_jes);
4751
4752        // --- Depreciation JEs (per asset) ---
4753        // Compute period depreciation for each active fixed asset using straight-line method.
4754        // period_depreciation = (acquisition_cost - salvage_value) / useful_life_months * period_months
4755        let period_months = self.config.global.period_months;
4756        for asset in &subledger.fa_records {
4757            // Skip assets that are inactive / fully depreciated / non-depreciable
4758            use datasynth_core::models::subledger::fa::AssetStatus;
4759            if asset.status != AssetStatus::Active || asset.is_fully_depreciated() {
4760                continue;
4761            }
4762            let useful_life_months = asset.useful_life_months();
4763            if useful_life_months == 0 {
4764                // Land or CIP — not depreciated
4765                continue;
4766            }
4767            let salvage_value = asset.salvage_value();
4768            let depreciable_base = (asset.acquisition_cost - salvage_value).max(Decimal::ZERO);
4769            if depreciable_base == Decimal::ZERO {
4770                continue;
4771            }
4772            let period_depr = (depreciable_base / Decimal::from(useful_life_months)
4773                * Decimal::from(period_months))
4774            .round_dp(2);
4775            if period_depr <= Decimal::ZERO {
4776                continue;
4777            }
4778
4779            let mut depr_header = JournalEntryHeader::new(asset.company_code.clone(), close_date);
4780            depr_header.document_type = "CL".to_string();
4781            depr_header.header_text = Some(format!(
4782                "Depreciation - {} {}",
4783                asset.asset_number, asset.description
4784            ));
4785            depr_header.created_by = "CLOSE_ENGINE".to_string();
4786            depr_header.source = TransactionSource::Automated;
4787            depr_header.business_process = Some(BusinessProcess::R2R);
4788
4789            let doc_id = depr_header.document_id;
4790            let mut depr_je = JournalEntry::new(depr_header);
4791
4792            // DR Depreciation Expense (6000)
4793            depr_je.add_line(JournalEntryLine::debit(
4794                doc_id,
4795                1,
4796                expense_accounts::DEPRECIATION.to_string(),
4797                period_depr,
4798            ));
4799            // CR Accumulated Depreciation (1510)
4800            depr_je.add_line(JournalEntryLine::credit(
4801                doc_id,
4802                2,
4803                control_accounts::ACCUMULATED_DEPRECIATION.to_string(),
4804                period_depr,
4805            ));
4806
4807            debug_assert!(depr_je.is_balanced(), "Depreciation JE must be balanced");
4808            close_jes.push(depr_je);
4809        }
4810
4811        if !subledger.fa_records.is_empty() {
4812            debug!(
4813                "Generated {} depreciation JEs from {} FA records",
4814                close_jes.len(),
4815                subledger.fa_records.len()
4816            );
4817        }
4818
4819        // --- Accrual entries (standard period-end accruals per company) ---
4820        // Generate standard accrued expense entries (utilities, rent, interest) using
4821        // a revenue-based estimate. These use account 6200 (Misc Expense) / 2100 (Accrued Liab).
4822        {
4823            use datasynth_generators::{AccrualGenerator, AccrualGeneratorConfig};
4824            let mut accrual_gen = AccrualGenerator::new(AccrualGeneratorConfig::default());
4825            // v3.4.3: snap reversal dates to business days. No-op when
4826            // temporal_patterns.business_days is disabled.
4827            if let Some(ctx) = &self.temporal_context {
4828                accrual_gen.set_temporal_context(Arc::clone(ctx));
4829            }
4830
4831            // Standard accrual items: (description, expense_acct, liability_acct, % of revenue)
4832            let accrual_items: &[(&str, &str, &str)] = &[
4833                ("Accrued Utilities", "6200", "2100"),
4834                ("Accrued Rent", "6300", "2100"),
4835                ("Accrued Interest", "6100", "2150"),
4836            ];
4837
4838            for company_code in &company_codes {
4839                // Estimate company revenue from existing JEs
4840                let company_revenue: Decimal = entries
4841                    .iter()
4842                    .filter(|e| e.header.company_code == *company_code)
4843                    .flat_map(|e| e.lines.iter())
4844                    .filter(|l| l.gl_account.starts_with('4'))
4845                    .map(|l| l.credit_amount - l.debit_amount)
4846                    .fold(Decimal::ZERO, |acc, v| acc + v);
4847
4848                if company_revenue <= Decimal::ZERO {
4849                    continue;
4850                }
4851
4852                // Use 0.5% of period revenue per accrual item as a proxy
4853                let accrual_base = (company_revenue * Decimal::new(5, 3)).round_dp(2);
4854                if accrual_base <= Decimal::ZERO {
4855                    continue;
4856                }
4857
4858                for (description, expense_acct, liability_acct) in accrual_items {
4859                    let (accrual_je, reversal_je) = accrual_gen.generate_accrued_expense(
4860                        company_code,
4861                        description,
4862                        accrual_base,
4863                        expense_acct,
4864                        liability_acct,
4865                        close_date,
4866                        None,
4867                    );
4868                    close_jes.push(accrual_je);
4869                    if let Some(rev_je) = reversal_je {
4870                        close_jes.push(rev_je);
4871                    }
4872                }
4873            }
4874
4875            debug!(
4876                "Generated accrual entries for {} companies",
4877                company_codes.len()
4878            );
4879        }
4880
4881        for company_code in &company_codes {
4882            // Calculate net income for this company from existing JEs:
4883            // Net income = sum of credit-normal revenue postings - sum of debit-normal expense postings
4884            // Revenue (4xxx): credit-normal, so net = credits - debits
4885            // COGS (5xxx), OpEx (6xxx), Other I/E (7xxx), Tax (8xxx): debit-normal, so net = debits - credits
4886            let mut total_revenue = Decimal::ZERO;
4887            let mut total_expenses = Decimal::ZERO;
4888
4889            for entry in entries.iter() {
4890                if entry.header.company_code != *company_code {
4891                    continue;
4892                }
4893                for line in &entry.lines {
4894                    let category = AccountCategory::from_account(&line.gl_account);
4895                    match category {
4896                        AccountCategory::Revenue => {
4897                            // Revenue is credit-normal: net revenue = credits - debits
4898                            total_revenue += line.credit_amount - line.debit_amount;
4899                        }
4900                        AccountCategory::Cogs
4901                        | AccountCategory::OperatingExpense
4902                        | AccountCategory::OtherIncomeExpense
4903                        | AccountCategory::Tax => {
4904                            // Expenses are debit-normal: net expense = debits - credits
4905                            total_expenses += line.debit_amount - line.credit_amount;
4906                        }
4907                        _ => {}
4908                    }
4909                }
4910            }
4911
4912            let pre_tax_income = total_revenue - total_expenses;
4913
4914            // Skip if no income statement activity
4915            if pre_tax_income == Decimal::ZERO {
4916                debug!(
4917                    "Company {}: no pre-tax income, skipping period close",
4918                    company_code
4919                );
4920                continue;
4921            }
4922
4923            // --- Tax provision / DTA JE ---
4924            if pre_tax_income > Decimal::ZERO {
4925                // Profitable year: DR Tax Expense (8000) / CR Income Tax Payable (2130)
4926                let tax_amount = (pre_tax_income * tax_rate).round_dp(2);
4927
4928                let mut tax_header = JournalEntryHeader::new(company_code.clone(), close_date);
4929                tax_header.document_type = "CL".to_string();
4930                tax_header.header_text = Some(format!("Tax provision - {}", company_code));
4931                tax_header.created_by = "CLOSE_ENGINE".to_string();
4932                tax_header.source = TransactionSource::Automated;
4933                tax_header.business_process = Some(BusinessProcess::R2R);
4934
4935                let doc_id = tax_header.document_id;
4936                let mut tax_je = JournalEntry::new(tax_header);
4937
4938                // DR Tax Expense (8000)
4939                tax_je.add_line(JournalEntryLine::debit(
4940                    doc_id,
4941                    1,
4942                    tax_accounts::TAX_EXPENSE.to_string(),
4943                    tax_amount,
4944                ));
4945                // CR Income Tax Payable (2130)
4946                tax_je.add_line(JournalEntryLine::credit(
4947                    doc_id,
4948                    2,
4949                    tax_accounts::INCOME_TAX_PAYABLE.to_string(),
4950                    tax_amount,
4951                ));
4952
4953                debug_assert!(tax_je.is_balanced(), "Tax provision JE must be balanced");
4954                close_jes.push(tax_je);
4955            } else {
4956                // Loss year: recognise a Deferred Tax Asset (DTA) = |loss| × statutory_rate
4957                // DR Deferred Tax Asset (1600) / CR Tax Benefit (8000 credit = income tax benefit)
4958                let dta_amount = (pre_tax_income.abs() * tax_rate).round_dp(2);
4959                if dta_amount > Decimal::ZERO {
4960                    let mut dta_header = JournalEntryHeader::new(company_code.clone(), close_date);
4961                    dta_header.document_type = "CL".to_string();
4962                    dta_header.header_text =
4963                        Some(format!("Deferred tax asset (DTA) - {}", company_code));
4964                    dta_header.created_by = "CLOSE_ENGINE".to_string();
4965                    dta_header.source = TransactionSource::Automated;
4966                    dta_header.business_process = Some(BusinessProcess::R2R);
4967
4968                    let doc_id = dta_header.document_id;
4969                    let mut dta_je = JournalEntry::new(dta_header);
4970
4971                    // DR Deferred Tax Asset (1600)
4972                    dta_je.add_line(JournalEntryLine::debit(
4973                        doc_id,
4974                        1,
4975                        tax_accounts::DEFERRED_TAX_ASSET.to_string(),
4976                        dta_amount,
4977                    ));
4978                    // CR Income Tax Benefit (8000) — credit reduces the tax expense line,
4979                    // reflecting the benefit of the future deductible temporary difference.
4980                    dta_je.add_line(JournalEntryLine::credit(
4981                        doc_id,
4982                        2,
4983                        tax_accounts::TAX_EXPENSE.to_string(),
4984                        dta_amount,
4985                    ));
4986
4987                    debug_assert!(dta_je.is_balanced(), "DTA JE must be balanced");
4988                    close_jes.push(dta_je);
4989                    debug!(
4990                        "Company {}: loss year — recognised DTA of {}",
4991                        company_code, dta_amount
4992                    );
4993                }
4994            }
4995
4996            // --- Dividend JEs (v2.4) ---
4997            // If the entity is profitable after tax, declare a 10% dividend payout.
4998            // This runs AFTER tax provision so the dividend is based on post-tax income
4999            // but BEFORE the retained earnings close so the RE transfer reflects the
5000            // reduced balance.
5001            let tax_provision = if pre_tax_income > Decimal::ZERO {
5002                (pre_tax_income * tax_rate).round_dp(2)
5003            } else {
5004                Decimal::ZERO
5005            };
5006            let net_income = pre_tax_income - tax_provision;
5007
5008            if net_income > Decimal::ZERO {
5009                use datasynth_generators::DividendGenerator;
5010                let dividend_amount = (net_income * Decimal::new(10, 2)).round_dp(2); // 10% payout
5011                let mut div_gen = DividendGenerator::new(self.seed + 460);
5012                let currency_str = self
5013                    .config
5014                    .companies
5015                    .iter()
5016                    .find(|c| c.code == *company_code)
5017                    .map(|c| c.currency.as_str())
5018                    .unwrap_or("USD");
5019                let div_result = div_gen.generate(
5020                    company_code,
5021                    close_date,
5022                    Decimal::new(1, 0), // $1 per share placeholder
5023                    dividend_amount,
5024                    currency_str,
5025                );
5026                let div_je_count = div_result.journal_entries.len();
5027                close_jes.extend(div_result.journal_entries);
5028                debug!(
5029                    "Company {}: declared dividend of {} ({} JEs)",
5030                    company_code, dividend_amount, div_je_count
5031                );
5032            }
5033
5034            // --- Income statement closing JE ---
5035            // Net income after tax (profit years) or net loss before DTA benefit (loss years).
5036            // For a loss year the DTA JE above already recognises the deferred benefit; here we
5037            // close the pre-tax loss into Retained Earnings as-is.
5038            if net_income != Decimal::ZERO {
5039                let mut close_header = JournalEntryHeader::new(company_code.clone(), close_date);
5040                close_header.document_type = "CL".to_string();
5041                close_header.header_text =
5042                    Some(format!("Income statement close - {}", company_code));
5043                close_header.created_by = "CLOSE_ENGINE".to_string();
5044                close_header.source = TransactionSource::Automated;
5045                close_header.business_process = Some(BusinessProcess::R2R);
5046
5047                let doc_id = close_header.document_id;
5048                let mut close_je = JournalEntry::new(close_header);
5049
5050                let abs_net_income = net_income.abs();
5051
5052                if net_income > Decimal::ZERO {
5053                    // Profit: DR Income Summary (3600) / CR Retained Earnings (3200)
5054                    close_je.add_line(JournalEntryLine::debit(
5055                        doc_id,
5056                        1,
5057                        equity_accounts::INCOME_SUMMARY.to_string(),
5058                        abs_net_income,
5059                    ));
5060                    close_je.add_line(JournalEntryLine::credit(
5061                        doc_id,
5062                        2,
5063                        equity_accounts::RETAINED_EARNINGS.to_string(),
5064                        abs_net_income,
5065                    ));
5066                } else {
5067                    // Loss: DR Retained Earnings (3200) / CR Income Summary (3600)
5068                    close_je.add_line(JournalEntryLine::debit(
5069                        doc_id,
5070                        1,
5071                        equity_accounts::RETAINED_EARNINGS.to_string(),
5072                        abs_net_income,
5073                    ));
5074                    close_je.add_line(JournalEntryLine::credit(
5075                        doc_id,
5076                        2,
5077                        equity_accounts::INCOME_SUMMARY.to_string(),
5078                        abs_net_income,
5079                    ));
5080                }
5081
5082                debug_assert!(
5083                    close_je.is_balanced(),
5084                    "Income statement closing JE must be balanced"
5085                );
5086                close_jes.push(close_je);
5087            }
5088        }
5089
5090        let close_count = close_jes.len();
5091        if close_count > 0 {
5092            info!("Generated {} period-close journal entries", close_count);
5093            self.emit_phase_items("period_close", "JournalEntry", &close_jes);
5094            entries.extend(close_jes);
5095            stats.period_close_je_count = close_count;
5096
5097            // Update total entry/line-item stats
5098            stats.total_entries = entries.len() as u64;
5099            stats.total_line_items = entries.iter().map(|e| e.line_count() as u64).sum();
5100        } else {
5101            debug!("No period-close entries generated (no income statement activity)");
5102        }
5103
5104        Ok(())
5105    }
5106
5107    /// Phase 8: Generate audit data (engagements, workpapers, evidence, risks, findings).
5108    fn phase_audit_data(
5109        &mut self,
5110        entries: &[JournalEntry],
5111        stats: &mut EnhancedGenerationStatistics,
5112    ) -> SynthResult<AuditSnapshot> {
5113        if self.phase_config.generate_audit {
5114            info!("Phase 8: Generating Audit Data");
5115            let audit_snapshot = self.generate_audit_data(entries)?;
5116            stats.audit_engagement_count = audit_snapshot.engagements.len();
5117            stats.audit_workpaper_count = audit_snapshot.workpapers.len();
5118            stats.audit_evidence_count = audit_snapshot.evidence.len();
5119            stats.audit_risk_count = audit_snapshot.risk_assessments.len();
5120            stats.audit_finding_count = audit_snapshot.findings.len();
5121            stats.audit_judgment_count = audit_snapshot.judgments.len();
5122            stats.audit_confirmation_count = audit_snapshot.confirmations.len();
5123            stats.audit_confirmation_response_count = audit_snapshot.confirmation_responses.len();
5124            stats.audit_procedure_step_count = audit_snapshot.procedure_steps.len();
5125            stats.audit_sample_count = audit_snapshot.samples.len();
5126            stats.audit_analytical_result_count = audit_snapshot.analytical_results.len();
5127            stats.audit_ia_function_count = audit_snapshot.ia_functions.len();
5128            stats.audit_ia_report_count = audit_snapshot.ia_reports.len();
5129            stats.audit_related_party_count = audit_snapshot.related_parties.len();
5130            stats.audit_related_party_transaction_count =
5131                audit_snapshot.related_party_transactions.len();
5132            info!(
5133                "Audit data generated: {} engagements, {} workpapers, {} evidence, {} risks, \
5134                 {} findings, {} judgments, {} confirmations, {} procedure steps, {} samples, \
5135                 {} analytical results, {} IA functions, {} IA reports, {} related parties, \
5136                 {} RP transactions",
5137                stats.audit_engagement_count,
5138                stats.audit_workpaper_count,
5139                stats.audit_evidence_count,
5140                stats.audit_risk_count,
5141                stats.audit_finding_count,
5142                stats.audit_judgment_count,
5143                stats.audit_confirmation_count,
5144                stats.audit_procedure_step_count,
5145                stats.audit_sample_count,
5146                stats.audit_analytical_result_count,
5147                stats.audit_ia_function_count,
5148                stats.audit_ia_report_count,
5149                stats.audit_related_party_count,
5150                stats.audit_related_party_transaction_count,
5151            );
5152            self.check_resources_with_log("post-audit")?;
5153            Ok(audit_snapshot)
5154        } else {
5155            debug!("Phase 8: Skipped (audit generation disabled)");
5156            Ok(AuditSnapshot::default())
5157        }
5158    }
5159
5160    /// Phase 9: Generate banking KYC/AML data.
5161    fn phase_banking_data(
5162        &mut self,
5163        stats: &mut EnhancedGenerationStatistics,
5164    ) -> SynthResult<BankingSnapshot> {
5165        if self.phase_config.generate_banking {
5166            info!("Phase 9: Generating Banking KYC/AML Data");
5167            let banking_snapshot = self.generate_banking_data()?;
5168            stats.banking_customer_count = banking_snapshot.customers.len();
5169            stats.banking_account_count = banking_snapshot.accounts.len();
5170            stats.banking_transaction_count = banking_snapshot.transactions.len();
5171            stats.banking_suspicious_count = banking_snapshot.suspicious_count;
5172            info!(
5173                "Banking data generated: {} customers, {} accounts, {} transactions ({} suspicious)",
5174                stats.banking_customer_count, stats.banking_account_count,
5175                stats.banking_transaction_count, stats.banking_suspicious_count
5176            );
5177            self.check_resources_with_log("post-banking")?;
5178            Ok(banking_snapshot)
5179        } else {
5180            debug!("Phase 9: Skipped (banking generation disabled)");
5181            Ok(BankingSnapshot::default())
5182        }
5183    }
5184
5185    /// Phase 10: Export accounting network graphs for ML training.
5186    fn phase_graph_export(
5187        &mut self,
5188        entries: &[JournalEntry],
5189        coa: &Arc<ChartOfAccounts>,
5190        stats: &mut EnhancedGenerationStatistics,
5191    ) -> SynthResult<GraphExportSnapshot> {
5192        if self.phase_config.generate_graph_export && !entries.is_empty() {
5193            info!("Phase 10: Exporting Accounting Network Graphs");
5194            match self.export_graphs(entries, coa, stats) {
5195                Ok(snapshot) => {
5196                    info!(
5197                        "Graph export complete: {} graphs ({} nodes, {} edges)",
5198                        snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
5199                    );
5200                    Ok(snapshot)
5201                }
5202                Err(e) => {
5203                    warn!("Phase 10: Graph export failed: {}", e);
5204                    Ok(GraphExportSnapshot::default())
5205                }
5206            }
5207        } else {
5208            debug!("Phase 10: Skipped (graph export disabled or no entries)");
5209            Ok(GraphExportSnapshot::default())
5210        }
5211    }
5212
5213    /// Phase 19b: Export multi-layer hypergraph for RustGraph integration.
5214    #[allow(clippy::too_many_arguments)]
5215    fn phase_hypergraph_export(
5216        &self,
5217        coa: &Arc<ChartOfAccounts>,
5218        entries: &[JournalEntry],
5219        document_flows: &DocumentFlowSnapshot,
5220        sourcing: &SourcingSnapshot,
5221        hr: &HrSnapshot,
5222        manufacturing: &ManufacturingSnapshot,
5223        banking: &BankingSnapshot,
5224        audit: &AuditSnapshot,
5225        financial_reporting: &FinancialReportingSnapshot,
5226        ocpm: &OcpmSnapshot,
5227        compliance: &ComplianceRegulationsSnapshot,
5228        stats: &mut EnhancedGenerationStatistics,
5229    ) -> SynthResult<()> {
5230        if self.config.graph_export.hypergraph.enabled && !entries.is_empty() {
5231            info!("Phase 19b: Exporting Multi-Layer Hypergraph");
5232            match self.export_hypergraph(
5233                coa,
5234                entries,
5235                document_flows,
5236                sourcing,
5237                hr,
5238                manufacturing,
5239                banking,
5240                audit,
5241                financial_reporting,
5242                ocpm,
5243                compliance,
5244                stats,
5245            ) {
5246                Ok(info) => {
5247                    info!(
5248                        "Hypergraph export complete: {} nodes, {} edges, {} hyperedges",
5249                        info.node_count, info.edge_count, info.hyperedge_count
5250                    );
5251                }
5252                Err(e) => {
5253                    warn!("Phase 10b: Hypergraph export failed: {}", e);
5254                }
5255            }
5256        } else {
5257            debug!("Phase 10b: Skipped (hypergraph export disabled or no entries)");
5258        }
5259        Ok(())
5260    }
5261
5262    /// Phase 11: LLM Enrichment.
5263    ///
5264    /// Uses an LLM provider (mock by default) to enrich vendor names with
5265    /// realistic, context-aware names. This phase is non-blocking: failures
5266    /// log a warning but do not stop the generation pipeline.
5267    fn phase_llm_enrichment(&mut self, stats: &mut EnhancedGenerationStatistics) {
5268        if !self.config.llm.enabled {
5269            debug!("Phase 11: Skipped (LLM enrichment disabled)");
5270            return;
5271        }
5272
5273        info!("Phase 11: Starting LLM Enrichment");
5274        let start = std::time::Instant::now();
5275
5276        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5277            // Select provider: use HttpLlmProvider when a non-mock provider is configured
5278            // and the corresponding API key environment variable is present.
5279            let provider: Arc<dyn datasynth_core::llm::LlmProvider> = {
5280                let schema_provider = &self.config.llm.provider;
5281                let api_key_env = match schema_provider.as_str() {
5282                    "openai" => Some("OPENAI_API_KEY"),
5283                    "anthropic" => Some("ANTHROPIC_API_KEY"),
5284                    "custom" => Some("LLM_API_KEY"),
5285                    _ => None,
5286                };
5287                if let Some(key_env) = api_key_env {
5288                    if std::env::var(key_env).is_ok() {
5289                        let llm_config = datasynth_core::llm::LlmConfig {
5290                            model: self.config.llm.model.clone(),
5291                            api_key_env: key_env.to_string(),
5292                            ..datasynth_core::llm::LlmConfig::default()
5293                        };
5294                        match HttpLlmProvider::new(llm_config) {
5295                            Ok(p) => Arc::new(p),
5296                            Err(e) => {
5297                                warn!(
5298                                    "Failed to create HttpLlmProvider: {}; falling back to mock",
5299                                    e
5300                                );
5301                                Arc::new(MockLlmProvider::new(self.seed))
5302                            }
5303                        }
5304                    } else {
5305                        Arc::new(MockLlmProvider::new(self.seed))
5306                    }
5307                } else {
5308                    Arc::new(MockLlmProvider::new(self.seed))
5309                }
5310            };
5311            // v4.1.1+: multi-category enrichment. Vendors remain the
5312            // default path; customers and materials opt in via
5313            // `llm.enrich_customers` / `llm.enrich_materials` flags.
5314            let industry = format!("{:?}", self.config.global.industry);
5315
5316            let vendor_enricher =
5317                datasynth_generators::llm_enrichment::VendorLlmEnricher::new(Arc::clone(&provider));
5318            let max_vendors = self
5319                .config
5320                .llm
5321                .max_vendor_enrichments
5322                .min(self.master_data.vendors.len());
5323            let mut vendors_enriched = 0usize;
5324            for vendor in self.master_data.vendors.iter_mut().take(max_vendors) {
5325                match vendor_enricher.enrich_vendor_name(&industry, "general", &vendor.country) {
5326                    Ok(name) => {
5327                        vendor.name = name;
5328                        vendors_enriched += 1;
5329                    }
5330                    Err(e) => warn!(
5331                        "LLM vendor enrichment failed for {}: {}",
5332                        vendor.vendor_id, e
5333                    ),
5334                }
5335            }
5336
5337            let mut customers_enriched = 0usize;
5338            if self.config.llm.enrich_customers {
5339                let customer_enricher =
5340                    datasynth_generators::llm_enrichment::CustomerLlmEnricher::new(Arc::clone(
5341                        &provider,
5342                    ));
5343                let max_customers = self
5344                    .config
5345                    .llm
5346                    .max_customer_enrichments
5347                    .min(self.master_data.customers.len());
5348                for customer in self.master_data.customers.iter_mut().take(max_customers) {
5349                    match customer_enricher.enrich_customer_name(
5350                        &industry,
5351                        "general",
5352                        &customer.country,
5353                    ) {
5354                        Ok(name) => {
5355                            customer.name = name;
5356                            customers_enriched += 1;
5357                        }
5358                        Err(e) => warn!(
5359                            "LLM customer enrichment failed for {}: {}",
5360                            customer.customer_id, e
5361                        ),
5362                    }
5363                }
5364            }
5365
5366            let mut materials_enriched = 0usize;
5367            if self.config.llm.enrich_materials {
5368                let material_enricher =
5369                    datasynth_generators::llm_enrichment::MaterialLlmEnricher::new(Arc::clone(
5370                        &provider,
5371                    ));
5372                let max_materials = self
5373                    .config
5374                    .llm
5375                    .max_material_enrichments
5376                    .min(self.master_data.materials.len());
5377                for material in self.master_data.materials.iter_mut().take(max_materials) {
5378                    let material_type = format!("{:?}", material.material_type);
5379                    match material_enricher.enrich_material_description(&material_type, &industry) {
5380                        Ok(desc) => {
5381                            material.description = desc;
5382                            materials_enriched += 1;
5383                        }
5384                        Err(e) => warn!(
5385                            "LLM material enrichment failed for {}: {}",
5386                            material.material_id, e
5387                        ),
5388                    }
5389                }
5390            }
5391
5392            (vendors_enriched, customers_enriched, materials_enriched)
5393        }));
5394
5395        match result {
5396            Ok((v, c, m)) => {
5397                stats.llm_vendors_enriched = v;
5398                stats.llm_customers_enriched = c;
5399                stats.llm_materials_enriched = m;
5400                let elapsed = start.elapsed();
5401                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5402                info!(
5403                    "Phase 11 complete: {} vendors, {} customers, {} materials enriched in {}ms",
5404                    v, c, m, stats.llm_enrichment_ms
5405                );
5406            }
5407            Err(_) => {
5408                let elapsed = start.elapsed();
5409                stats.llm_enrichment_ms = elapsed.as_millis() as u64;
5410                warn!("Phase 11: LLM enrichment failed (panic caught), continuing");
5411            }
5412        }
5413    }
5414
5415    /// Phase 12: Diffusion Enhancement.
5416    ///
5417    /// Generates a sample set matching distribution properties from the
5418    /// generated data. v4.4.0+ honours `config.diffusion.backend`:
5419    /// - `"statistical"` (default) — moment-matching backend, always fast.
5420    /// - `"neural"` / `"hybrid"` — candle-based score network. Requires
5421    ///   the `neural` Cargo feature; falls back to statistical when the
5422    ///   feature isn't compiled in, with a loud warning.
5423    ///
5424    /// This phase is non-blocking: failures log a warning but do not
5425    /// stop the pipeline.
5426    fn phase_diffusion_enhancement(
5427        &self,
5428        #[cfg_attr(not(feature = "neural"), allow(unused_variables))] entries: &[JournalEntry],
5429        stats: &mut EnhancedGenerationStatistics,
5430    ) {
5431        if !self.config.diffusion.enabled {
5432            debug!("Phase 12: Skipped (diffusion enhancement disabled)");
5433            return;
5434        }
5435
5436        info!("Phase 12: Starting Diffusion Enhancement");
5437        let start = std::time::Instant::now();
5438
5439        let backend_choice = self.config.diffusion.backend.as_str();
5440        let use_neural = matches!(backend_choice, "neural" | "hybrid");
5441
5442        if use_neural {
5443            #[cfg(feature = "neural")]
5444            {
5445                match self.run_neural_diffusion_phase(entries) {
5446                    Ok(sample_count) => {
5447                        stats.diffusion_samples_generated = sample_count;
5448                        let elapsed = start.elapsed();
5449                        stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5450                        info!(
5451                            "Phase 12 complete ({}): {} samples in {}ms",
5452                            backend_choice, sample_count, stats.diffusion_enhancement_ms
5453                        );
5454                        return;
5455                    }
5456                    Err(e) => {
5457                        warn!(
5458                            "Phase 12: neural diffusion failed: {e}. Falling back to statistical."
5459                        );
5460                        // Fall through to statistical path below.
5461                    }
5462                }
5463            }
5464            #[cfg(not(feature = "neural"))]
5465            {
5466                warn!(
5467                    "Phase 12: backend='{}' requested but the `neural` Cargo feature is \
5468                     not compiled in — falling back to statistical. Rebuild with \
5469                     `--features neural` (or `neural-cuda` for GPU) to enable.",
5470                    backend_choice
5471                );
5472            }
5473        } else if !matches!(backend_choice, "statistical" | "") {
5474            warn!(
5475                "Phase 12: unknown backend '{}', falling back to statistical",
5476                backend_choice
5477            );
5478        }
5479
5480        // Statistical path (default + fallback).
5481        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5482            let means = vec![5000.0, 3.0, 2.0];
5483            let stds = vec![2000.0, 1.5, 1.0];
5484
5485            let diffusion_config = DiffusionConfig {
5486                n_steps: self.config.diffusion.n_steps,
5487                seed: self.seed,
5488                ..Default::default()
5489            };
5490
5491            let backend = StatisticalDiffusionBackend::new(means, stds, diffusion_config);
5492            let n_samples = self.config.diffusion.sample_size;
5493            let n_features = 3;
5494            backend.generate(n_samples, n_features, self.seed).len()
5495        }));
5496
5497        match result {
5498            Ok(sample_count) => {
5499                stats.diffusion_samples_generated = sample_count;
5500                let elapsed = start.elapsed();
5501                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5502                info!(
5503                    "Phase 12 complete (statistical): {} samples in {}ms",
5504                    sample_count, stats.diffusion_enhancement_ms
5505                );
5506            }
5507            Err(_) => {
5508                let elapsed = start.elapsed();
5509                stats.diffusion_enhancement_ms = elapsed.as_millis() as u64;
5510                warn!("Phase 12: Diffusion enhancement failed (panic caught), continuing");
5511            }
5512        }
5513    }
5514
5515    /// Neural-backend execution — either load a pre-trained checkpoint
5516    /// (when `config.diffusion.neural.checkpoint_path` is set) or train
5517    /// from the first batch of JE amounts. Returns the sample count
5518    /// produced; any error bubbles up to the statistical fallback.
5519    #[cfg(feature = "neural")]
5520    fn run_neural_diffusion_phase(&self, entries: &[JournalEntry]) -> Result<usize, SynthError> {
5521        use datasynth_core::diffusion::{DiffusionBackend, NeuralDiffusionBackend};
5522
5523        if entries.is_empty() {
5524            return Err(SynthError::generation(
5525                "neural diffusion: no journal entries available as training data",
5526            ));
5527        }
5528
5529        let training_data: Vec<Vec<f64>> = entries
5530            .iter()
5531            .take(5000)
5532            .map(|je| {
5533                let total_amount: f64 = je
5534                    .lines
5535                    .iter()
5536                    .filter(|l| l.debit_amount > rust_decimal::Decimal::ZERO)
5537                    .map(|l| {
5538                        use rust_decimal::prelude::ToPrimitive;
5539                        l.debit_amount.to_f64().unwrap_or(0.0)
5540                    })
5541                    .sum();
5542                let line_count = je.lines.len() as f64;
5543                // Use the approval-workflow depth as the third feature
5544                // (proxy for complexity / risk). `None` → 1.
5545                let approval_level = je
5546                    .header
5547                    .approval_workflow
5548                    .as_ref()
5549                    .map(|w| w.required_levels as f64)
5550                    .unwrap_or(1.0);
5551                vec![total_amount, line_count, approval_level]
5552            })
5553            .collect();
5554
5555        let n_features = training_data.first().map(|r| r.len()).unwrap_or(3);
5556
5557        let cfg = &self.config.diffusion;
5558        let neural_cfg = &cfg.neural;
5559
5560        let backend: NeuralDiffusionBackend = if let Some(ckpt_path) =
5561            neural_cfg.checkpoint_path.as_ref()
5562        {
5563            let path = std::path::Path::new(ckpt_path);
5564            info!(
5565                "  Neural diffusion: loading checkpoint from {}",
5566                path.display()
5567            );
5568            NeuralDiffusionBackend::load(path)
5569                .map_err(|e| SynthError::generation(format!("checkpoint load failed: {e}")))?
5570        } else {
5571            use datasynth_core::diffusion::{NeuralDiffusionTrainer, NeuralTrainingConfig};
5572            info!(
5573                "  Neural diffusion: training score network on {} rows × {} features, \
5574                     {} epochs, hidden_dims={:?}",
5575                training_data.len(),
5576                n_features,
5577                neural_cfg.training_epochs,
5578                neural_cfg.hidden_dims
5579            );
5580            let training_config = NeuralTrainingConfig {
5581                n_steps: cfg.n_steps,
5582                schedule: cfg.schedule.clone(),
5583                hidden_dims: neural_cfg.hidden_dims.clone(),
5584                timestep_embed_dim: neural_cfg.timestep_embed_dim,
5585                learning_rate: neural_cfg.learning_rate,
5586                epochs: neural_cfg.training_epochs,
5587                batch_size: neural_cfg.batch_size,
5588            };
5589            let (backend, report) =
5590                NeuralDiffusionTrainer::train(&training_data, &training_config, self.seed)
5591                    .map_err(|e| SynthError::generation(format!("neural training failed: {e}")))?;
5592            info!(
5593                "  Neural diffusion: training done — {} epochs, final_loss={:.4}",
5594                report.epochs_completed, report.final_loss
5595            );
5596            backend
5597        };
5598
5599        let samples = backend.generate(cfg.sample_size, n_features, self.seed);
5600        Ok(samples.len())
5601    }
5602
5603    /// Phase 13: Causal Overlay.
5604    ///
5605    /// Builds a structural causal model from a built-in template (e.g.,
5606    /// fraud_detection) and generates causal samples. Optionally validates
5607    /// that the output respects the causal structure. This phase is
5608    /// non-blocking: failures log a warning but do not stop the pipeline.
5609    fn phase_causal_overlay(&self, stats: &mut EnhancedGenerationStatistics) {
5610        if !self.config.causal.enabled {
5611            debug!("Phase 13: Skipped (causal generation disabled)");
5612            return;
5613        }
5614
5615        info!("Phase 13: Starting Causal Overlay");
5616        let start = std::time::Instant::now();
5617
5618        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5619            // Select template based on config
5620            let graph = match self.config.causal.template.as_str() {
5621                "revenue_cycle" => CausalGraph::revenue_cycle_template(),
5622                _ => CausalGraph::fraud_detection_template(),
5623            };
5624
5625            let scm = StructuralCausalModel::new(graph.clone())
5626                .map_err(|e| SynthError::generation(format!("Failed to build SCM: {e}")))?;
5627
5628            let n_samples = self.config.causal.sample_size;
5629            let samples = scm
5630                .generate(n_samples, self.seed)
5631                .map_err(|e| SynthError::generation(format!("SCM generation failed: {e}")))?;
5632
5633            // Optionally validate causal structure
5634            let validation_passed = if self.config.causal.validate {
5635                let report = CausalValidator::validate_causal_structure(&samples, &graph);
5636                if report.valid {
5637                    info!(
5638                        "Causal validation passed: all {} checks OK",
5639                        report.checks.len()
5640                    );
5641                } else {
5642                    warn!(
5643                        "Causal validation: {} violations detected: {:?}",
5644                        report.violations.len(),
5645                        report.violations
5646                    );
5647                }
5648                Some(report.valid)
5649            } else {
5650                None
5651            };
5652
5653            Ok::<(usize, Option<bool>), SynthError>((samples.len(), validation_passed))
5654        }));
5655
5656        match result {
5657            Ok(Ok((sample_count, validation_passed))) => {
5658                stats.causal_samples_generated = sample_count;
5659                stats.causal_validation_passed = validation_passed;
5660                let elapsed = start.elapsed();
5661                stats.causal_generation_ms = elapsed.as_millis() as u64;
5662                info!(
5663                    "Phase 13 complete: {} causal samples generated in {}ms (validation: {:?})",
5664                    sample_count, stats.causal_generation_ms, validation_passed,
5665                );
5666            }
5667            Ok(Err(e)) => {
5668                let elapsed = start.elapsed();
5669                stats.causal_generation_ms = elapsed.as_millis() as u64;
5670                warn!("Phase 13: Causal generation failed: {}", e);
5671            }
5672            Err(_) => {
5673                let elapsed = start.elapsed();
5674                stats.causal_generation_ms = elapsed.as_millis() as u64;
5675                warn!("Phase 13: Causal generation failed (panic caught), continuing");
5676            }
5677        }
5678    }
5679
5680    /// Phase 14: Generate S2C sourcing data.
5681    fn phase_sourcing_data(
5682        &mut self,
5683        stats: &mut EnhancedGenerationStatistics,
5684    ) -> SynthResult<SourcingSnapshot> {
5685        if !self.phase_config.generate_sourcing && !self.config.source_to_pay.enabled {
5686            debug!("Phase 14: Skipped (sourcing generation disabled)");
5687            return Ok(SourcingSnapshot::default());
5688        }
5689        let degradation = self.check_resources()?;
5690        if degradation >= DegradationLevel::Reduced {
5691            debug!(
5692                "Phase skipped due to resource pressure (degradation: {:?})",
5693                degradation
5694            );
5695            return Ok(SourcingSnapshot::default());
5696        }
5697
5698        info!("Phase 14: Generating S2C Sourcing Data");
5699        let seed = self.seed;
5700
5701        // Gather vendor data from master data
5702        let vendor_ids: Vec<String> = self
5703            .master_data
5704            .vendors
5705            .iter()
5706            .map(|v| v.vendor_id.clone())
5707            .collect();
5708        if vendor_ids.is_empty() {
5709            debug!("Phase 14: Skipped (no vendors available)");
5710            return Ok(SourcingSnapshot::default());
5711        }
5712
5713        let categories: Vec<(String, String)> = vec![
5714            ("CAT-RAW".to_string(), "Raw Materials".to_string()),
5715            ("CAT-OFF".to_string(), "Office Supplies".to_string()),
5716            ("CAT-IT".to_string(), "IT Equipment".to_string()),
5717            ("CAT-SVC".to_string(), "Professional Services".to_string()),
5718            ("CAT-LOG".to_string(), "Logistics".to_string()),
5719        ];
5720        let categories_with_spend: Vec<(String, String, rust_decimal::Decimal)> = categories
5721            .iter()
5722            .map(|(id, name)| {
5723                (
5724                    id.clone(),
5725                    name.clone(),
5726                    rust_decimal::Decimal::from(100_000),
5727                )
5728            })
5729            .collect();
5730
5731        let company_code = self
5732            .config
5733            .companies
5734            .first()
5735            .map(|c| c.code.as_str())
5736            .unwrap_or("1000");
5737        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5738            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5739        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5740        let fiscal_year = start_date.year() as u16;
5741        let owner_ids: Vec<String> = self
5742            .master_data
5743            .employees
5744            .iter()
5745            .take(5)
5746            .map(|e| e.employee_id.clone())
5747            .collect();
5748        let owner_id = owner_ids
5749            .first()
5750            .map(std::string::String::as_str)
5751            .unwrap_or("BUYER-001");
5752
5753        // Step 1: Spend Analysis
5754        let mut spend_gen = SpendAnalysisGenerator::new(seed);
5755        let spend_analyses =
5756            spend_gen.generate(company_code, &vendor_ids, &categories, fiscal_year);
5757
5758        // Step 2: Sourcing Projects
5759        let mut project_gen = SourcingProjectGenerator::new(seed + 1);
5760        let sourcing_projects = if owner_ids.is_empty() {
5761            Vec::new()
5762        } else {
5763            project_gen.generate(
5764                company_code,
5765                &categories_with_spend,
5766                &owner_ids,
5767                start_date,
5768                self.config.global.period_months,
5769            )
5770        };
5771        stats.sourcing_project_count = sourcing_projects.len();
5772
5773        // Step 3: Qualifications
5774        let qual_vendor_ids: Vec<String> = vendor_ids.iter().take(20).cloned().collect();
5775        let mut qual_gen = QualificationGenerator::new(seed + 2);
5776        let qualifications = qual_gen.generate(
5777            company_code,
5778            &qual_vendor_ids,
5779            sourcing_projects.first().map(|p| p.project_id.as_str()),
5780            owner_id,
5781            start_date,
5782        );
5783
5784        // Step 4: RFx Events
5785        let mut rfx_gen = RfxGenerator::new(seed + 3);
5786        let rfx_events: Vec<RfxEvent> = sourcing_projects
5787            .iter()
5788            .map(|proj| {
5789                let qualified_vids: Vec<String> = vendor_ids.iter().take(5).cloned().collect();
5790                rfx_gen.generate(
5791                    company_code,
5792                    &proj.project_id,
5793                    &proj.category_id,
5794                    &qualified_vids,
5795                    owner_id,
5796                    start_date,
5797                    50000.0,
5798                )
5799            })
5800            .collect();
5801        stats.rfx_event_count = rfx_events.len();
5802
5803        // Step 5: Bids
5804        let mut bid_gen = BidGenerator::new(seed + 4);
5805        let mut all_bids = Vec::new();
5806        for rfx in &rfx_events {
5807            let bidder_count = vendor_ids.len().clamp(2, 5);
5808            let responding: Vec<String> = vendor_ids.iter().take(bidder_count).cloned().collect();
5809            let bids = bid_gen.generate(rfx, &responding, start_date);
5810            all_bids.extend(bids);
5811        }
5812        stats.bid_count = all_bids.len();
5813
5814        // Step 6: Bid Evaluations
5815        let mut eval_gen = BidEvaluationGenerator::new(seed + 5);
5816        let bid_evaluations: Vec<BidEvaluation> = rfx_events
5817            .iter()
5818            .map(|rfx| {
5819                let rfx_bids: Vec<SupplierBid> = all_bids
5820                    .iter()
5821                    .filter(|b| b.rfx_id == rfx.rfx_id)
5822                    .cloned()
5823                    .collect();
5824                eval_gen.evaluate(rfx, &rfx_bids, owner_id)
5825            })
5826            .collect();
5827
5828        // Step 7: Contracts from winning bids
5829        let mut contract_gen = ContractGenerator::new(seed + 6);
5830        let contracts: Vec<ProcurementContract> = bid_evaluations
5831            .iter()
5832            .zip(rfx_events.iter())
5833            .filter_map(|(eval, rfx)| {
5834                eval.ranked_bids.first().and_then(|winner| {
5835                    all_bids
5836                        .iter()
5837                        .find(|b| b.bid_id == winner.bid_id)
5838                        .map(|winning_bid| {
5839                            contract_gen.generate_from_bid(
5840                                winning_bid,
5841                                Some(&rfx.sourcing_project_id),
5842                                &rfx.category_id,
5843                                owner_id,
5844                                start_date,
5845                            )
5846                        })
5847                })
5848            })
5849            .collect();
5850        stats.contract_count = contracts.len();
5851
5852        // Step 8: Catalog Items
5853        let mut catalog_gen = CatalogGenerator::new(seed + 7);
5854        let catalog_items = catalog_gen.generate(&contracts);
5855        stats.catalog_item_count = catalog_items.len();
5856
5857        // Step 9: Scorecards
5858        let mut scorecard_gen = ScorecardGenerator::new(seed + 8);
5859        let vendor_contracts: Vec<(String, Vec<&ProcurementContract>)> = contracts
5860            .iter()
5861            .fold(
5862                std::collections::HashMap::<String, Vec<&ProcurementContract>>::new(),
5863                |mut acc, c| {
5864                    acc.entry(c.vendor_id.clone()).or_default().push(c);
5865                    acc
5866                },
5867            )
5868            .into_iter()
5869            .collect();
5870        let scorecards = scorecard_gen.generate(
5871            company_code,
5872            &vendor_contracts,
5873            start_date,
5874            end_date,
5875            owner_id,
5876        );
5877        stats.scorecard_count = scorecards.len();
5878
5879        // Back-populate cross-references on sourcing projects (Task 35)
5880        // Link each project to its RFx events, contracts, and spend analyses
5881        let mut sourcing_projects = sourcing_projects;
5882        for project in &mut sourcing_projects {
5883            // Link RFx events generated for this project
5884            project.rfx_ids = rfx_events
5885                .iter()
5886                .filter(|rfx| rfx.sourcing_project_id == project.project_id)
5887                .map(|rfx| rfx.rfx_id.clone())
5888                .collect();
5889
5890            // Link contract awarded from this project's RFx
5891            project.contract_id = contracts
5892                .iter()
5893                .find(|c| {
5894                    c.sourcing_project_id
5895                        .as_deref()
5896                        .is_some_and(|sp| sp == project.project_id)
5897                })
5898                .map(|c| c.contract_id.clone());
5899
5900            // Link spend analysis for matching category (use category_id as the reference)
5901            project.spend_analysis_id = spend_analyses
5902                .iter()
5903                .find(|sa| sa.category_id == project.category_id)
5904                .map(|sa| sa.category_id.clone());
5905        }
5906
5907        info!(
5908            "S2C sourcing generated: {} projects, {} RFx, {} bids, {} contracts, {} catalog items, {} scorecards",
5909            stats.sourcing_project_count, stats.rfx_event_count, stats.bid_count,
5910            stats.contract_count, stats.catalog_item_count, stats.scorecard_count
5911        );
5912        self.check_resources_with_log("post-sourcing")?;
5913
5914        Ok(SourcingSnapshot {
5915            spend_analyses,
5916            sourcing_projects,
5917            qualifications,
5918            rfx_events,
5919            bids: all_bids,
5920            bid_evaluations,
5921            contracts,
5922            catalog_items,
5923            scorecards,
5924        })
5925    }
5926
5927    /// Build a [`GroupStructure`] from the current company configuration.
5928    ///
5929    /// The first company in the configuration is treated as the ultimate parent.
5930    /// All remaining companies become wholly-owned (100 %) subsidiaries with
5931    /// [`GroupConsolidationMethod::FullConsolidation`] by default.
5932    fn build_group_structure(&self) -> datasynth_core::models::intercompany::GroupStructure {
5933        use datasynth_core::models::intercompany::{GroupStructure, SubsidiaryRelationship};
5934
5935        let parent_code = self
5936            .config
5937            .companies
5938            .first()
5939            .map(|c| c.code.clone())
5940            .unwrap_or_else(|| "PARENT".to_string());
5941
5942        let mut group = GroupStructure::new(parent_code);
5943
5944        for company in self.config.companies.iter().skip(1) {
5945            let sub =
5946                SubsidiaryRelationship::new_full(company.code.clone(), company.currency.clone());
5947            group.add_subsidiary(sub);
5948        }
5949
5950        group
5951    }
5952
5953    /// Phase 14b: Generate intercompany transactions, matching, and eliminations.
5954    fn phase_intercompany(
5955        &mut self,
5956        journal_entries: &[JournalEntry],
5957        stats: &mut EnhancedGenerationStatistics,
5958    ) -> SynthResult<IntercompanySnapshot> {
5959        // Skip if intercompany is disabled in config
5960        if !self.phase_config.generate_intercompany && !self.config.intercompany.enabled {
5961            debug!("Phase 14b: Skipped (intercompany generation disabled)");
5962            return Ok(IntercompanySnapshot::default());
5963        }
5964
5965        // Intercompany requires at least 2 companies
5966        if self.config.companies.len() < 2 {
5967            debug!(
5968                "Phase 14b: Skipped (intercompany requires 2+ companies, found {})",
5969                self.config.companies.len()
5970            );
5971            return Ok(IntercompanySnapshot::default());
5972        }
5973
5974        info!("Phase 14b: Generating Intercompany Transactions");
5975
5976        // Build the group structure early — used by ISA 600 component auditor scope
5977        // and consolidated financial statement generators downstream.
5978        let group_structure = self.build_group_structure();
5979        debug!(
5980            "Group structure built: parent={}, subsidiaries={}",
5981            group_structure.parent_entity,
5982            group_structure.subsidiaries.len()
5983        );
5984
5985        let seed = self.seed;
5986        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
5987            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
5988        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
5989
5990        // Build ownership structure from company configs
5991        // First company is treated as the parent, remaining are subsidiaries
5992        let parent_code = self.config.companies[0].code.clone();
5993        let mut ownership_structure =
5994            datasynth_core::models::intercompany::OwnershipStructure::new(parent_code.clone());
5995
5996        for (i, company) in self.config.companies.iter().skip(1).enumerate() {
5997            let relationship = datasynth_core::models::intercompany::IntercompanyRelationship::new(
5998                format!("REL{:03}", i + 1),
5999                parent_code.clone(),
6000                company.code.clone(),
6001                rust_decimal::Decimal::from(100), // Default 100% ownership
6002                start_date,
6003            );
6004            ownership_structure.add_relationship(relationship);
6005        }
6006
6007        // Convert config transfer pricing method to core model enum
6008        let tp_method = match self.config.intercompany.transfer_pricing_method {
6009            datasynth_config::schema::TransferPricingMethod::CostPlus => {
6010                datasynth_core::models::intercompany::TransferPricingMethod::CostPlus
6011            }
6012            datasynth_config::schema::TransferPricingMethod::ComparableUncontrolled => {
6013                datasynth_core::models::intercompany::TransferPricingMethod::ComparableUncontrolled
6014            }
6015            datasynth_config::schema::TransferPricingMethod::ResalePrice => {
6016                datasynth_core::models::intercompany::TransferPricingMethod::ResalePrice
6017            }
6018            datasynth_config::schema::TransferPricingMethod::TransactionalNetMargin => {
6019                datasynth_core::models::intercompany::TransferPricingMethod::TransactionalNetMargin
6020            }
6021            datasynth_config::schema::TransferPricingMethod::ProfitSplit => {
6022                datasynth_core::models::intercompany::TransferPricingMethod::ProfitSplit
6023            }
6024        };
6025
6026        // Build IC generator config from schema config
6027        let ic_currency = self
6028            .config
6029            .companies
6030            .first()
6031            .map(|c| c.currency.clone())
6032            .unwrap_or_else(|| "USD".to_string());
6033        let ic_gen_config = datasynth_generators::ICGeneratorConfig {
6034            ic_transaction_rate: self.config.intercompany.ic_transaction_rate,
6035            transfer_pricing_method: tp_method,
6036            markup_percent: rust_decimal::Decimal::from_f64_retain(
6037                self.config.intercompany.markup_percent,
6038            )
6039            .unwrap_or(rust_decimal::Decimal::from(5)),
6040            generate_matched_pairs: self.config.intercompany.generate_matched_pairs,
6041            default_currency: ic_currency,
6042            ..Default::default()
6043        };
6044
6045        // Create IC generator
6046        let mut ic_generator = datasynth_generators::ICGenerator::new(
6047            ic_gen_config,
6048            ownership_structure.clone(),
6049            seed + 50,
6050        );
6051
6052        // Generate IC transactions for the period
6053        // Use ~3 transactions per day as a reasonable default
6054        let transactions_per_day = 3;
6055        let matched_pairs = ic_generator.generate_transactions_for_period(
6056            start_date,
6057            end_date,
6058            transactions_per_day,
6059        );
6060
6061        // Generate IC source P2P/O2C documents
6062        let ic_doc_chains = ic_generator.generate_ic_document_chains(&matched_pairs);
6063        debug!(
6064            "Generated {} IC seller invoices, {} IC buyer POs",
6065            ic_doc_chains.seller_invoices.len(),
6066            ic_doc_chains.buyer_orders.len()
6067        );
6068
6069        // Generate journal entries from matched pairs
6070        let mut seller_entries = Vec::new();
6071        let mut buyer_entries = Vec::new();
6072        let fiscal_year = start_date.year();
6073
6074        for pair in &matched_pairs {
6075            let fiscal_period = pair.posting_date.month();
6076            let (seller_je, buyer_je) =
6077                ic_generator.generate_journal_entries(pair, fiscal_year, fiscal_period);
6078            seller_entries.push(seller_je);
6079            buyer_entries.push(buyer_je);
6080        }
6081
6082        // Run matching engine
6083        let matching_config = datasynth_generators::ICMatchingConfig {
6084            base_currency: self
6085                .config
6086                .companies
6087                .first()
6088                .map(|c| c.currency.clone())
6089                .unwrap_or_else(|| "USD".to_string()),
6090            ..Default::default()
6091        };
6092        let mut matching_engine = datasynth_generators::ICMatchingEngine::new(matching_config);
6093        matching_engine.load_matched_pairs(&matched_pairs);
6094        let matching_result = matching_engine.run_matching(end_date);
6095
6096        // Generate elimination entries if configured
6097        let mut elimination_entries = Vec::new();
6098        if self.config.intercompany.generate_eliminations {
6099            let elim_config = datasynth_generators::EliminationConfig {
6100                consolidation_entity: "GROUP".to_string(),
6101                base_currency: self
6102                    .config
6103                    .companies
6104                    .first()
6105                    .map(|c| c.currency.clone())
6106                    .unwrap_or_else(|| "USD".to_string()),
6107                ..Default::default()
6108            };
6109
6110            let mut elim_generator =
6111                datasynth_generators::EliminationGenerator::new(elim_config, ownership_structure);
6112
6113            let fiscal_period = format!("{}{:02}", fiscal_year, end_date.month());
6114            let all_balances: Vec<datasynth_core::models::intercompany::ICAggregatedBalance> =
6115                matching_result
6116                    .matched_balances
6117                    .iter()
6118                    .chain(matching_result.unmatched_balances.iter())
6119                    .cloned()
6120                    .collect();
6121
6122            // Build investment and equity maps from the group structure so that the
6123            // elimination generator can produce equity-investment elimination entries
6124            // (parent's investment in subsidiary vs. subsidiary's equity capital).
6125            //
6126            // investment_amounts key = "{parent}_{subsidiary}", value = net_assets × ownership_pct
6127            // equity_amounts key = subsidiary_code, value = map of equity_account → amount
6128            //   (split 10% share capital / 30% APIC / 60% retained earnings by convention)
6129            //
6130            // Net assets are derived from the journal entries using account-range heuristics:
6131            // assets (1xx) minus liabilities (2xx).  A fallback of 1_000_000 is used when
6132            // no JE data is available (IC phase runs early in the generation pipeline).
6133            let mut investment_amounts: std::collections::HashMap<String, rust_decimal::Decimal> =
6134                std::collections::HashMap::new();
6135            let mut equity_amounts: std::collections::HashMap<
6136                String,
6137                std::collections::HashMap<String, rust_decimal::Decimal>,
6138            > = std::collections::HashMap::new();
6139            {
6140                use rust_decimal::Decimal;
6141                let hundred = Decimal::from(100u32);
6142                let ten_pct = Decimal::new(10, 2); // 0.10
6143                let thirty_pct = Decimal::new(30, 2); // 0.30
6144                let sixty_pct = Decimal::new(60, 2); // 0.60
6145                let parent_code = &group_structure.parent_entity;
6146                for sub in &group_structure.subsidiaries {
6147                    let net_assets = {
6148                        let na = Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6149                        if na > Decimal::ZERO {
6150                            na
6151                        } else {
6152                            Decimal::from(1_000_000u64)
6153                        }
6154                    };
6155                    let ownership_pct = sub.ownership_percentage / hundred; // 0.0–1.0
6156                    let inv_key = format!("{}_{}", parent_code, sub.entity_code);
6157                    investment_amounts.insert(inv_key, (net_assets * ownership_pct).round_dp(2));
6158
6159                    // Split subsidiary equity into conventional components:
6160                    // 10 % share capital / 30 % APIC / 60 % retained earnings
6161                    let mut eq_map = std::collections::HashMap::new();
6162                    eq_map.insert("3100".to_string(), (net_assets * ten_pct).round_dp(2));
6163                    eq_map.insert("3200".to_string(), (net_assets * thirty_pct).round_dp(2));
6164                    eq_map.insert("3300".to_string(), (net_assets * sixty_pct).round_dp(2));
6165                    equity_amounts.insert(sub.entity_code.clone(), eq_map);
6166                }
6167            }
6168
6169            let journal = elim_generator.generate_eliminations(
6170                &fiscal_period,
6171                end_date,
6172                &all_balances,
6173                &matched_pairs,
6174                &investment_amounts,
6175                &equity_amounts,
6176            );
6177
6178            elimination_entries = journal.entries.clone();
6179        }
6180
6181        let matched_pair_count = matched_pairs.len();
6182        let elimination_entry_count = elimination_entries.len();
6183        let match_rate = matching_result.match_rate;
6184
6185        stats.ic_matched_pair_count = matched_pair_count;
6186        stats.ic_elimination_count = elimination_entry_count;
6187        stats.ic_transaction_count = seller_entries.len() + buyer_entries.len();
6188
6189        info!(
6190            "Intercompany data generated: {} matched pairs, {} JEs ({} seller + {} buyer), {} elimination entries, {:.1}% match rate",
6191            matched_pair_count,
6192            stats.ic_transaction_count,
6193            seller_entries.len(),
6194            buyer_entries.len(),
6195            elimination_entry_count,
6196            match_rate * 100.0
6197        );
6198        self.check_resources_with_log("post-intercompany")?;
6199
6200        // ----------------------------------------------------------------
6201        // NCI measurements: derive from group structure ownership percentages
6202        // ----------------------------------------------------------------
6203        let nci_measurements: Vec<datasynth_core::models::intercompany::NciMeasurement> = {
6204            use datasynth_core::models::intercompany::{GroupConsolidationMethod, NciMeasurement};
6205            use rust_decimal::Decimal;
6206
6207            let eight_pct = Decimal::new(8, 2); // 0.08
6208
6209            group_structure
6210                .subsidiaries
6211                .iter()
6212                .filter(|sub| {
6213                    sub.nci_percentage > Decimal::ZERO
6214                        && sub.consolidation_method == GroupConsolidationMethod::FullConsolidation
6215                })
6216                .map(|sub| {
6217                    // Compute net assets from actual journal entries for this subsidiary.
6218                    // Fall back to 1_000_000 when no JE data is available yet (e.g. the
6219                    // IC phase runs before the main JE batch has been populated).
6220                    let net_assets_from_jes =
6221                        Self::compute_entity_net_assets(journal_entries, &sub.entity_code);
6222
6223                    let net_assets = if net_assets_from_jes > Decimal::ZERO {
6224                        net_assets_from_jes.round_dp(2)
6225                    } else {
6226                        // Fallback: use a plausible base amount
6227                        Decimal::from(1_000_000u64)
6228                    };
6229
6230                    // Net income approximated as 8% of net assets
6231                    let net_income = (net_assets * eight_pct).round_dp(2);
6232
6233                    NciMeasurement::compute(
6234                        sub.entity_code.clone(),
6235                        sub.nci_percentage,
6236                        net_assets,
6237                        net_income,
6238                    )
6239                })
6240                .collect()
6241        };
6242
6243        if !nci_measurements.is_empty() {
6244            info!(
6245                "NCI measurements: {} subsidiaries with non-controlling interests",
6246                nci_measurements.len()
6247            );
6248        }
6249
6250        Ok(IntercompanySnapshot {
6251            group_structure: Some(group_structure),
6252            matched_pairs,
6253            seller_journal_entries: seller_entries,
6254            buyer_journal_entries: buyer_entries,
6255            elimination_entries,
6256            nci_measurements,
6257            ic_document_chains: Some(ic_doc_chains),
6258            matched_pair_count,
6259            elimination_entry_count,
6260            match_rate,
6261        })
6262    }
6263
6264    /// Phase 15: Generate bank reconciliations and financial statements.
6265    fn phase_financial_reporting(
6266        &mut self,
6267        document_flows: &DocumentFlowSnapshot,
6268        journal_entries: &[JournalEntry],
6269        coa: &Arc<ChartOfAccounts>,
6270        _hr: &HrSnapshot,
6271        _audit: &AuditSnapshot,
6272        stats: &mut EnhancedGenerationStatistics,
6273    ) -> SynthResult<FinancialReportingSnapshot> {
6274        let fs_enabled = self.phase_config.generate_financial_statements
6275            || self.config.financial_reporting.enabled;
6276        let br_enabled = self.phase_config.generate_bank_reconciliation;
6277
6278        if !fs_enabled && !br_enabled {
6279            debug!("Phase 15: Skipped (financial reporting disabled)");
6280            return Ok(FinancialReportingSnapshot::default());
6281        }
6282
6283        info!("Phase 15: Generating Financial Reporting Data");
6284
6285        let seed = self.seed;
6286        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6287            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
6288
6289        let mut financial_statements = Vec::new();
6290        let mut bank_reconciliations = Vec::new();
6291        let mut trial_balances = Vec::new();
6292        let mut segment_reports: Vec<datasynth_core::models::OperatingSegment> = Vec::new();
6293        let mut segment_reconciliations: Vec<datasynth_core::models::SegmentReconciliation> =
6294            Vec::new();
6295        // Standalone statements keyed by entity code
6296        let mut standalone_statements: std::collections::HashMap<String, Vec<FinancialStatement>> =
6297            std::collections::HashMap::new();
6298        // Consolidated statements (one per period)
6299        let mut consolidated_statements: Vec<FinancialStatement> = Vec::new();
6300        // Consolidation schedules (one per period)
6301        let mut consolidation_schedules: Vec<ConsolidationSchedule> = Vec::new();
6302
6303        // Generate financial statements from JE-derived trial balances.
6304        //
6305        // When journal entries are available, we use cumulative trial balances for
6306        // balance sheet accounts and current-period trial balances for income
6307        // statement accounts. We also track prior-period trial balances so the
6308        // generator can produce comparative amounts, and we build a proper
6309        // cash flow statement from working capital changes rather than random data.
6310        if fs_enabled {
6311            let has_journal_entries = !journal_entries.is_empty();
6312
6313            // Use FinancialStatementGenerator for balance sheet and income statement,
6314            // but build cash flow ourselves from TB data when JEs are available.
6315            let mut fs_gen = FinancialStatementGenerator::new(seed + 20);
6316            // Separate generator for consolidated statements (different seed offset)
6317            let mut cons_gen = FinancialStatementGenerator::new(seed + 21);
6318
6319            // Collect elimination JEs once (reused across periods)
6320            let elimination_entries: Vec<&JournalEntry> = journal_entries
6321                .iter()
6322                .filter(|je| je.header.is_elimination)
6323                .collect();
6324
6325            // Generate one set of statements per period, per entity
6326            for period in 0..self.config.global.period_months {
6327                let period_start = start_date + chrono::Months::new(period);
6328                let period_end =
6329                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6330                let fiscal_year = period_end.year() as u16;
6331                let fiscal_period = period_end.month() as u8;
6332                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6333
6334                // Build per-entity trial balances for this period (non-elimination JEs)
6335                // We accumulate them for the consolidation step.
6336                let mut entity_tb_map: std::collections::HashMap<
6337                    String,
6338                    std::collections::HashMap<String, rust_decimal::Decimal>,
6339                > = std::collections::HashMap::new();
6340
6341                // --- Standalone: one set of statements per company ---
6342                // v5.33: resolve once per phase. In single-shard / standalone
6343                // mode this is the primary country's framework; in group
6344                // mode each shard runs against its own entity (one company)
6345                // so the primary-country lookup is the entity's. Either way
6346                // the string drives framework-aware TB classification (Defect
6347                // A fix — German SKR / French PCG accounts no longer routed
6348                // through a US-only prefix table).
6349                let framework_str = self.resolve_framework_str();
6350                for (company_idx, company) in self.config.companies.iter().enumerate() {
6351                    let company_code = company.code.as_str();
6352                    let currency = company.currency.as_str();
6353                    // Use a unique seed offset per company to keep statements deterministic
6354                    // and distinct across companies
6355                    let company_seed_offset = 20u64 + (company_idx as u64 * 100);
6356                    let mut company_fs_gen =
6357                        FinancialStatementGenerator::new(seed + company_seed_offset);
6358
6359                    if has_journal_entries {
6360                        let tb_entries = Self::build_cumulative_trial_balance(
6361                            journal_entries,
6362                            coa,
6363                            company_code,
6364                            start_date,
6365                            period_end,
6366                            fiscal_year,
6367                            fiscal_period,
6368                            framework_str,
6369                        );
6370
6371                        // Accumulate per-entity category balances for consolidation
6372                        let entity_cat_map =
6373                            entity_tb_map.entry(company_code.to_string()).or_default();
6374                        for tb_entry in &tb_entries {
6375                            let net = tb_entry.debit_balance - tb_entry.credit_balance;
6376                            *entity_cat_map.entry(tb_entry.category.clone()).or_default() += net;
6377                        }
6378
6379                        let stmts = company_fs_gen.generate(
6380                            company_code,
6381                            currency,
6382                            &tb_entries,
6383                            period_start,
6384                            period_end,
6385                            fiscal_year,
6386                            fiscal_period,
6387                            None,
6388                            "SYS-AUTOCLOSE",
6389                        );
6390
6391                        let mut entity_stmts = Vec::new();
6392                        for stmt in stmts {
6393                            if stmt.statement_type == StatementType::CashFlowStatement {
6394                                let net_income = Self::calculate_net_income_from_tb(&tb_entries);
6395                                let cf_items = Self::build_cash_flow_from_trial_balances(
6396                                    &tb_entries,
6397                                    None,
6398                                    net_income,
6399                                );
6400                                entity_stmts.push(FinancialStatement {
6401                                    cash_flow_items: cf_items,
6402                                    ..stmt
6403                                });
6404                            } else {
6405                                entity_stmts.push(stmt);
6406                            }
6407                        }
6408
6409                        // Add to the flat financial_statements list (used by KPI/budget)
6410                        financial_statements.extend(entity_stmts.clone());
6411
6412                        // Store standalone per-entity
6413                        standalone_statements
6414                            .entry(company_code.to_string())
6415                            .or_default()
6416                            .extend(entity_stmts);
6417
6418                        // Only store trial balance for the first company in the period
6419                        // to avoid duplicates in the trial_balances list
6420                        if company_idx == 0 {
6421                            trial_balances.push(PeriodTrialBalance {
6422                                fiscal_year,
6423                                fiscal_period,
6424                                period_start,
6425                                period_end,
6426                                entries: tb_entries,
6427                                framework: framework_str.to_string(),
6428                            });
6429                        }
6430                    } else {
6431                        // Fallback: no JEs available
6432                        let tb_entries = Self::build_trial_balance_from_entries(
6433                            journal_entries,
6434                            coa,
6435                            company_code,
6436                            fiscal_year,
6437                            fiscal_period,
6438                            framework_str,
6439                        );
6440
6441                        let stmts = company_fs_gen.generate(
6442                            company_code,
6443                            currency,
6444                            &tb_entries,
6445                            period_start,
6446                            period_end,
6447                            fiscal_year,
6448                            fiscal_period,
6449                            None,
6450                            "SYS-AUTOCLOSE",
6451                        );
6452                        financial_statements.extend(stmts.clone());
6453                        standalone_statements
6454                            .entry(company_code.to_string())
6455                            .or_default()
6456                            .extend(stmts);
6457
6458                        if company_idx == 0 && !tb_entries.is_empty() {
6459                            trial_balances.push(PeriodTrialBalance {
6460                                fiscal_year,
6461                                fiscal_period,
6462                                period_start,
6463                                period_end,
6464                                entries: tb_entries,
6465                                framework: framework_str.to_string(),
6466                            });
6467                        }
6468                    }
6469                }
6470
6471                // --- Consolidated: aggregate all entities + apply eliminations ---
6472                // Use the primary (first) company's currency for the consolidated statement
6473                let group_currency = self
6474                    .config
6475                    .companies
6476                    .first()
6477                    .map(|c| c.currency.as_str())
6478                    .unwrap_or("USD");
6479
6480                // Build owned elimination entries for this period
6481                let period_eliminations: Vec<JournalEntry> = elimination_entries
6482                    .iter()
6483                    .filter(|je| {
6484                        je.header.fiscal_year == fiscal_year
6485                            && je.header.fiscal_period == fiscal_period
6486                    })
6487                    .map(|je| (*je).clone())
6488                    .collect();
6489
6490                let (cons_line_items, schedule) = ConsolidationGenerator::consolidate(
6491                    &entity_tb_map,
6492                    &period_eliminations,
6493                    &period_label,
6494                );
6495
6496                // Build a pseudo trial balance from consolidated line items for the
6497                // FinancialStatementGenerator to use (only for cash flow direction).
6498                let cons_tb: Vec<datasynth_generators::TrialBalanceEntry> = schedule
6499                    .line_items
6500                    .iter()
6501                    .map(|li| {
6502                        let net = li.post_elimination_total;
6503                        let (debit, credit) = if net >= rust_decimal::Decimal::ZERO {
6504                            (net, rust_decimal::Decimal::ZERO)
6505                        } else {
6506                            (rust_decimal::Decimal::ZERO, -net)
6507                        };
6508                        datasynth_generators::TrialBalanceEntry {
6509                            account_code: li.account_category.clone(),
6510                            account_name: li.account_category.clone(),
6511                            category: li.account_category.clone(),
6512                            debit_balance: debit,
6513                            credit_balance: credit,
6514                        }
6515                    })
6516                    .collect();
6517
6518                let mut cons_stmts = cons_gen.generate(
6519                    "GROUP",
6520                    group_currency,
6521                    &cons_tb,
6522                    period_start,
6523                    period_end,
6524                    fiscal_year,
6525                    fiscal_period,
6526                    None,
6527                    "SYS-AUTOCLOSE",
6528                );
6529
6530                // Split consolidated line items by statement type.
6531                // The consolidation generator returns BS items first, then IS items,
6532                // identified by their CONS- prefix and category.
6533                let bs_categories: &[&str] = &[
6534                    "CASH",
6535                    "RECEIVABLES",
6536                    "INVENTORY",
6537                    "FIXEDASSETS",
6538                    "PAYABLES",
6539                    "ACCRUEDLIABILITIES",
6540                    "LONGTERMDEBT",
6541                    "EQUITY",
6542                ];
6543                let (bs_items, is_items): (Vec<_>, Vec<_>) =
6544                    cons_line_items.into_iter().partition(|li| {
6545                        let upper = li.label.to_uppercase();
6546                        bs_categories.iter().any(|c| upper == *c)
6547                    });
6548
6549                for stmt in &mut cons_stmts {
6550                    stmt.is_consolidated = true;
6551                    match stmt.statement_type {
6552                        StatementType::BalanceSheet => stmt.line_items = bs_items.clone(),
6553                        StatementType::IncomeStatement => stmt.line_items = is_items.clone(),
6554                        _ => {} // CF and equity change statements keep generator output
6555                    }
6556                }
6557
6558                consolidated_statements.extend(cons_stmts);
6559                consolidation_schedules.push(schedule);
6560            }
6561
6562            // Backward compat: if only 1 company, use existing code path logic
6563            // (prior_cumulative_tb for comparative amounts). Already handled above;
6564            // the prior_ref is omitted to keep this change minimal.
6565            let _ = &mut fs_gen; // suppress unused warning
6566
6567            stats.financial_statement_count = financial_statements.len();
6568            info!(
6569                "Financial statements generated: {} standalone + {} consolidated, JE-derived: {}",
6570                stats.financial_statement_count,
6571                consolidated_statements.len(),
6572                has_journal_entries
6573            );
6574
6575            // ----------------------------------------------------------------
6576            // IFRS 8 / ASC 280: Operating Segment Reporting
6577            // ----------------------------------------------------------------
6578            // Build entity seeds from the company configuration.
6579            let entity_seeds: Vec<SegmentSeed> = self
6580                .config
6581                .companies
6582                .iter()
6583                .map(|c| SegmentSeed {
6584                    code: c.code.clone(),
6585                    name: c.name.clone(),
6586                    currency: c.currency.clone(),
6587                })
6588                .collect();
6589
6590            let mut seg_gen = SegmentGenerator::new(seed + 30);
6591
6592            // Generate one set of segment reports per period.
6593            // We extract consolidated revenue / profit / assets from the consolidated
6594            // financial statements produced above, falling back to simple sums when
6595            // no consolidated statements were generated (single-entity path).
6596            for period in 0..self.config.global.period_months {
6597                let period_end =
6598                    start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6599                let fiscal_year = period_end.year() as u16;
6600                let fiscal_period = period_end.month() as u8;
6601                let period_label = format!("{}-{:02}", fiscal_year, fiscal_period);
6602
6603                use datasynth_core::models::StatementType;
6604
6605                // Try to find consolidated income statement for this period
6606                let cons_is = consolidated_statements.iter().find(|s| {
6607                    s.fiscal_year == fiscal_year
6608                        && s.fiscal_period == fiscal_period
6609                        && s.statement_type == StatementType::IncomeStatement
6610                });
6611                let cons_bs = consolidated_statements.iter().find(|s| {
6612                    s.fiscal_year == fiscal_year
6613                        && s.fiscal_period == fiscal_period
6614                        && s.statement_type == StatementType::BalanceSheet
6615                });
6616
6617                // If consolidated statements not available fall back to the flat list
6618                let is_stmt = cons_is.or_else(|| {
6619                    financial_statements.iter().find(|s| {
6620                        s.fiscal_year == fiscal_year
6621                            && s.fiscal_period == fiscal_period
6622                            && s.statement_type == StatementType::IncomeStatement
6623                    })
6624                });
6625                let bs_stmt = cons_bs.or_else(|| {
6626                    financial_statements.iter().find(|s| {
6627                        s.fiscal_year == fiscal_year
6628                            && s.fiscal_period == fiscal_period
6629                            && s.statement_type == StatementType::BalanceSheet
6630                    })
6631                });
6632
6633                let consolidated_revenue = is_stmt
6634                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6635                    .map(|li| -li.amount) // revenue is stored as negative in IS
6636                    .unwrap_or(rust_decimal::Decimal::ZERO);
6637
6638                let consolidated_profit = is_stmt
6639                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-OI"))
6640                    .map(|li| li.amount)
6641                    .unwrap_or(rust_decimal::Decimal::ZERO);
6642
6643                let consolidated_assets = bs_stmt
6644                    .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-TA"))
6645                    .map(|li| li.amount)
6646                    .unwrap_or(rust_decimal::Decimal::ZERO);
6647
6648                // Skip periods where we have no financial data
6649                if consolidated_revenue == rust_decimal::Decimal::ZERO
6650                    && consolidated_assets == rust_decimal::Decimal::ZERO
6651                {
6652                    continue;
6653                }
6654
6655                let group_code = self
6656                    .config
6657                    .companies
6658                    .first()
6659                    .map(|c| c.code.as_str())
6660                    .unwrap_or("GROUP");
6661
6662                // Compute period depreciation from JEs with document type "CL" hitting account
6663                // 6000 (depreciation expense).  These are generated by phase_period_close.
6664                let total_depr: rust_decimal::Decimal = journal_entries
6665                    .iter()
6666                    .filter(|je| je.header.document_type == "CL")
6667                    .flat_map(|je| je.lines.iter())
6668                    .filter(|l| l.gl_account.starts_with("6000"))
6669                    .map(|l| l.debit_amount)
6670                    .fold(rust_decimal::Decimal::ZERO, |a, v| a + v);
6671                let depr_param = if total_depr > rust_decimal::Decimal::ZERO {
6672                    Some(total_depr)
6673                } else {
6674                    None
6675                };
6676
6677                let (segs, recon) = seg_gen.generate(
6678                    group_code,
6679                    &period_label,
6680                    consolidated_revenue,
6681                    consolidated_profit,
6682                    consolidated_assets,
6683                    &entity_seeds,
6684                    depr_param,
6685                );
6686                segment_reports.extend(segs);
6687                segment_reconciliations.push(recon);
6688            }
6689
6690            info!(
6691                "Segment reports generated: {} segments, {} reconciliations",
6692                segment_reports.len(),
6693                segment_reconciliations.len()
6694            );
6695        }
6696
6697        // Generate bank reconciliations from payment data
6698        if br_enabled && !document_flows.payments.is_empty() {
6699            let employee_ids: Vec<String> = self
6700                .master_data
6701                .employees
6702                .iter()
6703                .map(|e| e.employee_id.clone())
6704                .collect();
6705            let mut br_gen =
6706                BankReconciliationGenerator::new(seed + 25).with_employee_pool(employee_ids);
6707
6708            // Group payments by company code and period
6709            for company in &self.config.companies {
6710                let company_payments: Vec<PaymentReference> = document_flows
6711                    .payments
6712                    .iter()
6713                    .filter(|p| p.header.company_code == company.code)
6714                    .map(|p| PaymentReference {
6715                        id: p.header.document_id.clone(),
6716                        amount: if p.is_vendor { p.amount } else { -p.amount },
6717                        date: p.header.document_date,
6718                        reference: p
6719                            .check_number
6720                            .clone()
6721                            .or_else(|| p.wire_reference.clone())
6722                            .unwrap_or_else(|| p.header.document_id.clone()),
6723                    })
6724                    .collect();
6725
6726                if company_payments.is_empty() {
6727                    continue;
6728                }
6729
6730                let bank_account_id = format!("{}-MAIN", company.code);
6731
6732                // Generate one reconciliation per period
6733                for period in 0..self.config.global.period_months {
6734                    let period_start = start_date + chrono::Months::new(period);
6735                    let period_end =
6736                        start_date + chrono::Months::new(period + 1) - chrono::Days::new(1);
6737
6738                    let period_payments: Vec<PaymentReference> = company_payments
6739                        .iter()
6740                        .filter(|p| p.date >= period_start && p.date <= period_end)
6741                        .cloned()
6742                        .collect();
6743
6744                    let recon = br_gen.generate(
6745                        &company.code,
6746                        &bank_account_id,
6747                        period_start,
6748                        period_end,
6749                        &company.currency,
6750                        &period_payments,
6751                    );
6752                    bank_reconciliations.push(recon);
6753                }
6754            }
6755            info!(
6756                "Bank reconciliations generated: {} reconciliations",
6757                bank_reconciliations.len()
6758            );
6759        }
6760
6761        stats.bank_reconciliation_count = bank_reconciliations.len();
6762        self.check_resources_with_log("post-financial-reporting")?;
6763
6764        if !trial_balances.is_empty() {
6765            info!(
6766                "Period-close trial balances captured: {} periods",
6767                trial_balances.len()
6768            );
6769        }
6770
6771        // Notes to financial statements are generated in a separate post-processing step
6772        // (generate_notes_to_financial_statements) called after accounting_standards and tax
6773        // phases have completed, so that deferred tax and provision data can be wired in.
6774        let notes_to_financial_statements = Vec::new();
6775
6776        Ok(FinancialReportingSnapshot {
6777            financial_statements,
6778            standalone_statements,
6779            consolidated_statements,
6780            consolidation_schedules,
6781            bank_reconciliations,
6782            trial_balances,
6783            segment_reports,
6784            segment_reconciliations,
6785            notes_to_financial_statements,
6786        })
6787    }
6788
6789    /// Populate notes to financial statements using fully-resolved snapshots.
6790    ///
6791    /// This runs *after* `phase_accounting_standards` and `phase_tax_generation` so that
6792    /// deferred-tax balances (IAS 12 / ASC 740) and provision totals (IAS 37 / ASC 450)
6793    /// can be wired into the notes context.  The method mutates
6794    /// `financial_reporting.notes_to_financial_statements` in-place.
6795    fn generate_notes_to_financial_statements(
6796        &self,
6797        financial_reporting: &mut FinancialReportingSnapshot,
6798        accounting_standards: &AccountingStandardsSnapshot,
6799        tax: &TaxSnapshot,
6800        hr: &HrSnapshot,
6801        audit: &AuditSnapshot,
6802        treasury: &TreasurySnapshot,
6803    ) {
6804        use datasynth_config::schema::AccountingFrameworkConfig;
6805        use datasynth_core::models::StatementType;
6806        use datasynth_generators::period_close::notes_generator::{
6807            EnhancedNotesContext, NotesGenerator, NotesGeneratorContext,
6808        };
6809
6810        let seed = self.seed;
6811        let start_date = match NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
6812        {
6813            Ok(d) => d,
6814            Err(_) => return,
6815        };
6816
6817        let mut notes_gen = NotesGenerator::new(seed + 4235);
6818
6819        for company in &self.config.companies {
6820            let last_period_end = start_date
6821                + chrono::Months::new(self.config.global.period_months)
6822                - chrono::Days::new(1);
6823            let fiscal_year = last_period_end.year() as u16;
6824
6825            // Extract relevant amounts from the already-generated financial statements
6826            let entity_is = financial_reporting
6827                .standalone_statements
6828                .get(&company.code)
6829                .and_then(|stmts| {
6830                    stmts.iter().find(|s| {
6831                        s.fiscal_year == fiscal_year
6832                            && s.statement_type == StatementType::IncomeStatement
6833                    })
6834                });
6835            let entity_bs = financial_reporting
6836                .standalone_statements
6837                .get(&company.code)
6838                .and_then(|stmts| {
6839                    stmts.iter().find(|s| {
6840                        s.fiscal_year == fiscal_year
6841                            && s.statement_type == StatementType::BalanceSheet
6842                    })
6843                });
6844
6845            // IS-REV is stored as positive (Fix 12 — credit-normal accounts negated at IS build time)
6846            let revenue_amount = entity_is
6847                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "IS-REV"))
6848                .map(|li| li.amount);
6849            let ppe_gross = entity_bs
6850                .and_then(|s| s.line_items.iter().find(|li| li.line_code == "BS-FA"))
6851                .map(|li| li.amount);
6852
6853            let framework = match self
6854                .config
6855                .accounting_standards
6856                .framework
6857                .unwrap_or_default()
6858            {
6859                AccountingFrameworkConfig::Ifrs | AccountingFrameworkConfig::DualReporting => {
6860                    "IFRS".to_string()
6861                }
6862                _ => "US GAAP".to_string(),
6863            };
6864
6865            // ---- Deferred tax (IAS 12 / ASC 740) ----
6866            // Sum closing DTA and DTL from rollforward entries for this entity.
6867            let (entity_dta, entity_dtl) = {
6868                let mut dta = rust_decimal::Decimal::ZERO;
6869                let mut dtl = rust_decimal::Decimal::ZERO;
6870                for rf in &tax.deferred_tax.rollforwards {
6871                    if rf.entity_code == company.code {
6872                        dta += rf.closing_dta;
6873                        dtl += rf.closing_dtl;
6874                    }
6875                }
6876                (
6877                    if dta > rust_decimal::Decimal::ZERO {
6878                        Some(dta)
6879                    } else {
6880                        None
6881                    },
6882                    if dtl > rust_decimal::Decimal::ZERO {
6883                        Some(dtl)
6884                    } else {
6885                        None
6886                    },
6887                )
6888            };
6889
6890            // ---- Provisions (IAS 37 / ASC 450) ----
6891            // Filter provisions to this entity; sum best_estimate amounts.
6892            let entity_provisions: Vec<_> = accounting_standards
6893                .provisions
6894                .iter()
6895                .filter(|p| p.entity_code == company.code)
6896                .collect();
6897            let provision_count = entity_provisions.len();
6898            let total_provisions = if provision_count > 0 {
6899                Some(entity_provisions.iter().map(|p| p.best_estimate).sum())
6900            } else {
6901                None
6902            };
6903
6904            // ---- Pension data from HR snapshot ----
6905            let entity_pension_plan_count = hr
6906                .pension_plans
6907                .iter()
6908                .filter(|p| p.entity_code == company.code)
6909                .count();
6910            let entity_total_dbo: Option<rust_decimal::Decimal> = {
6911                let sum: rust_decimal::Decimal = hr
6912                    .pension_disclosures
6913                    .iter()
6914                    .filter(|d| {
6915                        hr.pension_plans
6916                            .iter()
6917                            .any(|p| p.id == d.plan_id && p.entity_code == company.code)
6918                    })
6919                    .map(|d| d.net_pension_liability)
6920                    .sum();
6921                let plan_assets_sum: rust_decimal::Decimal = hr
6922                    .pension_plan_assets
6923                    .iter()
6924                    .filter(|a| {
6925                        hr.pension_plans
6926                            .iter()
6927                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6928                    })
6929                    .map(|a| a.fair_value_closing)
6930                    .sum();
6931                if entity_pension_plan_count > 0 {
6932                    Some(sum + plan_assets_sum)
6933                } else {
6934                    None
6935                }
6936            };
6937            let entity_total_plan_assets: Option<rust_decimal::Decimal> = {
6938                let sum: rust_decimal::Decimal = hr
6939                    .pension_plan_assets
6940                    .iter()
6941                    .filter(|a| {
6942                        hr.pension_plans
6943                            .iter()
6944                            .any(|p| p.id == a.plan_id && p.entity_code == company.code)
6945                    })
6946                    .map(|a| a.fair_value_closing)
6947                    .sum();
6948                if entity_pension_plan_count > 0 {
6949                    Some(sum)
6950                } else {
6951                    None
6952                }
6953            };
6954
6955            // ---- Audit data: related parties + subsequent events ----
6956            // Audit snapshot covers all entities; use total counts (common case = single entity).
6957            let rp_count = audit.related_party_transactions.len();
6958            let se_count = audit.subsequent_events.len();
6959            let adjusting_count = audit
6960                .subsequent_events
6961                .iter()
6962                .filter(|e| {
6963                    matches!(
6964                        e.classification,
6965                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
6966                    )
6967                })
6968                .count();
6969
6970            let ctx = NotesGeneratorContext {
6971                entity_code: company.code.clone(),
6972                framework,
6973                period: format!("FY{}", fiscal_year),
6974                period_end: last_period_end,
6975                currency: company.currency.clone(),
6976                revenue_amount,
6977                total_ppe_gross: ppe_gross,
6978                statutory_tax_rate: Some(rust_decimal::Decimal::new(21, 2)),
6979                // Deferred tax from tax snapshot (IAS 12 / ASC 740)
6980                deferred_tax_asset: entity_dta,
6981                deferred_tax_liability: entity_dtl,
6982                // Provisions from accounting_standards snapshot (IAS 37 / ASC 450)
6983                provision_count,
6984                total_provisions,
6985                // Pension data from HR snapshot
6986                pension_plan_count: entity_pension_plan_count,
6987                total_dbo: entity_total_dbo,
6988                total_plan_assets: entity_total_plan_assets,
6989                // Audit data
6990                related_party_transaction_count: rp_count,
6991                subsequent_event_count: se_count,
6992                adjusting_event_count: adjusting_count,
6993                ..NotesGeneratorContext::default()
6994            };
6995
6996            let entity_notes = notes_gen.generate(&ctx);
6997            let standard_note_count = entity_notes.len() as u32;
6998            info!(
6999                "Notes to FS for {}: {} notes generated (DTA={:?}, DTL={:?}, provisions={})",
7000                company.code, standard_note_count, entity_dta, entity_dtl, provision_count,
7001            );
7002            financial_reporting
7003                .notes_to_financial_statements
7004                .extend(entity_notes);
7005
7006            // v2.4: Enhanced notes backed by treasury, manufacturing, and provision data
7007            let debt_instruments: Vec<(String, rust_decimal::Decimal, String)> = treasury
7008                .debt_instruments
7009                .iter()
7010                .filter(|d| d.entity_id == company.code)
7011                .map(|d| {
7012                    (
7013                        format!("{:?}", d.instrument_type),
7014                        d.principal,
7015                        d.maturity_date.to_string(),
7016                    )
7017                })
7018                .collect();
7019
7020            let hedge_count = treasury.hedge_relationships.len();
7021            let effective_hedges = treasury
7022                .hedge_relationships
7023                .iter()
7024                .filter(|h| h.is_effective)
7025                .count();
7026            let total_notional: rust_decimal::Decimal = treasury
7027                .hedging_instruments
7028                .iter()
7029                .map(|h| h.notional_amount)
7030                .sum();
7031            let total_fair_value: rust_decimal::Decimal = treasury
7032                .hedging_instruments
7033                .iter()
7034                .map(|h| h.fair_value)
7035                .sum();
7036
7037            // Join provision_movements with provisions to get entity/type info
7038            let entity_provision_ids: std::collections::HashSet<&str> = accounting_standards
7039                .provisions
7040                .iter()
7041                .filter(|p| p.entity_code == company.code)
7042                .map(|p| p.id.as_str())
7043                .collect();
7044            let provision_movements: Vec<(
7045                String,
7046                rust_decimal::Decimal,
7047                rust_decimal::Decimal,
7048                rust_decimal::Decimal,
7049            )> = accounting_standards
7050                .provision_movements
7051                .iter()
7052                .filter(|m| entity_provision_ids.contains(m.provision_id.as_str()))
7053                .map(|m| {
7054                    let prov_type = accounting_standards
7055                        .provisions
7056                        .iter()
7057                        .find(|p| p.id == m.provision_id)
7058                        .map(|p| format!("{:?}", p.provision_type))
7059                        .unwrap_or_else(|| "Unknown".to_string());
7060                    (prov_type, m.opening, m.additions, m.closing)
7061                })
7062                .collect();
7063
7064            let enhanced_ctx = EnhancedNotesContext {
7065                entity_code: company.code.clone(),
7066                period: format!("FY{}", fiscal_year),
7067                currency: company.currency.clone(),
7068                // Inventory breakdown: best-effort using zero (would need balance tracker)
7069                finished_goods_value: rust_decimal::Decimal::ZERO,
7070                wip_value: rust_decimal::Decimal::ZERO,
7071                raw_materials_value: rust_decimal::Decimal::ZERO,
7072                debt_instruments,
7073                hedge_count,
7074                effective_hedges,
7075                total_notional,
7076                total_fair_value,
7077                provision_movements,
7078            };
7079
7080            let enhanced_notes =
7081                notes_gen.generate_enhanced_notes(&enhanced_ctx, standard_note_count + 1);
7082            if !enhanced_notes.is_empty() {
7083                info!(
7084                    "Enhanced notes for {}: {} supplementary notes (debt={}, hedges={}, provisions={})",
7085                    company.code,
7086                    enhanced_notes.len(),
7087                    enhanced_ctx.debt_instruments.len(),
7088                    hedge_count,
7089                    enhanced_ctx.provision_movements.len(),
7090                );
7091                financial_reporting
7092                    .notes_to_financial_statements
7093                    .extend(enhanced_notes);
7094            }
7095        }
7096    }
7097
7098    /// Build trial balance entries by aggregating actual journal entry debits and credits per account.
7099    ///
7100    /// This ensures the trial balance is coherent with the JEs: every debit and credit
7101    /// posted in the journal entries flows through to the trial balance, using the real
7102    /// GL account numbers from the CoA.
7103    fn build_trial_balance_from_entries(
7104        journal_entries: &[JournalEntry],
7105        coa: &ChartOfAccounts,
7106        company_code: &str,
7107        fiscal_year: u16,
7108        fiscal_period: u8,
7109        framework: &str,
7110    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7111        use rust_decimal::Decimal;
7112
7113        // Accumulate total debits and credits per GL account
7114        let mut account_debits: HashMap<String, Decimal> = HashMap::new();
7115        let mut account_credits: HashMap<String, Decimal> = HashMap::new();
7116
7117        for je in journal_entries {
7118            // Filter to matching company, fiscal year, and period
7119            if je.header.company_code != company_code
7120                || je.header.fiscal_year != fiscal_year
7121                || je.header.fiscal_period != fiscal_period
7122            {
7123                continue;
7124            }
7125
7126            for line in &je.lines {
7127                let acct = &line.gl_account;
7128                *account_debits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.debit_amount;
7129                *account_credits.entry(acct.clone()).or_insert(Decimal::ZERO) += line.credit_amount;
7130            }
7131        }
7132
7133        // Build a TrialBalanceEntry for each account that had activity
7134        let mut all_accounts: Vec<&String> = account_debits
7135            .keys()
7136            .chain(account_credits.keys())
7137            .collect::<std::collections::HashSet<_>>()
7138            .into_iter()
7139            .collect();
7140        all_accounts.sort();
7141
7142        let mut entries = Vec::new();
7143
7144        for acct_number in all_accounts {
7145            let debit = account_debits
7146                .get(acct_number)
7147                .copied()
7148                .unwrap_or(Decimal::ZERO);
7149            let credit = account_credits
7150                .get(acct_number)
7151                .copied()
7152                .unwrap_or(Decimal::ZERO);
7153
7154            if debit.is_zero() && credit.is_zero() {
7155                continue;
7156            }
7157
7158            // Look up account name from CoA, fall back to "Account {code}"
7159            let account_name = coa
7160                .get_account(acct_number)
7161                .map(|gl| gl.short_description.clone())
7162                .unwrap_or_else(|| format!("Account {acct_number}"));
7163
7164            // Map account code prefix to the category strings expected by
7165            // FinancialStatementGenerator (Cash, Receivables, Inventory,
7166            // FixedAssets, Payables, AccruedLiabilities, Revenue, CostOfSales,
7167            // OperatingExpenses).
7168            let category = Self::category_from_account_code(acct_number, framework);
7169
7170            entries.push(datasynth_generators::TrialBalanceEntry {
7171                account_code: acct_number.clone(),
7172                account_name,
7173                category,
7174                debit_balance: debit,
7175                credit_balance: credit,
7176            });
7177        }
7178
7179        entries
7180    }
7181
7182    /// Build a cumulative trial balance by aggregating all JEs from the start up to
7183    /// (and including) the given period end date.
7184    ///
7185    /// Balance sheet accounts (assets, liabilities, equity) use cumulative balances
7186    /// while income statement accounts (revenue, expenses) show only the current period.
7187    /// The two are merged into a single Vec for the FinancialStatementGenerator.
7188    #[allow(clippy::too_many_arguments)]
7189    fn build_cumulative_trial_balance(
7190        journal_entries: &[JournalEntry],
7191        coa: &ChartOfAccounts,
7192        company_code: &str,
7193        start_date: NaiveDate,
7194        period_end: NaiveDate,
7195        fiscal_year: u16,
7196        fiscal_period: u8,
7197        framework: &str,
7198    ) -> Vec<datasynth_generators::TrialBalanceEntry> {
7199        use rust_decimal::Decimal;
7200
7201        // Accumulate debits/credits for balance sheet accounts (cumulative from start)
7202        let mut bs_debits: HashMap<String, Decimal> = HashMap::new();
7203        let mut bs_credits: HashMap<String, Decimal> = HashMap::new();
7204
7205        // Accumulate debits/credits for income statement accounts (current period only)
7206        let mut is_debits: HashMap<String, Decimal> = HashMap::new();
7207        let mut is_credits: HashMap<String, Decimal> = HashMap::new();
7208
7209        for je in journal_entries {
7210            if je.header.company_code != company_code {
7211                continue;
7212            }
7213
7214            for line in &je.lines {
7215                let acct = &line.gl_account;
7216                // Framework-aware BS bucketing — fixes the Defect A
7217                // mis-classification where US-style prefix tables routed
7218                // SKR/PCG balance-sheet accounts through the P&L bucket
7219                // (or vice versa), giving the resulting TB an asymmetric
7220                // time window with no integrity invariant left to test.
7221                let is_bs_account = Self::is_balance_sheet_account(acct, framework);
7222
7223                if is_bs_account {
7224                    // Balance sheet: accumulate from start through period_end
7225                    if je.header.document_date <= period_end
7226                        && je.header.document_date >= start_date
7227                    {
7228                        *bs_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7229                            line.debit_amount;
7230                        *bs_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7231                            line.credit_amount;
7232                    }
7233                } else {
7234                    // Income statement: current period only
7235                    if je.header.fiscal_year == fiscal_year
7236                        && je.header.fiscal_period == fiscal_period
7237                    {
7238                        *is_debits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7239                            line.debit_amount;
7240                        *is_credits.entry(acct.clone()).or_insert(Decimal::ZERO) +=
7241                            line.credit_amount;
7242                    }
7243                }
7244            }
7245        }
7246
7247        // Merge all accounts
7248        let mut all_accounts: std::collections::HashSet<String> = std::collections::HashSet::new();
7249        all_accounts.extend(bs_debits.keys().cloned());
7250        all_accounts.extend(bs_credits.keys().cloned());
7251        all_accounts.extend(is_debits.keys().cloned());
7252        all_accounts.extend(is_credits.keys().cloned());
7253
7254        let mut sorted_accounts: Vec<String> = all_accounts.into_iter().collect();
7255        sorted_accounts.sort();
7256
7257        let mut entries = Vec::new();
7258
7259        for acct_number in &sorted_accounts {
7260            let category = Self::category_from_account_code(acct_number, framework);
7261            let is_bs_account = Self::is_balance_sheet_account(acct_number, framework);
7262
7263            let (debit, credit) = if is_bs_account {
7264                (
7265                    bs_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7266                    bs_credits
7267                        .get(acct_number)
7268                        .copied()
7269                        .unwrap_or(Decimal::ZERO),
7270                )
7271            } else {
7272                (
7273                    is_debits.get(acct_number).copied().unwrap_or(Decimal::ZERO),
7274                    is_credits
7275                        .get(acct_number)
7276                        .copied()
7277                        .unwrap_or(Decimal::ZERO),
7278                )
7279            };
7280
7281            if debit.is_zero() && credit.is_zero() {
7282                continue;
7283            }
7284
7285            let account_name = coa
7286                .get_account(acct_number)
7287                .map(|gl| gl.short_description.clone())
7288                .unwrap_or_else(|| format!("Account {acct_number}"));
7289
7290            entries.push(datasynth_generators::TrialBalanceEntry {
7291                account_code: acct_number.clone(),
7292                account_name,
7293                category,
7294                debit_balance: debit,
7295                credit_balance: credit,
7296            });
7297        }
7298
7299        entries
7300    }
7301
7302    /// Build a JE-derived cash flow statement using the indirect method.
7303    ///
7304    /// Compares current and prior cumulative trial balances to derive working capital
7305    /// changes, producing a coherent cash flow statement tied to actual journal entries.
7306    fn build_cash_flow_from_trial_balances(
7307        current_tb: &[datasynth_generators::TrialBalanceEntry],
7308        prior_tb: Option<&[datasynth_generators::TrialBalanceEntry]>,
7309        net_income: rust_decimal::Decimal,
7310    ) -> Vec<CashFlowItem> {
7311        use rust_decimal::Decimal;
7312
7313        // Helper: aggregate a TB by category and return net (debit - credit)
7314        let aggregate =
7315            |tb: &[datasynth_generators::TrialBalanceEntry]| -> HashMap<String, Decimal> {
7316                let mut map: HashMap<String, Decimal> = HashMap::new();
7317                for entry in tb {
7318                    let net = entry.debit_balance - entry.credit_balance;
7319                    *map.entry(entry.category.clone()).or_default() += net;
7320                }
7321                map
7322            };
7323
7324        let current = aggregate(current_tb);
7325        let prior = prior_tb.map(aggregate);
7326
7327        // Get balance for a category, defaulting to zero
7328        let get = |map: &HashMap<String, Decimal>, key: &str| -> Decimal {
7329            *map.get(key).unwrap_or(&Decimal::ZERO)
7330        };
7331
7332        // Compute change: current - prior (or current if no prior)
7333        let change = |key: &str| -> Decimal {
7334            let curr = get(&current, key);
7335            match &prior {
7336                Some(p) => curr - get(p, key),
7337                None => curr,
7338            }
7339        };
7340
7341        // Operating activities (indirect method)
7342        // Depreciation add-back: approximate from FixedAssets decrease
7343        let fixed_asset_change = change("FixedAssets");
7344        let depreciation_addback = if fixed_asset_change < Decimal::ZERO {
7345            -fixed_asset_change
7346        } else {
7347            Decimal::ZERO
7348        };
7349
7350        // Working capital changes (increase in assets = cash outflow, increase in liabilities = cash inflow)
7351        let ar_change = change("Receivables");
7352        let inventory_change = change("Inventory");
7353        // AP and AccruedLiabilities are credit-normal: negative net means larger balance = cash inflow
7354        let ap_change = change("Payables");
7355        let accrued_change = change("AccruedLiabilities");
7356
7357        let operating_cf = net_income + depreciation_addback - ar_change - inventory_change
7358            + (-ap_change)
7359            + (-accrued_change);
7360
7361        // Investing activities
7362        let capex = if fixed_asset_change > Decimal::ZERO {
7363            -fixed_asset_change
7364        } else {
7365            Decimal::ZERO
7366        };
7367        let investing_cf = capex;
7368
7369        // Financing activities
7370        let debt_change = -change("LongTermDebt");
7371        let equity_change = -change("Equity");
7372        let financing_cf = debt_change + equity_change;
7373
7374        let net_change = operating_cf + investing_cf + financing_cf;
7375
7376        vec![
7377            CashFlowItem {
7378                item_code: "CF-NI".to_string(),
7379                label: "Net Income".to_string(),
7380                category: CashFlowCategory::Operating,
7381                amount: net_income,
7382                amount_prior: None,
7383                sort_order: 1,
7384                is_total: false,
7385            },
7386            CashFlowItem {
7387                item_code: "CF-DEP".to_string(),
7388                label: "Depreciation & Amortization".to_string(),
7389                category: CashFlowCategory::Operating,
7390                amount: depreciation_addback,
7391                amount_prior: None,
7392                sort_order: 2,
7393                is_total: false,
7394            },
7395            CashFlowItem {
7396                item_code: "CF-AR".to_string(),
7397                label: "Change in Accounts Receivable".to_string(),
7398                category: CashFlowCategory::Operating,
7399                amount: -ar_change,
7400                amount_prior: None,
7401                sort_order: 3,
7402                is_total: false,
7403            },
7404            CashFlowItem {
7405                item_code: "CF-AP".to_string(),
7406                label: "Change in Accounts Payable".to_string(),
7407                category: CashFlowCategory::Operating,
7408                amount: -ap_change,
7409                amount_prior: None,
7410                sort_order: 4,
7411                is_total: false,
7412            },
7413            CashFlowItem {
7414                item_code: "CF-INV".to_string(),
7415                label: "Change in Inventory".to_string(),
7416                category: CashFlowCategory::Operating,
7417                amount: -inventory_change,
7418                amount_prior: None,
7419                sort_order: 5,
7420                is_total: false,
7421            },
7422            CashFlowItem {
7423                item_code: "CF-OP".to_string(),
7424                label: "Net Cash from Operating Activities".to_string(),
7425                category: CashFlowCategory::Operating,
7426                amount: operating_cf,
7427                amount_prior: None,
7428                sort_order: 6,
7429                is_total: true,
7430            },
7431            CashFlowItem {
7432                item_code: "CF-CAPEX".to_string(),
7433                label: "Capital Expenditures".to_string(),
7434                category: CashFlowCategory::Investing,
7435                amount: capex,
7436                amount_prior: None,
7437                sort_order: 7,
7438                is_total: false,
7439            },
7440            CashFlowItem {
7441                item_code: "CF-INV-T".to_string(),
7442                label: "Net Cash from Investing Activities".to_string(),
7443                category: CashFlowCategory::Investing,
7444                amount: investing_cf,
7445                amount_prior: None,
7446                sort_order: 8,
7447                is_total: true,
7448            },
7449            CashFlowItem {
7450                item_code: "CF-DEBT".to_string(),
7451                label: "Net Borrowings / (Repayments)".to_string(),
7452                category: CashFlowCategory::Financing,
7453                amount: debt_change,
7454                amount_prior: None,
7455                sort_order: 9,
7456                is_total: false,
7457            },
7458            CashFlowItem {
7459                item_code: "CF-EQ".to_string(),
7460                label: "Equity Changes".to_string(),
7461                category: CashFlowCategory::Financing,
7462                amount: equity_change,
7463                amount_prior: None,
7464                sort_order: 10,
7465                is_total: false,
7466            },
7467            CashFlowItem {
7468                item_code: "CF-FIN-T".to_string(),
7469                label: "Net Cash from Financing Activities".to_string(),
7470                category: CashFlowCategory::Financing,
7471                amount: financing_cf,
7472                amount_prior: None,
7473                sort_order: 11,
7474                is_total: true,
7475            },
7476            CashFlowItem {
7477                item_code: "CF-NET".to_string(),
7478                label: "Net Change in Cash".to_string(),
7479                category: CashFlowCategory::Operating,
7480                amount: net_change,
7481                amount_prior: None,
7482                sort_order: 12,
7483                is_total: true,
7484            },
7485        ]
7486    }
7487
7488    /// Calculate net income from a set of trial balance entries.
7489    ///
7490    /// Revenue is credit-normal (negative net = positive revenue), expenses are debit-normal.
7491    fn calculate_net_income_from_tb(
7492        tb: &[datasynth_generators::TrialBalanceEntry],
7493    ) -> rust_decimal::Decimal {
7494        use rust_decimal::Decimal;
7495
7496        let mut aggregated: HashMap<String, Decimal> = HashMap::new();
7497        for entry in tb {
7498            let net = entry.debit_balance - entry.credit_balance;
7499            *aggregated.entry(entry.category.clone()).or_default() += net;
7500        }
7501
7502        let revenue = *aggregated.get("Revenue").unwrap_or(&Decimal::ZERO);
7503        let cogs = *aggregated.get("CostOfSales").unwrap_or(&Decimal::ZERO);
7504        let opex = *aggregated
7505            .get("OperatingExpenses")
7506            .unwrap_or(&Decimal::ZERO);
7507        let other_income = *aggregated.get("OtherIncome").unwrap_or(&Decimal::ZERO);
7508        let other_expenses = *aggregated.get("OtherExpenses").unwrap_or(&Decimal::ZERO);
7509
7510        // revenue is negative (credit-normal), expenses are positive (debit-normal)
7511        // other_income is typically negative (credit), other_expenses is typically positive
7512        let operating_income = revenue - cogs - opex - other_expenses - other_income;
7513        let tax_rate = Decimal::new(25, 2); // 0.25
7514        let tax = operating_income * tax_rate;
7515        operating_income - tax
7516    }
7517
7518    /// Map a GL account code to the category string expected by FinancialStatementGenerator.
7519    ///
7520    /// Uses the first two digits of the account code to classify into the categories
7521    /// that the financial statement generator aggregates on: Cash, Receivables, Inventory,
7522    /// FixedAssets, Payables, AccruedLiabilities, LongTermDebt, Equity, Revenue, CostOfSales,
7523    /// OperatingExpenses, OtherIncome, OtherExpenses.
7524    /// Map an account code to the orchestrator's 13-bucket category string
7525    /// (`"Cash"` / `"Receivables"` / `"Inventory"` / `"FixedAssets"` /
7526    /// `"Payables"` / `"AccruedLiabilities"` / `"LongTermDebt"` /
7527    /// `"Equity"` / `"Revenue"` / `"CostOfSales"` / `"OperatingExpenses"`
7528    /// / `"OtherIncome"` / `"OtherExpenses"`).
7529    ///
7530    /// `framework` controls which numbering convention is applied:
7531    ///
7532    /// - `"us_gaap"` / `"ifrs"` / `"dual_reporting"` — US-style 4-digit
7533    ///   chart (1xxx assets, 2xxx liabilities, 3xxx equity, 4xxx revenue,
7534    ///   5xxx COGS, 6xxx OpEx, 7xxx other income, 8xxx other expense).
7535    /// - `"french_gaap"` — French PCG (1 = capital/liabilities, 2 = fixed
7536    ///   assets, 3 = inventory, 4 = third parties, 5 = cash, 6 = expenses,
7537    ///   7 = revenue).
7538    /// - `"german_gaap"` / `"hgb"` — German SKR04 (0 = fixed assets,
7539    ///   1 = current assets, 2 = equity, 3 = liabilities, 4 = revenue,
7540    ///   5 = COGS, 6 = OpEx, 7 = financial, 8 = tax/extraordinary).
7541    ///
7542    /// Unknown frameworks fall back to US-style.
7543    fn category_from_account_code(code: &str, framework: &str) -> String {
7544        match framework {
7545            "german_gaap" | "GermanGaap" | "hgb" => Self::skr_category(code),
7546            "french_gaap" | "FrenchGaap" => Self::pcg_category(code),
7547            _ => Self::us_gaap_category(code),
7548        }
7549        .to_string()
7550    }
7551
7552    fn us_gaap_category(code: &str) -> &'static str {
7553        let prefix: String = code.chars().take(2).collect();
7554        match prefix.as_str() {
7555            "10" => "Cash",
7556            "11" => "Receivables",
7557            "12" | "13" | "14" => "Inventory",
7558            "15" | "16" | "17" | "18" | "19" => "FixedAssets",
7559            "20" => "Payables",
7560            "21" | "22" | "23" | "24" => "AccruedLiabilities",
7561            "25" | "26" | "27" | "28" | "29" => "LongTermDebt",
7562            "30" | "31" | "32" | "33" | "34" | "35" | "36" | "37" | "38" | "39" => "Equity",
7563            "40" | "41" | "42" | "43" | "44" => "Revenue",
7564            "50" | "51" | "52" => "CostOfSales",
7565            "60" | "61" | "62" | "63" | "64" | "65" | "66" | "67" | "68" | "69" => {
7566                "OperatingExpenses"
7567            }
7568            "70" | "71" | "72" | "73" | "74" => "OtherIncome",
7569            "80" | "81" | "82" | "83" | "84" | "85" | "86" | "87" | "88" | "89" => "OtherExpenses",
7570            _ => "OperatingExpenses",
7571        }
7572    }
7573
7574    /// SKR04 (German GAAP) prefix → orchestrator category.
7575    ///
7576    /// 0 = fixed assets, 1 = current assets (10-12 cash, 13-14 receivables,
7577    /// 15-19 inventory), 2 = equity, 3 = liabilities (3-31 payables,
7578    /// 32-37 accrued, 38-39 long-term debt), 4 = revenue, 5 = COGS,
7579    /// 6 = OpEx, 7 = financial income, 8 = tax/extraordinary expense.
7580    fn skr_category(code: &str) -> &'static str {
7581        let first = code.chars().next().and_then(|c| c.to_digit(10));
7582        let prefix: String = code.chars().take(2).collect();
7583        match first {
7584            Some(0) => "FixedAssets",
7585            Some(1) => match prefix.as_str() {
7586                "10" | "11" | "12" => "Cash",
7587                "13" | "14" => "Receivables",
7588                _ => "Inventory",
7589            },
7590            Some(2) => "Equity",
7591            Some(3) => match prefix.as_str() {
7592                "30" | "31" => "Payables",
7593                "32" | "33" | "34" | "35" | "36" | "37" => "AccruedLiabilities",
7594                _ => "LongTermDebt",
7595            },
7596            Some(4) => "Revenue",
7597            Some(5) => "CostOfSales",
7598            Some(6) => "OperatingExpenses",
7599            Some(7) => "OtherIncome",
7600            Some(8) => "OtherExpenses",
7601            _ => "OperatingExpenses",
7602        }
7603    }
7604
7605    /// French PCG prefix → orchestrator category.
7606    ///
7607    /// 10-14 = equity, 15-19 = liabilities (provisions, debts),
7608    /// 2 = fixed assets, 3 = inventory, 40 = payables, 41 = receivables,
7609    /// 42-49 = liabilities (personnel, tax, group), 5 = cash, 6 = expenses,
7610    /// 7 = revenue.
7611    fn pcg_category(code: &str) -> &'static str {
7612        let first = code.chars().next().and_then(|c| c.to_digit(10));
7613        let second = code.chars().nth(1).and_then(|c| c.to_digit(10));
7614        match first {
7615            Some(1) => match second {
7616                Some(0..=4) => "Equity",
7617                Some(5) => "AccruedLiabilities",
7618                _ => "LongTermDebt",
7619            },
7620            Some(2) => "FixedAssets",
7621            Some(3) => "Inventory",
7622            Some(4) => match second {
7623                Some(0) => "Payables",
7624                Some(1) => "Receivables",
7625                _ => "AccruedLiabilities",
7626            },
7627            Some(5) => "Cash",
7628            Some(6) => "OperatingExpenses",
7629            Some(7) => "Revenue",
7630            Some(8) | Some(9) => "OperatingExpenses",
7631            _ => "OperatingExpenses",
7632        }
7633    }
7634
7635    /// Test whether an account code maps to a balance-sheet line under
7636    /// the given framework. Drives the cumulative-vs-period bucketing in
7637    /// [`Self::build_cumulative_trial_balance`].
7638    ///
7639    /// Delegates to the framework-aware classifier in
7640    /// `datasynth-core::framework_accounts` so SKR (German) and PCG
7641    /// (French) codes are recognised, not silently routed through a
7642    /// US-style prefix table.
7643    fn is_balance_sheet_account(code: &str, framework: &str) -> bool {
7644        // `AccountType` here is the `balance::AccountType` imported at
7645        // the top of the file; `FrameworkAccounts::classify_account_type`
7646        // returns the same enum, so no cross-namespace mapping is needed.
7647        let fa = datasynth_core::framework_accounts::FrameworkAccounts::for_framework(framework);
7648        matches!(
7649            fa.classify_account_type(code),
7650            AccountType::Asset
7651                | AccountType::ContraAsset
7652                | AccountType::Liability
7653                | AccountType::ContraLiability
7654                | AccountType::Equity
7655                | AccountType::ContraEquity
7656        )
7657    }
7658
7659    /// Phase 16: Generate HR data (payroll runs, time entries, expense reports).
7660    fn phase_hr_data(
7661        &mut self,
7662        stats: &mut EnhancedGenerationStatistics,
7663    ) -> SynthResult<HrSnapshot> {
7664        if !self.phase_config.generate_hr {
7665            debug!("Phase 16: Skipped (HR generation disabled)");
7666            return Ok(HrSnapshot::default());
7667        }
7668
7669        info!("Phase 16: Generating HR Data (Payroll, Time Entries, Expenses)");
7670
7671        let seed = self.seed;
7672        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7673            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
7674        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
7675        let company_code = self
7676            .config
7677            .companies
7678            .first()
7679            .map(|c| c.code.as_str())
7680            .unwrap_or("1000");
7681        let currency = self
7682            .config
7683            .companies
7684            .first()
7685            .map(|c| c.currency.as_str())
7686            .unwrap_or("USD");
7687
7688        let employee_ids: Vec<String> = self
7689            .master_data
7690            .employees
7691            .iter()
7692            .map(|e| e.employee_id.clone())
7693            .collect();
7694
7695        if employee_ids.is_empty() {
7696            debug!("Phase 16: Skipped (no employees available)");
7697            return Ok(HrSnapshot::default());
7698        }
7699
7700        // Extract cost-center pool from master data employees for cross-reference
7701        // coherence. Fabricated IDs (e.g. "CC-123") are replaced by real values.
7702        let cost_center_ids: Vec<String> = self
7703            .master_data
7704            .employees
7705            .iter()
7706            .filter_map(|e| e.cost_center.clone())
7707            .collect::<std::collections::HashSet<_>>()
7708            .into_iter()
7709            .collect();
7710
7711        let mut snapshot = HrSnapshot::default();
7712
7713        // Generate payroll runs (one per month)
7714        if self.config.hr.payroll.enabled {
7715            let mut payroll_gen = datasynth_generators::PayrollGenerator::new(seed + 330)
7716                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7717
7718            // Look up country pack for payroll deductions and labels
7719            let payroll_pack = self.primary_pack();
7720
7721            // Store the pack on the generator so generate() resolves
7722            // localized deduction rates and labels from it.
7723            payroll_gen.set_country_pack(payroll_pack.clone());
7724
7725            let employees_with_salary: Vec<(
7726                String,
7727                rust_decimal::Decimal,
7728                Option<String>,
7729                Option<String>,
7730            )> = self
7731                .master_data
7732                .employees
7733                .iter()
7734                .map(|e| {
7735                    // Use the employee's actual annual base salary.
7736                    // Fall back to $60,000 / yr if somehow zero.
7737                    let annual = if e.base_salary > rust_decimal::Decimal::ZERO {
7738                        e.base_salary
7739                    } else {
7740                        rust_decimal::Decimal::from(60_000)
7741                    };
7742                    (
7743                        e.employee_id.clone(),
7744                        annual, // annual salary — PayrollGenerator divides by 12 for monthly base
7745                        e.cost_center.clone(),
7746                        e.department_id.clone(),
7747                    )
7748                })
7749                .collect();
7750
7751            // Use generate_with_changes when employee change history is available
7752            // so that salary adjustments, transfers, etc. are reflected in payroll.
7753            let change_history = &self.master_data.employee_change_history;
7754            let has_changes = !change_history.is_empty();
7755            if has_changes {
7756                debug!(
7757                    "Payroll will incorporate {} employee change events",
7758                    change_history.len()
7759                );
7760            }
7761
7762            for month in 0..self.config.global.period_months {
7763                let period_start = start_date + chrono::Months::new(month);
7764                let period_end = start_date + chrono::Months::new(month + 1) - chrono::Days::new(1);
7765                let (run, items) = if has_changes {
7766                    payroll_gen.generate_with_changes(
7767                        company_code,
7768                        &employees_with_salary,
7769                        period_start,
7770                        period_end,
7771                        currency,
7772                        change_history,
7773                    )
7774                } else {
7775                    payroll_gen.generate(
7776                        company_code,
7777                        &employees_with_salary,
7778                        period_start,
7779                        period_end,
7780                        currency,
7781                    )
7782                };
7783                snapshot.payroll_runs.push(run);
7784                snapshot.payroll_run_count += 1;
7785                snapshot.payroll_line_item_count += items.len();
7786                snapshot.payroll_line_items.extend(items);
7787            }
7788        }
7789
7790        // Generate time entries
7791        if self.config.hr.time_attendance.enabled {
7792            let mut time_gen = datasynth_generators::TimeEntryGenerator::new(seed + 31)
7793                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7794            // v3.4.2: when a temporal context is configured, time entries
7795            // respect holidays (not just weekends) and submitted_at lag
7796            // snaps to business days.
7797            if let Some(ctx) = &self.temporal_context {
7798                time_gen.set_temporal_context(Arc::clone(ctx));
7799            }
7800            let entries = time_gen.generate(
7801                &employee_ids,
7802                start_date,
7803                end_date,
7804                &self.config.hr.time_attendance,
7805            );
7806            snapshot.time_entry_count = entries.len();
7807            snapshot.time_entries = entries;
7808        }
7809
7810        // Generate expense reports
7811        if self.config.hr.expenses.enabled {
7812            let mut expense_gen = datasynth_generators::ExpenseReportGenerator::new(seed + 32)
7813                .with_pools(employee_ids.clone(), cost_center_ids.clone());
7814            expense_gen.set_country_pack(self.primary_pack().clone());
7815            // v3.4.2: snap submission / approval / paid / line-item dates
7816            // to business days when temporal_context is present.
7817            if let Some(ctx) = &self.temporal_context {
7818                expense_gen.set_temporal_context(Arc::clone(ctx));
7819            }
7820            let company_currency = self
7821                .config
7822                .companies
7823                .first()
7824                .map(|c| c.currency.as_str())
7825                .unwrap_or("USD");
7826            let reports = expense_gen.generate_with_currency(
7827                &employee_ids,
7828                start_date,
7829                end_date,
7830                &self.config.hr.expenses,
7831                company_currency,
7832            );
7833            snapshot.expense_report_count = reports.len();
7834            snapshot.expense_reports = reports;
7835        }
7836
7837        // Generate benefit enrollments (gated on payroll, since benefits require employees)
7838        if self.config.hr.payroll.enabled {
7839            let mut benefit_gen = datasynth_generators::BenefitEnrollmentGenerator::new(seed + 33);
7840            let employee_pairs: Vec<(String, String)> = self
7841                .master_data
7842                .employees
7843                .iter()
7844                .map(|e| (e.employee_id.clone(), e.display_name.clone()))
7845                .collect();
7846            let enrollments =
7847                benefit_gen.generate(company_code, &employee_pairs, start_date, currency);
7848            snapshot.benefit_enrollment_count = enrollments.len();
7849            snapshot.benefit_enrollments = enrollments;
7850        }
7851
7852        // Generate defined benefit pension plans (IAS 19 / ASC 715)
7853        if self.phase_config.generate_hr {
7854            let entity_name = self
7855                .config
7856                .companies
7857                .first()
7858                .map(|c| c.name.as_str())
7859                .unwrap_or("Entity");
7860            let period_months = self.config.global.period_months;
7861            let period_label = {
7862                let y = start_date.year();
7863                let m = start_date.month();
7864                if period_months >= 12 {
7865                    format!("FY{y}")
7866                } else {
7867                    format!("{y}-{m:02}")
7868                }
7869            };
7870            let reporting_date =
7871                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7872
7873            // Compute average annual salary from actual payroll data when available.
7874            // PayrollRun.total_gross covers all employees for one pay period; we sum
7875            // across all runs and divide by employee_count to get per-employee total,
7876            // then annualise for sub-annual periods.
7877            let avg_salary: Option<rust_decimal::Decimal> = {
7878                let employee_count = employee_ids.len();
7879                if self.config.hr.payroll.enabled
7880                    && employee_count > 0
7881                    && !snapshot.payroll_runs.is_empty()
7882                {
7883                    // Sum total gross pay across all payroll runs for this company
7884                    let total_gross: rust_decimal::Decimal = snapshot
7885                        .payroll_runs
7886                        .iter()
7887                        .filter(|r| r.company_code == company_code)
7888                        .map(|r| r.total_gross)
7889                        .sum();
7890                    if total_gross > rust_decimal::Decimal::ZERO {
7891                        // Annualise: total_gross covers `period_months` months of pay
7892                        let annual_total = if period_months > 0 && period_months < 12 {
7893                            total_gross * rust_decimal::Decimal::from(12u32)
7894                                / rust_decimal::Decimal::from(period_months)
7895                        } else {
7896                            total_gross
7897                        };
7898                        Some(
7899                            (annual_total / rust_decimal::Decimal::from(employee_count))
7900                                .round_dp(2),
7901                        )
7902                    } else {
7903                        None
7904                    }
7905                } else {
7906                    None
7907                }
7908            };
7909
7910            let mut pension_gen =
7911                datasynth_generators::PensionGenerator::new(seed.wrapping_add(34));
7912            let pension_snap = pension_gen.generate(
7913                company_code,
7914                entity_name,
7915                &period_label,
7916                reporting_date,
7917                employee_ids.len(),
7918                currency,
7919                avg_salary,
7920                period_months,
7921            );
7922            snapshot.pension_plan_count = pension_snap.plans.len();
7923            snapshot.pension_plans = pension_snap.plans;
7924            snapshot.pension_obligations = pension_snap.obligations;
7925            snapshot.pension_plan_assets = pension_snap.plan_assets;
7926            snapshot.pension_disclosures = pension_snap.disclosures;
7927            // Pension JEs are returned here so they can be added to entries
7928            // in the caller (stored temporarily on snapshot for transfer).
7929            // We embed them in the hr snapshot for simplicity; the orchestrator
7930            // will extract and extend `entries`.
7931            snapshot.pension_journal_entries = pension_snap.journal_entries;
7932        }
7933
7934        // Generate stock-based compensation (ASC 718 / IFRS 2)
7935        if self.phase_config.generate_hr && !employee_ids.is_empty() {
7936            let period_months = self.config.global.period_months;
7937            let period_label = {
7938                let y = start_date.year();
7939                let m = start_date.month();
7940                if period_months >= 12 {
7941                    format!("FY{y}")
7942                } else {
7943                    format!("{y}-{m:02}")
7944                }
7945            };
7946            let reporting_date =
7947                start_date + chrono::Months::new(period_months) - chrono::Days::new(1);
7948
7949            let mut stock_comp_gen =
7950                datasynth_generators::StockCompGenerator::new(seed.wrapping_add(35));
7951            let stock_snap = stock_comp_gen.generate(
7952                company_code,
7953                &employee_ids,
7954                start_date,
7955                &period_label,
7956                reporting_date,
7957                currency,
7958            );
7959            snapshot.stock_grant_count = stock_snap.grants.len();
7960            snapshot.stock_grants = stock_snap.grants;
7961            snapshot.stock_comp_expenses = stock_snap.expenses;
7962            snapshot.stock_comp_journal_entries = stock_snap.journal_entries;
7963        }
7964
7965        stats.payroll_run_count = snapshot.payroll_run_count;
7966        stats.time_entry_count = snapshot.time_entry_count;
7967        stats.expense_report_count = snapshot.expense_report_count;
7968        stats.benefit_enrollment_count = snapshot.benefit_enrollment_count;
7969        stats.pension_plan_count = snapshot.pension_plan_count;
7970        stats.stock_grant_count = snapshot.stock_grant_count;
7971
7972        info!(
7973            "HR data generated: {} payroll runs ({} line items), {} time entries, {} expense reports, {} benefit enrollments, {} pension plans, {} stock grants",
7974            snapshot.payroll_run_count, snapshot.payroll_line_item_count,
7975            snapshot.time_entry_count, snapshot.expense_report_count,
7976            snapshot.benefit_enrollment_count, snapshot.pension_plan_count,
7977            snapshot.stock_grant_count
7978        );
7979        self.check_resources_with_log("post-hr")?;
7980
7981        Ok(snapshot)
7982    }
7983
7984    /// Phase 17: Generate accounting standards data (revenue recognition, impairment, ECL).
7985    fn phase_accounting_standards(
7986        &mut self,
7987        ar_aging_reports: &[datasynth_core::models::subledger::ar::ARAgingReport],
7988        journal_entries: &[JournalEntry],
7989        stats: &mut EnhancedGenerationStatistics,
7990    ) -> SynthResult<AccountingStandardsSnapshot> {
7991        if !self.phase_config.generate_accounting_standards {
7992            debug!("Phase 17: Skipped (accounting standards generation disabled)");
7993            return Ok(AccountingStandardsSnapshot::default());
7994        }
7995        info!("Phase 17: Generating Accounting Standards Data");
7996
7997        let seed = self.seed;
7998        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
7999            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8000        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8001        let company_code = self
8002            .config
8003            .companies
8004            .first()
8005            .map(|c| c.code.as_str())
8006            .unwrap_or("1000");
8007        let currency = self
8008            .config
8009            .companies
8010            .first()
8011            .map(|c| c.currency.as_str())
8012            .unwrap_or("USD");
8013
8014        // Convert config framework to standards framework.
8015        // If the user explicitly set a framework in the YAML config, use that.
8016        // Otherwise, fall back to the country pack's accounting.framework field,
8017        // and if that is also absent or unrecognised, default to US GAAP.
8018        let framework = match self.config.accounting_standards.framework {
8019            Some(datasynth_config::schema::AccountingFrameworkConfig::UsGaap) => {
8020                datasynth_standards::framework::AccountingFramework::UsGaap
8021            }
8022            Some(datasynth_config::schema::AccountingFrameworkConfig::Ifrs) => {
8023                datasynth_standards::framework::AccountingFramework::Ifrs
8024            }
8025            Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting) => {
8026                datasynth_standards::framework::AccountingFramework::DualReporting
8027            }
8028            Some(datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap) => {
8029                datasynth_standards::framework::AccountingFramework::FrenchGaap
8030            }
8031            Some(datasynth_config::schema::AccountingFrameworkConfig::GermanGaap) => {
8032                datasynth_standards::framework::AccountingFramework::GermanGaap
8033            }
8034            None => {
8035                // Derive framework from the primary company's country pack
8036                let pack = self.primary_pack();
8037                let pack_fw = pack.accounting.framework.as_str();
8038                match pack_fw {
8039                    "ifrs" => datasynth_standards::framework::AccountingFramework::Ifrs,
8040                    "dual_reporting" => {
8041                        datasynth_standards::framework::AccountingFramework::DualReporting
8042                    }
8043                    "french_gaap" => {
8044                        datasynth_standards::framework::AccountingFramework::FrenchGaap
8045                    }
8046                    "german_gaap" | "hgb" => {
8047                        datasynth_standards::framework::AccountingFramework::GermanGaap
8048                    }
8049                    // "us_gaap" or any other/unrecognised value falls back to US GAAP
8050                    _ => datasynth_standards::framework::AccountingFramework::UsGaap,
8051                }
8052            }
8053        };
8054
8055        let mut snapshot = AccountingStandardsSnapshot::default();
8056
8057        // Revenue recognition
8058        if self.config.accounting_standards.revenue_recognition.enabled {
8059            let customer_ids: Vec<String> = self
8060                .master_data
8061                .customers
8062                .iter()
8063                .map(|c| c.customer_id.clone())
8064                .collect();
8065
8066            if !customer_ids.is_empty() {
8067                let mut rev_gen = datasynth_generators::RevenueRecognitionGenerator::new(seed + 40);
8068                let contracts = rev_gen.generate(
8069                    company_code,
8070                    &customer_ids,
8071                    start_date,
8072                    end_date,
8073                    currency,
8074                    &self.config.accounting_standards.revenue_recognition,
8075                    framework,
8076                );
8077                snapshot.revenue_contract_count = contracts.len();
8078                snapshot.contracts = contracts;
8079            }
8080        }
8081
8082        // Impairment testing
8083        if self.config.accounting_standards.impairment.enabled {
8084            let asset_data: Vec<(String, String, rust_decimal::Decimal)> = self
8085                .master_data
8086                .assets
8087                .iter()
8088                .map(|a| {
8089                    (
8090                        a.asset_id.clone(),
8091                        a.description.clone(),
8092                        a.acquisition_cost,
8093                    )
8094                })
8095                .collect();
8096
8097            if !asset_data.is_empty() {
8098                let mut imp_gen = datasynth_generators::ImpairmentGenerator::new(seed + 41);
8099                let tests = imp_gen.generate(
8100                    company_code,
8101                    &asset_data,
8102                    end_date,
8103                    &self.config.accounting_standards.impairment,
8104                    framework,
8105                );
8106                snapshot.impairment_test_count = tests.len();
8107                snapshot.impairment_tests = tests;
8108            }
8109        }
8110
8111        // Business combinations (IFRS 3 / ASC 805)
8112        if self
8113            .config
8114            .accounting_standards
8115            .business_combinations
8116            .enabled
8117        {
8118            let bc_config = &self.config.accounting_standards.business_combinations;
8119            let framework_str = match framework {
8120                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8121                _ => "US_GAAP",
8122            };
8123            let mut bc_gen = BusinessCombinationGenerator::new(seed + 42);
8124            let bc_snap = bc_gen.generate(
8125                company_code,
8126                currency,
8127                start_date,
8128                end_date,
8129                bc_config.acquisition_count,
8130                framework_str,
8131            );
8132            snapshot.business_combination_count = bc_snap.combinations.len();
8133            snapshot.business_combination_journal_entries = bc_snap.journal_entries;
8134            snapshot.business_combinations = bc_snap.combinations;
8135        }
8136
8137        // Expected Credit Loss (IFRS 9 / ASC 326)
8138        if self
8139            .config
8140            .accounting_standards
8141            .expected_credit_loss
8142            .enabled
8143        {
8144            let ecl_config = &self.config.accounting_standards.expected_credit_loss;
8145            let framework_str = match framework {
8146                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS_9",
8147                _ => "ASC_326",
8148            };
8149
8150            // Use AR aging data from the subledger snapshot if available;
8151            // otherwise generate synthetic bucket exposures.
8152            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8153
8154            let mut ecl_gen = EclGenerator::new(seed + 43);
8155
8156            // Collect combined bucket totals across all company AR aging reports.
8157            let bucket_exposures: Vec<(
8158                datasynth_core::models::subledger::ar::AgingBucket,
8159                rust_decimal::Decimal,
8160            )> = if ar_aging_reports.is_empty() {
8161                // No AR aging data — synthesise plausible bucket exposures.
8162                use datasynth_core::models::subledger::ar::AgingBucket;
8163                vec![
8164                    (
8165                        AgingBucket::Current,
8166                        rust_decimal::Decimal::from(500_000_u32),
8167                    ),
8168                    (
8169                        AgingBucket::Days1To30,
8170                        rust_decimal::Decimal::from(120_000_u32),
8171                    ),
8172                    (
8173                        AgingBucket::Days31To60,
8174                        rust_decimal::Decimal::from(45_000_u32),
8175                    ),
8176                    (
8177                        AgingBucket::Days61To90,
8178                        rust_decimal::Decimal::from(15_000_u32),
8179                    ),
8180                    (
8181                        AgingBucket::Over90Days,
8182                        rust_decimal::Decimal::from(8_000_u32),
8183                    ),
8184                ]
8185            } else {
8186                use datasynth_core::models::subledger::ar::AgingBucket;
8187                // Sum bucket totals from all reports.
8188                let mut totals: std::collections::HashMap<AgingBucket, rust_decimal::Decimal> =
8189                    std::collections::HashMap::new();
8190                for report in ar_aging_reports {
8191                    for (bucket, amount) in &report.bucket_totals {
8192                        *totals.entry(*bucket).or_default() += amount;
8193                    }
8194                }
8195                AgingBucket::all()
8196                    .into_iter()
8197                    .map(|b| (b, totals.get(&b).copied().unwrap_or_default()))
8198                    .collect()
8199            };
8200
8201            let ecl_snap = ecl_gen.generate(
8202                company_code,
8203                end_date,
8204                &bucket_exposures,
8205                ecl_config,
8206                &period_label,
8207                framework_str,
8208            );
8209
8210            snapshot.ecl_model_count = ecl_snap.ecl_models.len();
8211            snapshot.ecl_models = ecl_snap.ecl_models;
8212            snapshot.ecl_provision_movements = ecl_snap.provision_movements;
8213            snapshot.ecl_journal_entries = ecl_snap.journal_entries;
8214        }
8215
8216        // Provisions and contingencies (IAS 37 / ASC 450)
8217        {
8218            let framework_str = match framework {
8219                datasynth_standards::framework::AccountingFramework::Ifrs => "IFRS",
8220                _ => "US_GAAP",
8221            };
8222
8223            // Compute actual revenue from the journal entries generated so far.
8224            // The `journal_entries` slice passed to this phase contains all GL entries
8225            // up to and including Period Close. Fall back to a minimum of 100_000 to
8226            // avoid degenerate zero-based provision amounts on first-period datasets.
8227            let revenue_proxy = Self::compute_company_revenue(journal_entries, company_code)
8228                .max(rust_decimal::Decimal::from(100_000_u32));
8229
8230            let period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8231
8232            let mut prov_gen = ProvisionGenerator::new(seed + 44);
8233            let prov_snap = prov_gen.generate(
8234                company_code,
8235                currency,
8236                revenue_proxy,
8237                end_date,
8238                &period_label,
8239                framework_str,
8240                None, // prior_opening: no carry-forward data in single-period runs
8241            );
8242
8243            snapshot.provision_count = prov_snap.provisions.len();
8244            snapshot.provisions = prov_snap.provisions;
8245            snapshot.provision_movements = prov_snap.movements;
8246            snapshot.contingent_liabilities = prov_snap.contingent_liabilities;
8247            snapshot.provision_journal_entries = prov_snap.journal_entries;
8248        }
8249
8250        // IAS 21 Functional Currency Translation
8251        // For each company whose functional currency differs from the presentation
8252        // currency, generate a CurrencyTranslationResult with CTA (OCI).
8253        {
8254            let ias21_period_label = format!("{}-{:02}", end_date.year(), end_date.month());
8255
8256            let presentation_currency = self
8257                .config
8258                .global
8259                .presentation_currency
8260                .clone()
8261                .unwrap_or_else(|| self.config.global.group_currency.clone());
8262
8263            // Build a minimal rate table populated with approximate rates from
8264            // the FX model base rates (USD-based) so we can do the translation.
8265            let mut rate_table = FxRateTable::new(&presentation_currency);
8266
8267            // Populate with base rates against USD; if presentation_currency is
8268            // not USD we do a best-effort two-step conversion using the table's
8269            // triangulation support.
8270            let base_rates = base_rates_usd();
8271            for (ccy, rate) in &base_rates {
8272                rate_table.add_rate(FxRate::new(
8273                    ccy,
8274                    "USD",
8275                    RateType::Closing,
8276                    end_date,
8277                    *rate,
8278                    "SYNTHETIC",
8279                ));
8280                // Average rate = 98% of closing (approximation).
8281                // 0.98 = 98/100 = Decimal::new(98, 2)
8282                let avg = (*rate * rust_decimal::Decimal::new(98, 2)).round_dp(6);
8283                rate_table.add_rate(FxRate::new(
8284                    ccy,
8285                    "USD",
8286                    RateType::Average,
8287                    end_date,
8288                    avg,
8289                    "SYNTHETIC",
8290                ));
8291            }
8292
8293            let mut translation_results = Vec::new();
8294            for company in &self.config.companies {
8295                // Compute per-company revenue from actual JEs; fall back to 100_000 minimum
8296                // to ensure the translation produces non-trivial CTA amounts.
8297                let company_revenue = Self::compute_company_revenue(journal_entries, &company.code)
8298                    .max(rust_decimal::Decimal::from(100_000_u32));
8299
8300                let func_ccy = company
8301                    .functional_currency
8302                    .clone()
8303                    .unwrap_or_else(|| company.currency.clone());
8304
8305                let result = datasynth_generators::fx::FunctionalCurrencyTranslator::translate(
8306                    &company.code,
8307                    &func_ccy,
8308                    &presentation_currency,
8309                    &ias21_period_label,
8310                    end_date,
8311                    company_revenue,
8312                    &rate_table,
8313                );
8314                translation_results.push(result);
8315            }
8316
8317            snapshot.currency_translation_count = translation_results.len();
8318            snapshot.currency_translation_results = translation_results;
8319        }
8320
8321        stats.revenue_contract_count = snapshot.revenue_contract_count;
8322        stats.impairment_test_count = snapshot.impairment_test_count;
8323        stats.business_combination_count = snapshot.business_combination_count;
8324        stats.ecl_model_count = snapshot.ecl_model_count;
8325        stats.provision_count = snapshot.provision_count;
8326
8327        // ------------------------------------------------------------
8328        // v3.3.1: Lease accounting (IFRS 16 / ASC 842)
8329        // ------------------------------------------------------------
8330        if self.config.accounting_standards.leases.enabled {
8331            use datasynth_generators::standards::LeaseGenerator;
8332            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8333                .unwrap_or_else(|_| {
8334                    NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded 2025-01-01 is valid")
8335                });
8336            let framework =
8337                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8338            let mut lease_gen = LeaseGenerator::new(self.seed + 9500);
8339            for company in &self.config.companies {
8340                let leases = lease_gen.generate(
8341                    &company.code,
8342                    start_date,
8343                    &self.config.accounting_standards.leases,
8344                    framework,
8345                );
8346                snapshot.lease_count += leases.len();
8347                snapshot.leases.extend(leases);
8348            }
8349            info!("v3.3.1 lease accounting: {} leases", snapshot.lease_count);
8350        }
8351
8352        // ------------------------------------------------------------
8353        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
8354        // ------------------------------------------------------------
8355        if self.config.accounting_standards.fair_value.enabled {
8356            use datasynth_generators::standards::FairValueGenerator;
8357            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8358                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8359                + chrono::Months::new(self.config.global.period_months);
8360            let framework =
8361                Self::resolve_accounting_framework(self.config.accounting_standards.framework);
8362            let mut fv_gen = FairValueGenerator::new(self.seed + 9600);
8363            for company in &self.config.companies {
8364                let measurements = fv_gen.generate(
8365                    &company.code,
8366                    end_date,
8367                    &company.currency,
8368                    &self.config.accounting_standards.fair_value,
8369                    framework,
8370                );
8371                snapshot.fair_value_measurement_count += measurements.len();
8372                snapshot.fair_value_measurements.extend(measurements);
8373            }
8374            info!(
8375                "v3.3.1 fair value measurements: {}",
8376                snapshot.fair_value_measurement_count
8377            );
8378        }
8379
8380        // ------------------------------------------------------------
8381        // v3.3.1: Framework reconciliation (dual reporting only)
8382        // ------------------------------------------------------------
8383        if self.config.accounting_standards.generate_differences
8384            && matches!(
8385                self.config.accounting_standards.framework,
8386                Some(datasynth_config::schema::AccountingFrameworkConfig::DualReporting)
8387            )
8388        {
8389            use datasynth_generators::standards::FrameworkReconciliationGenerator;
8390            let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8391                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2025, 1, 1).expect("hardcoded valid"))
8392                + chrono::Months::new(self.config.global.period_months);
8393            let mut recon_gen = FrameworkReconciliationGenerator::new(self.seed + 9700);
8394            for company in &self.config.companies {
8395                let (records, reconciliation) = recon_gen.generate(&company.code, end_date);
8396                snapshot.framework_difference_count += records.len();
8397                snapshot.framework_differences.extend(records);
8398                snapshot.framework_reconciliations.push(reconciliation);
8399            }
8400            info!(
8401                "v3.3.1 framework reconciliation: {} differences across {} entities",
8402                snapshot.framework_difference_count,
8403                snapshot.framework_reconciliations.len()
8404            );
8405        }
8406
8407        info!(
8408            "Accounting standards data generated: {} revenue contracts, {} impairment tests, {} business combinations, {} ECL models, {} provisions, {} IAS 21 translations, {} leases, {} FV measurements, {} framework differences",
8409            snapshot.revenue_contract_count,
8410            snapshot.impairment_test_count,
8411            snapshot.business_combination_count,
8412            snapshot.ecl_model_count,
8413            snapshot.provision_count,
8414            snapshot.currency_translation_count,
8415            snapshot.lease_count,
8416            snapshot.fair_value_measurement_count,
8417            snapshot.framework_difference_count,
8418        );
8419        self.check_resources_with_log("post-accounting-standards")?;
8420
8421        Ok(snapshot)
8422    }
8423
8424    /// v3.3.1: helper to resolve the accounting-standards framework enum
8425    /// from config into the `datasynth_standards::framework::AccountingFramework`
8426    /// type expected by standards generators. Falls back to US GAAP.
8427    fn resolve_accounting_framework(
8428        cfg: Option<datasynth_config::schema::AccountingFrameworkConfig>,
8429    ) -> datasynth_standards::framework::AccountingFramework {
8430        use datasynth_config::schema::AccountingFrameworkConfig as Cfg;
8431        use datasynth_standards::framework::AccountingFramework as Fw;
8432        match cfg {
8433            Some(Cfg::Ifrs) => Fw::Ifrs,
8434            Some(Cfg::DualReporting) => Fw::DualReporting,
8435            Some(Cfg::FrenchGaap) => Fw::FrenchGaap,
8436            Some(Cfg::GermanGaap) => Fw::GermanGaap,
8437            _ => Fw::UsGaap,
8438        }
8439    }
8440
8441    /// Phase 18: Generate manufacturing data (production orders, quality inspections, cycle counts).
8442    fn phase_manufacturing(
8443        &mut self,
8444        stats: &mut EnhancedGenerationStatistics,
8445    ) -> SynthResult<ManufacturingSnapshot> {
8446        if !self.phase_config.generate_manufacturing {
8447            debug!("Phase 18: Skipped (manufacturing generation disabled)");
8448            return Ok(ManufacturingSnapshot::default());
8449        }
8450        info!("Phase 18: Generating Manufacturing Data");
8451
8452        let seed = self.seed;
8453        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8454            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8455        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8456        let company_code = self
8457            .config
8458            .companies
8459            .first()
8460            .map(|c| c.code.as_str())
8461            .unwrap_or("1000");
8462
8463        let material_data: Vec<(String, String)> = self
8464            .master_data
8465            .materials
8466            .iter()
8467            .map(|m| (m.material_id.clone(), m.description.clone()))
8468            .collect();
8469
8470        if material_data.is_empty() {
8471            debug!("Phase 18: Skipped (no materials available)");
8472            return Ok(ManufacturingSnapshot::default());
8473        }
8474
8475        let mut snapshot = ManufacturingSnapshot::default();
8476
8477        // Generate production orders
8478        let mut prod_gen = datasynth_generators::ProductionOrderGenerator::new(seed + 350);
8479        // v3.4.3: snap planned / actual / operation dates to business days.
8480        if let Some(ctx) = &self.temporal_context {
8481            prod_gen.set_temporal_context(Arc::clone(ctx));
8482        }
8483        let production_orders = prod_gen.generate(
8484            company_code,
8485            &material_data,
8486            start_date,
8487            end_date,
8488            &self.config.manufacturing.production_orders,
8489            &self.config.manufacturing.costing,
8490            &self.config.manufacturing.routing,
8491        );
8492        snapshot.production_order_count = production_orders.len();
8493
8494        // Generate quality inspections from production orders
8495        let inspection_data: Vec<(String, String, String)> = production_orders
8496            .iter()
8497            .map(|po| {
8498                (
8499                    po.order_id.clone(),
8500                    po.material_id.clone(),
8501                    po.material_description.clone(),
8502                )
8503            })
8504            .collect();
8505
8506        snapshot.production_orders = production_orders;
8507
8508        if !inspection_data.is_empty() {
8509            let mut qi_gen = datasynth_generators::QualityInspectionGenerator::new(seed + 351);
8510            let inspections = qi_gen.generate(company_code, &inspection_data, end_date);
8511            snapshot.quality_inspection_count = inspections.len();
8512            snapshot.quality_inspections = inspections;
8513        }
8514
8515        // Generate cycle counts (one per month)
8516        let storage_locations: Vec<(String, String)> = material_data
8517            .iter()
8518            .enumerate()
8519            .map(|(i, (mid, _))| (mid.clone(), format!("SL-{:03}", (i % 10) + 1)))
8520            .collect();
8521
8522        let employee_ids: Vec<String> = self
8523            .master_data
8524            .employees
8525            .iter()
8526            .map(|e| e.employee_id.clone())
8527            .collect();
8528        let mut cc_gen = datasynth_generators::CycleCountGenerator::new(seed + 352)
8529            .with_employee_pool(employee_ids);
8530        let mut cycle_count_total = 0usize;
8531        for month in 0..self.config.global.period_months {
8532            let count_date = start_date + chrono::Months::new(month);
8533            let items_per_count = storage_locations.len().clamp(10, 50);
8534            let cc = cc_gen.generate(
8535                company_code,
8536                &storage_locations,
8537                count_date,
8538                items_per_count,
8539            );
8540            snapshot.cycle_counts.push(cc);
8541            cycle_count_total += 1;
8542        }
8543        snapshot.cycle_count_count = cycle_count_total;
8544
8545        // Generate BOM components
8546        let mut bom_gen = datasynth_generators::BomGenerator::new(seed + 353);
8547        let bom_components = bom_gen.generate(company_code, &material_data);
8548        snapshot.bom_component_count = bom_components.len();
8549        snapshot.bom_components = bom_components;
8550
8551        // Generate inventory movements — link GoodsIssue movements to real production order IDs
8552        let currency = self
8553            .config
8554            .companies
8555            .first()
8556            .map(|c| c.currency.as_str())
8557            .unwrap_or("USD");
8558        let production_order_ids: Vec<String> = snapshot
8559            .production_orders
8560            .iter()
8561            .map(|po| po.order_id.clone())
8562            .collect();
8563        let mut inv_mov_gen = datasynth_generators::InventoryMovementGenerator::new(seed + 354);
8564        let inventory_movements = inv_mov_gen.generate_with_production_orders(
8565            company_code,
8566            &material_data,
8567            start_date,
8568            end_date,
8569            2,
8570            currency,
8571            &production_order_ids,
8572        );
8573        snapshot.inventory_movement_count = inventory_movements.len();
8574        snapshot.inventory_movements = inventory_movements;
8575
8576        stats.production_order_count = snapshot.production_order_count;
8577        stats.quality_inspection_count = snapshot.quality_inspection_count;
8578        stats.cycle_count_count = snapshot.cycle_count_count;
8579        stats.bom_component_count = snapshot.bom_component_count;
8580        stats.inventory_movement_count = snapshot.inventory_movement_count;
8581
8582        info!(
8583            "Manufacturing data generated: {} production orders, {} quality inspections, {} cycle counts, {} BOM components, {} inventory movements",
8584            snapshot.production_order_count, snapshot.quality_inspection_count, snapshot.cycle_count_count,
8585            snapshot.bom_component_count, snapshot.inventory_movement_count
8586        );
8587        self.check_resources_with_log("post-manufacturing")?;
8588
8589        Ok(snapshot)
8590    }
8591
8592    /// Phase 19: Generate sales quotes, management KPIs, and budgets.
8593    fn phase_sales_kpi_budgets(
8594        &mut self,
8595        coa: &Arc<ChartOfAccounts>,
8596        financial_reporting: &FinancialReportingSnapshot,
8597        entries: &[JournalEntry],
8598        stats: &mut EnhancedGenerationStatistics,
8599    ) -> SynthResult<SalesKpiBudgetsSnapshot> {
8600        if !self.phase_config.generate_sales_kpi_budgets {
8601            debug!("Phase 19: Skipped (sales/KPI/budget generation disabled)");
8602            return Ok(SalesKpiBudgetsSnapshot::default());
8603        }
8604        info!("Phase 19: Generating Sales Quotes, KPIs, and Budgets");
8605
8606        let seed = self.seed;
8607        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8608            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8609        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
8610        let company_code = self
8611            .config
8612            .companies
8613            .first()
8614            .map(|c| c.code.as_str())
8615            .unwrap_or("1000");
8616
8617        let mut snapshot = SalesKpiBudgetsSnapshot::default();
8618
8619        // Sales Quotes
8620        if self.config.sales_quotes.enabled {
8621            let customer_data: Vec<(String, String)> = self
8622                .master_data
8623                .customers
8624                .iter()
8625                .map(|c| (c.customer_id.clone(), c.name.clone()))
8626                .collect();
8627            let material_data: Vec<(String, String)> = self
8628                .master_data
8629                .materials
8630                .iter()
8631                .map(|m| (m.material_id.clone(), m.description.clone()))
8632                .collect();
8633
8634            if !customer_data.is_empty() && !material_data.is_empty() {
8635                let employee_ids: Vec<String> = self
8636                    .master_data
8637                    .employees
8638                    .iter()
8639                    .map(|e| e.employee_id.clone())
8640                    .collect();
8641                let customer_ids: Vec<String> = self
8642                    .master_data
8643                    .customers
8644                    .iter()
8645                    .map(|c| c.customer_id.clone())
8646                    .collect();
8647                let company_currency = self
8648                    .config
8649                    .companies
8650                    .first()
8651                    .map(|c| c.currency.as_str())
8652                    .unwrap_or("USD");
8653
8654                let mut quote_gen = datasynth_generators::SalesQuoteGenerator::new(seed + 60)
8655                    .with_pools(employee_ids, customer_ids);
8656                let quotes = quote_gen.generate_with_currency(
8657                    company_code,
8658                    &customer_data,
8659                    &material_data,
8660                    start_date,
8661                    end_date,
8662                    &self.config.sales_quotes,
8663                    company_currency,
8664                );
8665                snapshot.sales_quote_count = quotes.len();
8666                snapshot.sales_quotes = quotes;
8667            }
8668        }
8669
8670        // Management KPIs
8671        if self.config.financial_reporting.management_kpis.enabled {
8672            let mut kpi_gen = datasynth_generators::KpiGenerator::new(seed + 61);
8673            let mut kpis = kpi_gen.generate(
8674                company_code,
8675                start_date,
8676                end_date,
8677                &self.config.financial_reporting.management_kpis,
8678            );
8679
8680            // Override financial KPIs with actual data from financial statements
8681            {
8682                use rust_decimal::Decimal;
8683
8684                if let Some(income_stmt) =
8685                    financial_reporting.financial_statements.iter().find(|fs| {
8686                        fs.statement_type == StatementType::IncomeStatement
8687                            && fs.company_code == company_code
8688                    })
8689                {
8690                    // Extract revenue and COGS from income statement line items
8691                    let total_revenue: Decimal = income_stmt
8692                        .line_items
8693                        .iter()
8694                        .filter(|li| li.section.contains("Revenue") && !li.is_total)
8695                        .map(|li| li.amount)
8696                        .sum();
8697                    let total_cogs: Decimal = income_stmt
8698                        .line_items
8699                        .iter()
8700                        .filter(|li| {
8701                            (li.section.contains("Cost") || li.line_code.starts_with("IS-COGS"))
8702                                && !li.is_total
8703                        })
8704                        .map(|li| li.amount.abs())
8705                        .sum();
8706                    let total_opex: Decimal = income_stmt
8707                        .line_items
8708                        .iter()
8709                        .filter(|li| {
8710                            li.section.contains("Expense")
8711                                && !li.is_total
8712                                && !li.section.contains("Cost")
8713                        })
8714                        .map(|li| li.amount.abs())
8715                        .sum();
8716
8717                    if total_revenue > Decimal::ZERO {
8718                        let hundred = Decimal::from(100);
8719                        let gross_margin_pct =
8720                            ((total_revenue - total_cogs) * hundred / total_revenue).round_dp(2);
8721                        let operating_income = total_revenue - total_cogs - total_opex;
8722                        let op_margin_pct =
8723                            (operating_income * hundred / total_revenue).round_dp(2);
8724
8725                        // Override gross margin and operating margin KPIs
8726                        for kpi in &mut kpis {
8727                            if kpi.name == "Gross Margin" {
8728                                kpi.value = gross_margin_pct;
8729                            } else if kpi.name == "Operating Margin" {
8730                                kpi.value = op_margin_pct;
8731                            }
8732                        }
8733                    }
8734                }
8735
8736                // Override Current Ratio from balance sheet
8737                if let Some(bs) = financial_reporting.financial_statements.iter().find(|fs| {
8738                    fs.statement_type == StatementType::BalanceSheet
8739                        && fs.company_code == company_code
8740                }) {
8741                    let current_assets: Decimal = bs
8742                        .line_items
8743                        .iter()
8744                        .filter(|li| li.section.contains("Current Assets") && !li.is_total)
8745                        .map(|li| li.amount)
8746                        .sum();
8747                    let current_liabilities: Decimal = bs
8748                        .line_items
8749                        .iter()
8750                        .filter(|li| li.section.contains("Current Liabilities") && !li.is_total)
8751                        .map(|li| li.amount.abs())
8752                        .sum();
8753
8754                    if current_liabilities > Decimal::ZERO {
8755                        let current_ratio = (current_assets / current_liabilities).round_dp(2);
8756                        for kpi in &mut kpis {
8757                            if kpi.name == "Current Ratio" {
8758                                kpi.value = current_ratio;
8759                            }
8760                        }
8761                    }
8762                }
8763            }
8764
8765            snapshot.kpi_count = kpis.len();
8766            snapshot.kpis = kpis;
8767        }
8768
8769        // Budgets
8770        if self.config.financial_reporting.budgets.enabled {
8771            let account_data: Vec<(String, String)> = coa
8772                .accounts
8773                .iter()
8774                .map(|a| (a.account_number.clone(), a.short_description.clone()))
8775                .collect();
8776
8777            if !account_data.is_empty() {
8778                let fiscal_year = start_date.year() as u32;
8779                let mut budget_gen = datasynth_generators::BudgetGenerator::new(seed + 62);
8780                let budget = budget_gen.generate(
8781                    company_code,
8782                    fiscal_year,
8783                    &account_data,
8784                    &self.config.financial_reporting.budgets,
8785                );
8786                snapshot.budget_line_count = budget.line_items.len();
8787                snapshot.budgets.push(budget);
8788            }
8789        }
8790
8791        // Phase-2 evidence layer: external expectations (ISA 520, aggregate deviation) + evidence
8792        // anchors (ISA 505, external corroboration). Both consume the per-account actual-vs-legitimate
8793        // split derived from the fraud-flagged journal entries, so they share one pass over `entries`.
8794        let want_expectations = self
8795            .config
8796            .financial_reporting
8797            .external_expectations
8798            .enabled;
8799        let want_anchors = self.config.financial_reporting.evidence_anchors.enabled;
8800        if want_expectations || want_anchors {
8801            use std::collections::HashMap;
8802            // account -> (actual_total, legitimate_total, je_count)
8803            let mut totals: HashMap<String, (Decimal, Decimal, u32)> = HashMap::new();
8804            for je in entries {
8805                let is_fraud = je.header.is_fraud;
8806                let mut touched: Vec<&str> = Vec::new();
8807                for line in &je.lines {
8808                    let amt = line.debit_amount.abs() + line.credit_amount.abs();
8809                    let e = totals.entry(line.gl_account.clone()).or_insert((
8810                        Decimal::ZERO,
8811                        Decimal::ZERO,
8812                        0,
8813                    ));
8814                    e.0 += amt;
8815                    if !is_fraud {
8816                        e.1 += amt;
8817                    }
8818                    if !touched.contains(&line.gl_account.as_str()) {
8819                        touched.push(line.gl_account.as_str());
8820                        e.2 += 1;
8821                    }
8822                }
8823            }
8824            let fiscal_year = start_date.year();
8825
8826            // ISA 520 — substantive-analytics expectations (aggregate deviation)
8827            if want_expectations {
8828                let accounts: Vec<
8829                    datasynth_generators::external_expectation_generator::AccountActuals,
8830                > = coa
8831                    .accounts
8832                    .iter()
8833                    .filter_map(|a| {
8834                        totals.get(&a.account_number).map(|(actual, legit, _)| {
8835                            datasynth_generators::external_expectation_generator::AccountActuals {
8836                                account_code: a.account_number.clone(),
8837                                account_description: a.short_description.clone(),
8838                                account_type: a.account_type,
8839                                actual_total: *actual,
8840                                legit_total: *legit,
8841                            }
8842                        })
8843                    })
8844                    .collect();
8845                if !accounts.is_empty() {
8846                    let mut exp_gen =
8847                        datasynth_generators::ExternalExpectationsGenerator::new(seed + 64);
8848                    let expectations = exp_gen.generate(
8849                        company_code,
8850                        fiscal_year,
8851                        &accounts,
8852                        &self.config.financial_reporting.external_expectations,
8853                    );
8854                    let flagged = expectations.iter().filter(|e| e.exceeds_band).count();
8855                    info!(
8856                        "External expectations: {} material accounts scored, {} exceed the ISA-520 band",
8857                        expectations.len(),
8858                        flagged
8859                    );
8860                    snapshot.external_expectations = expectations;
8861                }
8862            }
8863
8864            // ISA 505 — external-corroboration evidence anchors (dangling-node detection)
8865            if want_anchors {
8866                let accounts: Vec<
8867                    datasynth_generators::evidence_anchor_generator::AccountActivity,
8868                > = coa
8869                    .accounts
8870                    .iter()
8871                    .filter_map(|a| {
8872                        totals.get(&a.account_number).map(|(actual, legit, n)| {
8873                            datasynth_generators::evidence_anchor_generator::AccountActivity {
8874                                account_code: a.account_number.clone(),
8875                                account_description: a.short_description.clone(),
8876                                account_type: a.account_type,
8877                                total_activity: *actual,
8878                                fraud_activity: *actual - *legit,
8879                                transaction_count: *n,
8880                            }
8881                        })
8882                    })
8883                    .collect();
8884                if !accounts.is_empty() {
8885                    let mut anchor_gen =
8886                        datasynth_generators::EvidenceAnchorGenerator::new(seed + 65);
8887                    let anchors = anchor_gen.generate(
8888                        company_code,
8889                        fiscal_year,
8890                        &accounts,
8891                        &self.config.financial_reporting.evidence_anchors,
8892                    );
8893                    let dangling = anchors.iter().filter(|a| a.is_dangling).count();
8894                    info!(
8895                        "Evidence anchors: {} material accounts scored, {} dangling (uncorroborated)",
8896                        anchors.len(),
8897                        dangling
8898                    );
8899                    snapshot.evidence_anchors = anchors;
8900                }
8901            }
8902        }
8903
8904        stats.sales_quote_count = snapshot.sales_quote_count;
8905        stats.kpi_count = snapshot.kpi_count;
8906        stats.budget_line_count = snapshot.budget_line_count;
8907
8908        info!(
8909            "Sales/KPI/Budget data generated: {} quotes, {} KPIs, {} budget lines",
8910            snapshot.sales_quote_count, snapshot.kpi_count, snapshot.budget_line_count
8911        );
8912        self.check_resources_with_log("post-sales-kpi-budgets")?;
8913
8914        Ok(snapshot)
8915    }
8916
8917    /// Compute pre-tax income for a single company from actual journal entries.
8918    ///
8919    /// Pre-tax income = Σ revenue account net credits − Σ expense account net debits.
8920    /// Revenue accounts (4xxx) are credit-normal; expense accounts (5xxx, 6xxx, 7xxx) are
8921    /// debit-normal.  The calculation mirrors `DeferredTaxGenerator::estimate_pre_tax_income`
8922    /// and the period-close engine so that all three use a consistent definition.
8923    fn compute_pre_tax_income(
8924        company_code: &str,
8925        journal_entries: &[JournalEntry],
8926    ) -> rust_decimal::Decimal {
8927        use datasynth_core::accounts::AccountCategory;
8928        use rust_decimal::Decimal;
8929
8930        let mut total_revenue = Decimal::ZERO;
8931        let mut total_expenses = Decimal::ZERO;
8932
8933        for je in journal_entries {
8934            if je.header.company_code != company_code {
8935                continue;
8936            }
8937            for line in &je.lines {
8938                let cat = AccountCategory::from_account(&line.gl_account);
8939                match cat {
8940                    AccountCategory::Revenue => {
8941                        total_revenue += line.credit_amount - line.debit_amount;
8942                    }
8943                    AccountCategory::Cogs
8944                    | AccountCategory::OperatingExpense
8945                    | AccountCategory::OtherIncomeExpense => {
8946                        total_expenses += line.debit_amount - line.credit_amount;
8947                    }
8948                    _ => {}
8949                }
8950            }
8951        }
8952
8953        let pti = (total_revenue - total_expenses).round_dp(2);
8954        if pti == rust_decimal::Decimal::ZERO {
8955            // No income statement activity yet — fall back to a synthetic value so the
8956            // tax provision generator can still produce meaningful output.
8957            rust_decimal::Decimal::from(1_000_000u32)
8958        } else {
8959            pti
8960        }
8961    }
8962
8963    /// Phase 20: Generate tax jurisdictions, tax codes, and tax lines from invoices.
8964    fn phase_tax_generation(
8965        &mut self,
8966        document_flows: &DocumentFlowSnapshot,
8967        journal_entries: &[JournalEntry],
8968        stats: &mut EnhancedGenerationStatistics,
8969    ) -> SynthResult<TaxSnapshot> {
8970        if !self.phase_config.generate_tax {
8971            debug!("Phase 20: Skipped (tax generation disabled)");
8972            return Ok(TaxSnapshot::default());
8973        }
8974        info!("Phase 20: Generating Tax Data");
8975
8976        let seed = self.seed;
8977        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
8978            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
8979        let fiscal_year = start_date.year();
8980        let company_code = self
8981            .config
8982            .companies
8983            .first()
8984            .map(|c| c.code.as_str())
8985            .unwrap_or("1000");
8986
8987        let mut gen = datasynth_generators::TaxCodeGenerator::with_config(
8988            seed + 370,
8989            self.config.tax.clone(),
8990        );
8991
8992        let pack = self.primary_pack().clone();
8993        let (jurisdictions, codes) =
8994            gen.generate_from_country_pack(&pack, company_code, fiscal_year);
8995
8996        // Generate tax provisions for each company
8997        let mut provisions = Vec::new();
8998        if self.config.tax.provisions.enabled {
8999            let mut provision_gen = datasynth_generators::TaxProvisionGenerator::new(seed + 371);
9000            for company in &self.config.companies {
9001                let pre_tax_income = Self::compute_pre_tax_income(&company.code, journal_entries);
9002                let statutory_rate = rust_decimal::Decimal::new(
9003                    (self.config.tax.provisions.statutory_rate.clamp(0.0, 1.0) * 100.0) as i64,
9004                    2,
9005                );
9006                let provision = provision_gen.generate(
9007                    &company.code,
9008                    start_date,
9009                    pre_tax_income,
9010                    statutory_rate,
9011                );
9012                provisions.push(provision);
9013            }
9014        }
9015
9016        // Generate tax lines from document invoices
9017        let mut tax_lines = Vec::new();
9018        if !codes.is_empty() {
9019            let mut tax_line_gen = datasynth_generators::TaxLineGenerator::new(
9020                datasynth_generators::TaxLineGeneratorConfig::default(),
9021                codes.clone(),
9022                seed + 372,
9023            );
9024
9025            // Tax lines from vendor invoices (input tax)
9026            // Use the first company's country as buyer country
9027            let buyer_country = self
9028                .config
9029                .companies
9030                .first()
9031                .map(|c| c.country.as_str())
9032                .unwrap_or("US");
9033            for vi in &document_flows.vendor_invoices {
9034                let lines = tax_line_gen.generate_for_document(
9035                    datasynth_core::models::TaxableDocumentType::VendorInvoice,
9036                    &vi.header.document_id,
9037                    buyer_country, // seller approx same country
9038                    buyer_country,
9039                    vi.payable_amount,
9040                    vi.header.document_date,
9041                    None,
9042                );
9043                tax_lines.extend(lines);
9044            }
9045
9046            // Tax lines from customer invoices (output tax)
9047            for ci in &document_flows.customer_invoices {
9048                let lines = tax_line_gen.generate_for_document(
9049                    datasynth_core::models::TaxableDocumentType::CustomerInvoice,
9050                    &ci.header.document_id,
9051                    buyer_country, // seller is the company
9052                    buyer_country,
9053                    ci.total_gross_amount,
9054                    ci.header.document_date,
9055                    None,
9056                );
9057                tax_lines.extend(lines);
9058            }
9059        }
9060
9061        // Generate deferred tax data (IAS 12 / ASC 740) for each company
9062        let deferred_tax = {
9063            let companies: Vec<(&str, &str)> = self
9064                .config
9065                .companies
9066                .iter()
9067                .map(|c| (c.code.as_str(), c.country.as_str()))
9068                .collect();
9069            let mut deferred_gen = datasynth_generators::DeferredTaxGenerator::new(seed + 373);
9070            deferred_gen.generate(&companies, start_date, journal_entries)
9071        };
9072
9073        // Build a document_id → posting_date map so each tax JE uses its
9074        // source document's date rather than a blanket period-end date.
9075        let mut doc_dates: std::collections::HashMap<String, NaiveDate> =
9076            std::collections::HashMap::new();
9077        for vi in &document_flows.vendor_invoices {
9078            doc_dates.insert(vi.header.document_id.clone(), vi.header.document_date);
9079        }
9080        for ci in &document_flows.customer_invoices {
9081            doc_dates.insert(ci.header.document_id.clone(), ci.header.document_date);
9082        }
9083
9084        // Generate tax posting JEs (tax payable/receivable) from computed tax lines
9085        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9086        let tax_posting_journal_entries = if !tax_lines.is_empty() {
9087            let jes = datasynth_generators::TaxPostingGenerator::generate_tax_posting_jes(
9088                &tax_lines,
9089                company_code,
9090                &doc_dates,
9091                end_date,
9092            );
9093            debug!("Generated {} tax posting JEs", jes.len());
9094            jes
9095        } else {
9096            Vec::new()
9097        };
9098
9099        let snapshot = TaxSnapshot {
9100            jurisdiction_count: jurisdictions.len(),
9101            code_count: codes.len(),
9102            jurisdictions,
9103            codes,
9104            tax_provisions: provisions,
9105            tax_lines,
9106            tax_returns: Vec::new(),
9107            withholding_records: Vec::new(),
9108            tax_anomaly_labels: Vec::new(),
9109            deferred_tax,
9110            tax_posting_journal_entries,
9111        };
9112
9113        stats.tax_jurisdiction_count = snapshot.jurisdiction_count;
9114        stats.tax_code_count = snapshot.code_count;
9115        stats.tax_provision_count = snapshot.tax_provisions.len();
9116        stats.tax_line_count = snapshot.tax_lines.len();
9117
9118        info!(
9119            "Tax data generated: {} jurisdictions, {} codes, {} provisions, {} temp diffs, {} deferred JEs, {} tax posting JEs",
9120            snapshot.jurisdiction_count,
9121            snapshot.code_count,
9122            snapshot.tax_provisions.len(),
9123            snapshot.deferred_tax.temporary_differences.len(),
9124            snapshot.deferred_tax.journal_entries.len(),
9125            snapshot.tax_posting_journal_entries.len(),
9126        );
9127        self.check_resources_with_log("post-tax")?;
9128
9129        Ok(snapshot)
9130    }
9131
9132    /// Phase 21: Generate ESG data (emissions, energy, water, waste, social, governance, disclosures).
9133    fn phase_esg_generation(
9134        &mut self,
9135        document_flows: &DocumentFlowSnapshot,
9136        manufacturing: &ManufacturingSnapshot,
9137        stats: &mut EnhancedGenerationStatistics,
9138    ) -> SynthResult<EsgSnapshot> {
9139        if !self.phase_config.generate_esg {
9140            debug!("Phase 21: Skipped (ESG generation disabled)");
9141            return Ok(EsgSnapshot::default());
9142        }
9143        let degradation = self.check_resources()?;
9144        if degradation >= DegradationLevel::Reduced {
9145            debug!(
9146                "Phase skipped due to resource pressure (degradation: {:?})",
9147                degradation
9148            );
9149            return Ok(EsgSnapshot::default());
9150        }
9151        info!("Phase 21: Generating ESG Data");
9152
9153        let seed = self.seed;
9154        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9155            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9156        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9157        let entity_id = self
9158            .config
9159            .companies
9160            .first()
9161            .map(|c| c.code.as_str())
9162            .unwrap_or("1000");
9163
9164        let esg_cfg = &self.config.esg;
9165        let mut snapshot = EsgSnapshot::default();
9166
9167        // Energy consumption (feeds into scope 1 & 2 emissions)
9168        let mut energy_gen = datasynth_generators::EnergyGenerator::new(
9169            esg_cfg.environmental.energy.clone(),
9170            seed + 80,
9171        );
9172        let energy_records = energy_gen.generate(entity_id, start_date, end_date);
9173
9174        // Water usage
9175        let facility_count = esg_cfg.environmental.energy.facility_count;
9176        let mut water_gen = datasynth_generators::WaterGenerator::new(seed + 81, facility_count);
9177        snapshot.water = water_gen.generate(entity_id, start_date, end_date);
9178
9179        // Waste
9180        let mut waste_gen = datasynth_generators::WasteGenerator::new(
9181            seed + 82,
9182            esg_cfg.environmental.waste.diversion_target,
9183            facility_count,
9184        );
9185        snapshot.waste = waste_gen.generate(entity_id, start_date, end_date);
9186
9187        // Emissions (scope 1, 2, 3)
9188        let mut emission_gen =
9189            datasynth_generators::EmissionGenerator::new(esg_cfg.environmental.clone(), seed + 83);
9190
9191        // Build EnergyInput from energy_records
9192        let mut energy_inputs: Vec<datasynth_generators::EnergyInput> = energy_records
9193            .iter()
9194            .map(|e| datasynth_generators::EnergyInput {
9195                facility_id: e.facility_id.clone(),
9196                energy_type: match e.energy_source {
9197                    EnergySourceType::NaturalGas => {
9198                        datasynth_generators::EnergyInputType::NaturalGas
9199                    }
9200                    EnergySourceType::Diesel => datasynth_generators::EnergyInputType::Diesel,
9201                    EnergySourceType::Coal => datasynth_generators::EnergyInputType::Coal,
9202                    _ => datasynth_generators::EnergyInputType::Electricity,
9203                },
9204                consumption_kwh: e.consumption_kwh,
9205                period: e.period,
9206            })
9207            .collect();
9208
9209        // v2.4: Bridge manufacturing production data → energy inputs for Scope 1/2
9210        if !manufacturing.production_orders.is_empty() {
9211            let mfg_energy = datasynth_generators::EmissionGenerator::energy_from_production(
9212                &manufacturing.production_orders,
9213                rust_decimal::Decimal::new(50, 0), // 50 kWh per machine hour
9214                rust_decimal::Decimal::new(2, 0),  // 2 kWh natural gas per unit
9215            );
9216            if !mfg_energy.is_empty() {
9217                info!(
9218                    "ESG: {} energy inputs derived from {} production orders",
9219                    mfg_energy.len(),
9220                    manufacturing.production_orders.len(),
9221                );
9222                energy_inputs.extend(mfg_energy);
9223            }
9224        }
9225
9226        let mut emissions = Vec::new();
9227        emissions.extend(emission_gen.generate_scope1(entity_id, &energy_inputs));
9228        emissions.extend(emission_gen.generate_scope2(entity_id, &energy_inputs));
9229
9230        // Scope 3: use vendor spend data from actual payments
9231        let vendor_payment_totals: HashMap<String, rust_decimal::Decimal> = {
9232            let mut totals: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9233            for payment in &document_flows.payments {
9234                if payment.is_vendor {
9235                    *totals
9236                        .entry(payment.business_partner_id.clone())
9237                        .or_default() += payment.amount;
9238                }
9239            }
9240            totals
9241        };
9242        let vendor_spend: Vec<datasynth_generators::VendorSpendInput> = self
9243            .master_data
9244            .vendors
9245            .iter()
9246            .map(|v| {
9247                let spend = vendor_payment_totals
9248                    .get(&v.vendor_id)
9249                    .copied()
9250                    .unwrap_or_else(|| rust_decimal::Decimal::new(10000, 0));
9251                datasynth_generators::VendorSpendInput {
9252                    vendor_id: v.vendor_id.clone(),
9253                    category: format!("{:?}", v.vendor_type).to_lowercase(),
9254                    spend,
9255                    country: v.country.clone(),
9256                }
9257            })
9258            .collect();
9259        if !vendor_spend.is_empty() {
9260            emissions.extend(emission_gen.generate_scope3_purchased_goods(
9261                entity_id,
9262                &vendor_spend,
9263                start_date,
9264                end_date,
9265            ));
9266        }
9267
9268        // Business travel & commuting (scope 3)
9269        let headcount = self.master_data.employees.len() as u32;
9270        if headcount > 0 {
9271            let travel_spend = rust_decimal::Decimal::new(headcount as i64 * 2000, 0);
9272            emissions.extend(emission_gen.generate_scope3_business_travel(
9273                entity_id,
9274                travel_spend,
9275                start_date,
9276            ));
9277            emissions
9278                .extend(emission_gen.generate_scope3_commuting(entity_id, headcount, start_date));
9279        }
9280
9281        snapshot.emission_count = emissions.len();
9282        snapshot.emissions = emissions;
9283        snapshot.energy = energy_records;
9284
9285        // Social: Workforce diversity, pay equity, safety
9286        let mut workforce_gen =
9287            datasynth_generators::WorkforceGenerator::new(esg_cfg.social.clone(), seed + 84);
9288        let total_headcount = headcount.max(100);
9289        snapshot.diversity =
9290            workforce_gen.generate_diversity(entity_id, total_headcount, start_date);
9291        snapshot.pay_equity = workforce_gen.generate_pay_equity(entity_id, start_date);
9292
9293        // v2.4: Derive additional workforce diversity metrics from actual employee data
9294        if !self.master_data.employees.is_empty() {
9295            let hr_diversity = workforce_gen.generate_diversity_from_employees(
9296                entity_id,
9297                &self.master_data.employees,
9298                end_date,
9299            );
9300            if !hr_diversity.is_empty() {
9301                info!(
9302                    "ESG: {} diversity metrics derived from {} actual employees",
9303                    hr_diversity.len(),
9304                    self.master_data.employees.len(),
9305                );
9306                snapshot.diversity.extend(hr_diversity);
9307            }
9308        }
9309
9310        snapshot.safety_incidents = workforce_gen.generate_safety_incidents(
9311            entity_id,
9312            facility_count,
9313            start_date,
9314            end_date,
9315        );
9316
9317        // Compute safety metrics
9318        let total_hours = total_headcount as u64 * 2000; // ~2000 hours/employee/year
9319        let safety_metric = workforce_gen.compute_safety_metrics(
9320            entity_id,
9321            &snapshot.safety_incidents,
9322            total_hours,
9323            start_date,
9324        );
9325        snapshot.safety_metrics = vec![safety_metric];
9326
9327        // Governance
9328        let mut gov_gen = datasynth_generators::GovernanceGenerator::new(
9329            seed + 85,
9330            esg_cfg.governance.board_size,
9331            esg_cfg.governance.independence_target,
9332        );
9333        snapshot.governance = vec![gov_gen.generate(entity_id, start_date)];
9334
9335        // Supplier ESG assessments
9336        let mut supplier_gen = datasynth_generators::SupplierEsgGenerator::new(
9337            esg_cfg.supply_chain_esg.clone(),
9338            seed + 86,
9339        );
9340        let vendor_inputs: Vec<datasynth_generators::VendorInput> = self
9341            .master_data
9342            .vendors
9343            .iter()
9344            .map(|v| datasynth_generators::VendorInput {
9345                vendor_id: v.vendor_id.clone(),
9346                country: v.country.clone(),
9347                industry: format!("{:?}", v.vendor_type).to_lowercase(),
9348                quality_score: None,
9349            })
9350            .collect();
9351        snapshot.supplier_assessments =
9352            supplier_gen.generate(entity_id, &vendor_inputs, start_date);
9353
9354        // Disclosures
9355        let mut disclosure_gen = datasynth_generators::DisclosureGenerator::new(
9356            seed + 87,
9357            esg_cfg.reporting.clone(),
9358            esg_cfg.climate_scenarios.clone(),
9359        );
9360        snapshot.materiality = disclosure_gen.generate_materiality(entity_id, start_date);
9361        snapshot.disclosures = disclosure_gen.generate_disclosures(
9362            entity_id,
9363            &snapshot.materiality,
9364            start_date,
9365            end_date,
9366        );
9367        snapshot.climate_scenarios = disclosure_gen.generate_climate_scenarios(entity_id);
9368        snapshot.disclosure_count = snapshot.disclosures.len();
9369
9370        // Anomaly injection
9371        if esg_cfg.anomaly_rate > 0.0 {
9372            let mut anomaly_injector =
9373                datasynth_generators::EsgAnomalyInjector::new(seed + 88, esg_cfg.anomaly_rate);
9374            let mut labels = Vec::new();
9375            labels.extend(anomaly_injector.inject_greenwashing(&mut snapshot.emissions));
9376            labels.extend(anomaly_injector.inject_diversity_stagnation(&mut snapshot.diversity));
9377            labels.extend(
9378                anomaly_injector.inject_supply_chain_risk(&mut snapshot.supplier_assessments),
9379            );
9380            labels.extend(anomaly_injector.inject_data_quality_gaps(&mut snapshot.safety_metrics));
9381            labels.extend(anomaly_injector.inject_missing_disclosures(&mut snapshot.materiality));
9382            snapshot.anomaly_labels = labels;
9383        }
9384
9385        stats.esg_emission_count = snapshot.emission_count;
9386        stats.esg_disclosure_count = snapshot.disclosure_count;
9387
9388        info!(
9389            "ESG data generated: {} emissions, {} disclosures, {} supplier assessments",
9390            snapshot.emission_count,
9391            snapshot.disclosure_count,
9392            snapshot.supplier_assessments.len()
9393        );
9394        self.check_resources_with_log("post-esg")?;
9395
9396        Ok(snapshot)
9397    }
9398
9399    /// Phase 22: Generate Treasury data (cash management, hedging, debt, pooling, guarantees, netting).
9400    fn phase_treasury_data(
9401        &mut self,
9402        document_flows: &DocumentFlowSnapshot,
9403        subledger: &SubledgerSnapshot,
9404        intercompany: &IntercompanySnapshot,
9405        stats: &mut EnhancedGenerationStatistics,
9406    ) -> SynthResult<TreasurySnapshot> {
9407        if !self.phase_config.generate_treasury {
9408            debug!("Phase 22: Skipped (treasury generation disabled)");
9409            return Ok(TreasurySnapshot::default());
9410        }
9411        let degradation = self.check_resources()?;
9412        if degradation >= DegradationLevel::Reduced {
9413            debug!(
9414                "Phase skipped due to resource pressure (degradation: {:?})",
9415                degradation
9416            );
9417            return Ok(TreasurySnapshot::default());
9418        }
9419        info!("Phase 22: Generating Treasury Data");
9420
9421        let seed = self.seed;
9422        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9423            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9424        let currency = self
9425            .config
9426            .companies
9427            .first()
9428            .map(|c| c.currency.as_str())
9429            .unwrap_or("USD");
9430        let entity_id = self
9431            .config
9432            .companies
9433            .first()
9434            .map(|c| c.code.as_str())
9435            .unwrap_or("1000");
9436
9437        let mut snapshot = TreasurySnapshot::default();
9438
9439        // Generate debt instruments
9440        let mut debt_gen = datasynth_generators::treasury::DebtGenerator::new(
9441            self.config.treasury.debt.clone(),
9442            seed + 90,
9443        );
9444        snapshot.debt_instruments = debt_gen.generate(entity_id, currency, start_date);
9445
9446        // Generate hedging instruments (IR swaps for floating-rate debt)
9447        let mut hedge_gen = datasynth_generators::treasury::HedgingGenerator::new(
9448            self.config.treasury.hedging.clone(),
9449            seed + 91,
9450        );
9451        for debt in &snapshot.debt_instruments {
9452            if debt.rate_type == InterestRateType::Variable {
9453                let swap = hedge_gen.generate_ir_swap(
9454                    currency,
9455                    debt.principal,
9456                    debt.origination_date,
9457                    debt.maturity_date,
9458                );
9459                snapshot.hedging_instruments.push(swap);
9460            }
9461        }
9462
9463        // Build FX exposures from foreign-currency payments and generate
9464        // FX forwards + hedge relationship designations via generate() API.
9465        {
9466            let mut fx_map: HashMap<String, (rust_decimal::Decimal, NaiveDate)> = HashMap::new();
9467            for payment in &document_flows.payments {
9468                if payment.currency != currency {
9469                    let entry = fx_map
9470                        .entry(payment.currency.clone())
9471                        .or_insert((rust_decimal::Decimal::ZERO, payment.header.document_date));
9472                    entry.0 += payment.amount;
9473                    // Use the latest settlement date among grouped payments
9474                    if payment.header.document_date > entry.1 {
9475                        entry.1 = payment.header.document_date;
9476                    }
9477                }
9478            }
9479            if !fx_map.is_empty() {
9480                let fx_exposures: Vec<datasynth_generators::treasury::FxExposure> = fx_map
9481                    .into_iter()
9482                    .map(|(foreign_ccy, (net_amount, settlement_date))| {
9483                        datasynth_generators::treasury::FxExposure {
9484                            currency_pair: format!("{foreign_ccy}/{currency}"),
9485                            foreign_currency: foreign_ccy,
9486                            net_amount,
9487                            settlement_date,
9488                            description: "AP payment FX exposure".to_string(),
9489                        }
9490                    })
9491                    .collect();
9492                let (fx_instruments, fx_relationships) =
9493                    hedge_gen.generate(start_date, &fx_exposures);
9494                snapshot.hedging_instruments.extend(fx_instruments);
9495                snapshot.hedge_relationships.extend(fx_relationships);
9496            }
9497        }
9498
9499        // Inject anomalies if configured
9500        if self.config.treasury.anomaly_rate > 0.0 {
9501            let mut anomaly_injector = datasynth_generators::treasury::TreasuryAnomalyInjector::new(
9502                seed + 92,
9503                self.config.treasury.anomaly_rate,
9504            );
9505            let mut labels = Vec::new();
9506            labels.extend(
9507                anomaly_injector.inject_into_hedge_relationships(&mut snapshot.hedge_relationships),
9508            );
9509            snapshot.treasury_anomaly_labels = labels;
9510        }
9511
9512        // Generate cash positions from payment flows
9513        if self.config.treasury.cash_positioning.enabled {
9514            let mut cash_flows: Vec<datasynth_generators::treasury::CashFlow> = Vec::new();
9515
9516            // AP payments as outflows
9517            for payment in &document_flows.payments {
9518                cash_flows.push(datasynth_generators::treasury::CashFlow {
9519                    date: payment.header.document_date,
9520                    account_id: format!("{entity_id}-MAIN"),
9521                    amount: payment.amount,
9522                    direction: datasynth_generators::treasury::CashFlowDirection::Outflow,
9523                });
9524            }
9525
9526            // Customer receipts (from O2C chains) as inflows
9527            for chain in &document_flows.o2c_chains {
9528                if let Some(ref receipt) = chain.customer_receipt {
9529                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9530                        date: receipt.header.document_date,
9531                        account_id: format!("{entity_id}-MAIN"),
9532                        amount: receipt.amount,
9533                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9534                    });
9535                }
9536                // Remainder receipts (follow-up to partial payments)
9537                for receipt in &chain.remainder_receipts {
9538                    cash_flows.push(datasynth_generators::treasury::CashFlow {
9539                        date: receipt.header.document_date,
9540                        account_id: format!("{entity_id}-MAIN"),
9541                        amount: receipt.amount,
9542                        direction: datasynth_generators::treasury::CashFlowDirection::Inflow,
9543                    });
9544                }
9545            }
9546
9547            if !cash_flows.is_empty() {
9548                let mut cash_gen = datasynth_generators::treasury::CashPositionGenerator::new(
9549                    self.config.treasury.cash_positioning.clone(),
9550                    seed + 93,
9551                );
9552                let account_id = format!("{entity_id}-MAIN");
9553                snapshot.cash_positions = cash_gen.generate(
9554                    entity_id,
9555                    &account_id,
9556                    currency,
9557                    &cash_flows,
9558                    start_date,
9559                    start_date + chrono::Months::new(self.config.global.period_months),
9560                    rust_decimal::Decimal::new(1_000_000, 0), // Default opening balance
9561                );
9562            }
9563        }
9564
9565        // Generate cash forecasts from AR/AP aging
9566        if self.config.treasury.cash_forecasting.enabled {
9567            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9568
9569            // Build AR aging items from subledger AR invoices
9570            let ar_items: Vec<datasynth_generators::treasury::ArAgingItem> = subledger
9571                .ar_invoices
9572                .iter()
9573                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9574                .map(|inv| {
9575                    let days_past_due = if inv.due_date < end_date {
9576                        (end_date - inv.due_date).num_days().max(0) as u32
9577                    } else {
9578                        0
9579                    };
9580                    datasynth_generators::treasury::ArAgingItem {
9581                        expected_date: inv.due_date,
9582                        amount: inv.amount_remaining,
9583                        days_past_due,
9584                        document_id: inv.invoice_number.clone(),
9585                    }
9586                })
9587                .collect();
9588
9589            // Build AP aging items from subledger AP invoices
9590            let ap_items: Vec<datasynth_generators::treasury::ApAgingItem> = subledger
9591                .ap_invoices
9592                .iter()
9593                .filter(|inv| inv.amount_remaining > rust_decimal::Decimal::ZERO)
9594                .map(|inv| datasynth_generators::treasury::ApAgingItem {
9595                    payment_date: inv.due_date,
9596                    amount: inv.amount_remaining,
9597                    document_id: inv.invoice_number.clone(),
9598                })
9599                .collect();
9600
9601            let mut forecast_gen = datasynth_generators::treasury::CashForecastGenerator::new(
9602                self.config.treasury.cash_forecasting.clone(),
9603                seed + 94,
9604            );
9605            let forecast = forecast_gen.generate(
9606                entity_id,
9607                currency,
9608                end_date,
9609                &ar_items,
9610                &ap_items,
9611                &[], // scheduled disbursements - empty for now
9612            );
9613            snapshot.cash_forecasts.push(forecast);
9614        }
9615
9616        // Generate cash pools and sweeps
9617        if self.config.treasury.cash_pooling.enabled && !snapshot.cash_positions.is_empty() {
9618            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9619            let mut pool_gen = datasynth_generators::treasury::CashPoolGenerator::new(
9620                self.config.treasury.cash_pooling.clone(),
9621                seed + 95,
9622            );
9623
9624            // Create a pool from available accounts
9625            let account_ids: Vec<String> = snapshot
9626                .cash_positions
9627                .iter()
9628                .map(|cp| cp.bank_account_id.clone())
9629                .collect::<std::collections::HashSet<_>>()
9630                .into_iter()
9631                .collect();
9632
9633            if let Some(pool) =
9634                pool_gen.create_pool(&format!("{entity_id}_MAIN_POOL"), currency, &account_ids)
9635            {
9636                // Generate sweeps - build participant balances from last cash position per account
9637                let mut latest_balances: HashMap<String, rust_decimal::Decimal> = HashMap::new();
9638                for cp in &snapshot.cash_positions {
9639                    latest_balances.insert(cp.bank_account_id.clone(), cp.closing_balance);
9640                }
9641
9642                let participant_balances: Vec<datasynth_generators::treasury::AccountBalance> =
9643                    latest_balances
9644                        .into_iter()
9645                        .filter(|(id, _)| pool.participant_accounts.contains(id))
9646                        .map(
9647                            |(id, balance)| datasynth_generators::treasury::AccountBalance {
9648                                account_id: id,
9649                                balance,
9650                            },
9651                        )
9652                        .collect();
9653
9654                let sweeps =
9655                    pool_gen.generate_sweeps(&pool, end_date, currency, &participant_balances);
9656                snapshot.cash_pool_sweeps = sweeps;
9657                snapshot.cash_pools.push(pool);
9658            }
9659        }
9660
9661        // Generate bank guarantees
9662        if self.config.treasury.bank_guarantees.enabled {
9663            let vendor_names: Vec<String> = self
9664                .master_data
9665                .vendors
9666                .iter()
9667                .map(|v| v.name.clone())
9668                .collect();
9669            if !vendor_names.is_empty() {
9670                let mut bg_gen = datasynth_generators::treasury::BankGuaranteeGenerator::new(
9671                    self.config.treasury.bank_guarantees.clone(),
9672                    seed + 96,
9673                );
9674                snapshot.bank_guarantees =
9675                    bg_gen.generate(entity_id, currency, start_date, &vendor_names);
9676            }
9677        }
9678
9679        // Generate netting runs from intercompany matched pairs
9680        if self.config.treasury.netting.enabled && !intercompany.matched_pairs.is_empty() {
9681            let entity_ids: Vec<String> = self
9682                .config
9683                .companies
9684                .iter()
9685                .map(|c| c.code.clone())
9686                .collect();
9687            let ic_amounts: Vec<(String, String, rust_decimal::Decimal)> = intercompany
9688                .matched_pairs
9689                .iter()
9690                .map(|mp| {
9691                    (
9692                        mp.seller_company.clone(),
9693                        mp.buyer_company.clone(),
9694                        mp.amount,
9695                    )
9696                })
9697                .collect();
9698            if entity_ids.len() >= 2 {
9699                let mut netting_gen = datasynth_generators::treasury::NettingRunGenerator::new(
9700                    self.config.treasury.netting.clone(),
9701                    seed + 97,
9702                );
9703                snapshot.netting_runs = netting_gen.generate(
9704                    &entity_ids,
9705                    currency,
9706                    start_date,
9707                    self.config.global.period_months,
9708                    &ic_amounts,
9709                );
9710            }
9711        }
9712
9713        // Generate treasury journal entries from the instruments we just created.
9714        {
9715            use datasynth_generators::treasury::TreasuryAccounting;
9716
9717            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9718            let mut treasury_jes = Vec::new();
9719
9720            // Debt interest accrual JEs
9721            if !snapshot.debt_instruments.is_empty() {
9722                let debt_jes =
9723                    TreasuryAccounting::generate_debt_jes(&snapshot.debt_instruments, end_date);
9724                debug!("Generated {} debt interest accrual JEs", debt_jes.len());
9725                treasury_jes.extend(debt_jes);
9726            }
9727
9728            // Hedge mark-to-market JEs
9729            if !snapshot.hedging_instruments.is_empty() {
9730                let hedge_jes = TreasuryAccounting::generate_hedge_jes(
9731                    &snapshot.hedging_instruments,
9732                    &snapshot.hedge_relationships,
9733                    end_date,
9734                    entity_id,
9735                );
9736                debug!("Generated {} hedge MTM JEs", hedge_jes.len());
9737                treasury_jes.extend(hedge_jes);
9738            }
9739
9740            // Cash pool sweep JEs
9741            if !snapshot.cash_pool_sweeps.is_empty() {
9742                let sweep_jes = TreasuryAccounting::generate_cash_pool_sweep_jes(
9743                    &snapshot.cash_pool_sweeps,
9744                    entity_id,
9745                );
9746                debug!("Generated {} cash pool sweep JEs", sweep_jes.len());
9747                treasury_jes.extend(sweep_jes);
9748            }
9749
9750            if !treasury_jes.is_empty() {
9751                debug!("Total treasury journal entries: {}", treasury_jes.len());
9752            }
9753            snapshot.journal_entries = treasury_jes;
9754        }
9755
9756        stats.treasury_debt_instrument_count = snapshot.debt_instruments.len();
9757        stats.treasury_hedging_instrument_count = snapshot.hedging_instruments.len();
9758        stats.cash_position_count = snapshot.cash_positions.len();
9759        stats.cash_forecast_count = snapshot.cash_forecasts.len();
9760        stats.cash_pool_count = snapshot.cash_pools.len();
9761
9762        info!(
9763            "Treasury data generated: {} debt instruments, {} hedging instruments, {} cash positions, {} forecasts, {} pools, {} guarantees, {} netting runs, {} JEs",
9764            snapshot.debt_instruments.len(),
9765            snapshot.hedging_instruments.len(),
9766            snapshot.cash_positions.len(),
9767            snapshot.cash_forecasts.len(),
9768            snapshot.cash_pools.len(),
9769            snapshot.bank_guarantees.len(),
9770            snapshot.netting_runs.len(),
9771            snapshot.journal_entries.len(),
9772        );
9773        self.check_resources_with_log("post-treasury")?;
9774
9775        Ok(snapshot)
9776    }
9777
9778    /// Phase 23: Generate Project Accounting data (projects, costs, revenue, EVM, milestones).
9779    fn phase_project_accounting(
9780        &mut self,
9781        document_flows: &DocumentFlowSnapshot,
9782        hr: &HrSnapshot,
9783        stats: &mut EnhancedGenerationStatistics,
9784    ) -> SynthResult<ProjectAccountingSnapshot> {
9785        if !self.phase_config.generate_project_accounting {
9786            debug!("Phase 23: Skipped (project accounting disabled)");
9787            return Ok(ProjectAccountingSnapshot::default());
9788        }
9789        let degradation = self.check_resources()?;
9790        if degradation >= DegradationLevel::Reduced {
9791            debug!(
9792                "Phase skipped due to resource pressure (degradation: {:?})",
9793                degradation
9794            );
9795            return Ok(ProjectAccountingSnapshot::default());
9796        }
9797        info!("Phase 23: Generating Project Accounting Data");
9798
9799        let seed = self.seed;
9800        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9801            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
9802        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
9803        let company_code = self
9804            .config
9805            .companies
9806            .first()
9807            .map(|c| c.code.as_str())
9808            .unwrap_or("1000");
9809
9810        let mut snapshot = ProjectAccountingSnapshot::default();
9811
9812        // Generate projects with WBS hierarchies
9813        let mut project_gen = datasynth_generators::project_accounting::ProjectGenerator::new(
9814            self.config.project_accounting.clone(),
9815            seed + 95,
9816        );
9817        let pool = project_gen.generate(company_code, start_date, end_date);
9818        snapshot.projects = pool.projects.clone();
9819
9820        // Link source documents to projects for cost allocation
9821        {
9822            let mut source_docs: Vec<datasynth_generators::project_accounting::SourceDocument> =
9823                Vec::new();
9824
9825            // Time entries
9826            for te in &hr.time_entries {
9827                let total_hours = te.hours_regular + te.hours_overtime;
9828                if total_hours > 0.0 {
9829                    source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9830                        id: te.entry_id.clone(),
9831                        entity_id: company_code.to_string(),
9832                        date: te.date,
9833                        amount: rust_decimal::Decimal::from_f64_retain(total_hours * 75.0)
9834                            .unwrap_or(rust_decimal::Decimal::ZERO),
9835                        source_type: CostSourceType::TimeEntry,
9836                        hours: Some(
9837                            rust_decimal::Decimal::from_f64_retain(total_hours)
9838                                .unwrap_or(rust_decimal::Decimal::ZERO),
9839                        ),
9840                    });
9841                }
9842            }
9843
9844            // Expense reports
9845            for er in &hr.expense_reports {
9846                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9847                    id: er.report_id.clone(),
9848                    entity_id: company_code.to_string(),
9849                    date: er.submission_date,
9850                    amount: er.total_amount,
9851                    source_type: CostSourceType::ExpenseReport,
9852                    hours: None,
9853                });
9854            }
9855
9856            // Purchase orders
9857            for po in &document_flows.purchase_orders {
9858                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9859                    id: po.header.document_id.clone(),
9860                    entity_id: company_code.to_string(),
9861                    date: po.header.document_date,
9862                    amount: po.total_net_amount,
9863                    source_type: CostSourceType::PurchaseOrder,
9864                    hours: None,
9865                });
9866            }
9867
9868            // Vendor invoices
9869            for vi in &document_flows.vendor_invoices {
9870                source_docs.push(datasynth_generators::project_accounting::SourceDocument {
9871                    id: vi.header.document_id.clone(),
9872                    entity_id: company_code.to_string(),
9873                    date: vi.header.document_date,
9874                    amount: vi.payable_amount,
9875                    source_type: CostSourceType::VendorInvoice,
9876                    hours: None,
9877                });
9878            }
9879
9880            if !source_docs.is_empty() && !pool.projects.is_empty() {
9881                let mut cost_gen =
9882                    datasynth_generators::project_accounting::ProjectCostGenerator::new(
9883                        self.config.project_accounting.cost_allocation.clone(),
9884                        seed + 99,
9885                    );
9886                snapshot.cost_lines = cost_gen.link_documents(&pool, &source_docs);
9887            }
9888        }
9889
9890        // Generate change orders
9891        if self.config.project_accounting.change_orders.enabled {
9892            let mut co_gen = datasynth_generators::project_accounting::ChangeOrderGenerator::new(
9893                self.config.project_accounting.change_orders.clone(),
9894                seed + 96,
9895            );
9896            snapshot.change_orders = co_gen.generate(&pool.projects, start_date, end_date);
9897        }
9898
9899        // Generate milestones
9900        if self.config.project_accounting.milestones.enabled {
9901            let mut ms_gen = datasynth_generators::project_accounting::MilestoneGenerator::new(
9902                self.config.project_accounting.milestones.clone(),
9903                seed + 97,
9904            );
9905            snapshot.milestones = ms_gen.generate(&pool.projects, start_date, end_date, end_date);
9906        }
9907
9908        // Generate earned value metrics (needs cost lines, so only if we have projects)
9909        if self.config.project_accounting.earned_value.enabled && !snapshot.projects.is_empty() {
9910            let mut evm_gen = datasynth_generators::project_accounting::EarnedValueGenerator::new(
9911                self.config.project_accounting.earned_value.clone(),
9912                seed + 98,
9913            );
9914            snapshot.earned_value_metrics =
9915                evm_gen.generate(&pool.projects, &snapshot.cost_lines, start_date, end_date);
9916        }
9917
9918        // Wire ProjectRevenueGenerator: generate PoC revenue recognition for customer projects.
9919        if self.config.project_accounting.revenue_recognition.enabled
9920            && !snapshot.projects.is_empty()
9921            && !snapshot.cost_lines.is_empty()
9922        {
9923            use datasynth_generators::project_accounting::RevenueGenerator;
9924            let rev_config = self.config.project_accounting.revenue_recognition.clone();
9925            let avg_contract_value =
9926                rust_decimal::Decimal::from_f64_retain(rev_config.avg_contract_value)
9927                    .unwrap_or(rust_decimal::Decimal::new(500_000, 0));
9928
9929            // Build contract value tuples: only customer-type projects get revenue recognition.
9930            // Estimated total cost = 80% of contract value (standard 20% gross margin proxy).
9931            let contract_values: Vec<(String, rust_decimal::Decimal, rust_decimal::Decimal)> =
9932                snapshot
9933                    .projects
9934                    .iter()
9935                    .filter(|p| {
9936                        matches!(
9937                            p.project_type,
9938                            datasynth_core::models::ProjectType::Customer
9939                        )
9940                    })
9941                    .map(|p| {
9942                        let cv = if p.budget > rust_decimal::Decimal::ZERO {
9943                            (p.budget * rust_decimal::Decimal::new(125, 2)).round_dp(2)
9944                        // budget × 1.25 → contract value
9945                        } else {
9946                            avg_contract_value
9947                        };
9948                        let etc = (cv * rust_decimal::Decimal::new(80, 2)).round_dp(2); // 80% cost ratio
9949                        (p.project_id.clone(), cv, etc)
9950                    })
9951                    .collect();
9952
9953            if !contract_values.is_empty() {
9954                let mut rev_gen = RevenueGenerator::new(rev_config, seed + 99);
9955                snapshot.revenue_records = rev_gen.generate(
9956                    &snapshot.projects,
9957                    &snapshot.cost_lines,
9958                    &contract_values,
9959                    start_date,
9960                    end_date,
9961                );
9962                debug!(
9963                    "Generated {} revenue recognition records for {} customer projects",
9964                    snapshot.revenue_records.len(),
9965                    contract_values.len()
9966                );
9967            }
9968        }
9969
9970        stats.project_count = snapshot.projects.len();
9971        stats.project_change_order_count = snapshot.change_orders.len();
9972        stats.project_cost_line_count = snapshot.cost_lines.len();
9973
9974        info!(
9975            "Project accounting generated: {} projects, {} change orders, {} milestones, {} EVM records",
9976            snapshot.projects.len(),
9977            snapshot.change_orders.len(),
9978            snapshot.milestones.len(),
9979            snapshot.earned_value_metrics.len()
9980        );
9981        self.check_resources_with_log("post-project-accounting")?;
9982
9983        Ok(snapshot)
9984    }
9985
9986    /// Phase 24: Generate process evolution and organizational events.
9987    fn phase_evolution_events(
9988        &mut self,
9989        stats: &mut EnhancedGenerationStatistics,
9990    ) -> SynthResult<(Vec<ProcessEvolutionEvent>, Vec<OrganizationalEvent>)> {
9991        if !self.phase_config.generate_evolution_events {
9992            debug!("Phase 24: Skipped (evolution events disabled)");
9993            return Ok((Vec::new(), Vec::new()));
9994        }
9995        info!("Phase 24: Generating Process Evolution + Organizational Events");
9996
9997        let seed = self.seed;
9998        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
9999            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10000        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10001
10002        // Process evolution events
10003        let mut proc_gen =
10004            datasynth_generators::process_evolution_generator::ProcessEvolutionGenerator::new(
10005                seed + 100,
10006            );
10007        let process_events = proc_gen.generate_events(start_date, end_date);
10008
10009        // Organizational events
10010        let company_codes: Vec<String> = self
10011            .config
10012            .companies
10013            .iter()
10014            .map(|c| c.code.clone())
10015            .collect();
10016        let mut org_gen =
10017            datasynth_generators::organizational_event_generator::OrganizationalEventGenerator::new(
10018                seed + 101,
10019            );
10020        let org_events = org_gen.generate_events(start_date, end_date, &company_codes);
10021
10022        stats.process_evolution_event_count = process_events.len();
10023        stats.organizational_event_count = org_events.len();
10024
10025        info!(
10026            "Evolution events generated: {} process evolution, {} organizational",
10027            process_events.len(),
10028            org_events.len()
10029        );
10030        self.check_resources_with_log("post-evolution-events")?;
10031
10032        Ok((process_events, org_events))
10033    }
10034
10035    /// Phase 24b: Generate disruption events (outages, migrations, process changes,
10036    /// data recovery, and regulatory changes).
10037    fn phase_disruption_events(
10038        &self,
10039        stats: &mut EnhancedGenerationStatistics,
10040    ) -> SynthResult<Vec<datasynth_generators::disruption::DisruptionEvent>> {
10041        if !self.config.organizational_events.enabled {
10042            debug!("Phase 24b: Skipped (organizational events disabled)");
10043            return Ok(Vec::new());
10044        }
10045        info!("Phase 24b: Generating Disruption Events");
10046
10047        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10048            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10049        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
10050
10051        let company_codes: Vec<String> = self
10052            .config
10053            .companies
10054            .iter()
10055            .map(|c| c.code.clone())
10056            .collect();
10057
10058        let mut gen = datasynth_generators::disruption::DisruptionGenerator::new(self.seed + 150);
10059        let events = gen.generate(start_date, end_date, &company_codes);
10060
10061        stats.disruption_event_count = events.len();
10062        info!("Disruption events generated: {} events", events.len());
10063        self.check_resources_with_log("post-disruption-events")?;
10064
10065        Ok(events)
10066    }
10067
10068    /// Phase 25: Generate counterfactual (original, mutated) JE pairs for ML training.
10069    ///
10070    /// Produces paired examples where each pair contains the original clean JE
10071    /// and a controlled mutation (scaled amount, shifted date, self-approval, or
10072    /// split transaction). Useful for training anomaly detection models with
10073    /// known ground truth.
10074    fn phase_counterfactuals(
10075        &self,
10076        journal_entries: &[JournalEntry],
10077        stats: &mut EnhancedGenerationStatistics,
10078    ) -> SynthResult<Vec<datasynth_generators::counterfactual::CounterfactualPair>> {
10079        if !self.phase_config.generate_counterfactuals || journal_entries.is_empty() {
10080            debug!("Phase 25: Skipped (counterfactual generation disabled or no JEs)");
10081            return Ok(Vec::new());
10082        }
10083        info!("Phase 25: Generating Counterfactual Pairs for ML Training");
10084
10085        use datasynth_generators::counterfactual::{CounterfactualGenerator, CounterfactualSpec};
10086
10087        let mut gen = CounterfactualGenerator::new(self.seed + 110);
10088
10089        // Rotating set of specs to produce diverse mutation types
10090        let specs = [
10091            CounterfactualSpec::ScaleAmount { factor: 2.5 },
10092            CounterfactualSpec::ShiftDate { days: -14 },
10093            CounterfactualSpec::SelfApprove,
10094            CounterfactualSpec::SplitTransaction { split_count: 3 },
10095        ];
10096
10097        let pairs: Vec<_> = journal_entries
10098            .iter()
10099            .enumerate()
10100            .map(|(i, je)| {
10101                let spec = &specs[i % specs.len()];
10102                gen.generate(je, spec)
10103            })
10104            .collect();
10105
10106        stats.counterfactual_pair_count = pairs.len();
10107        info!(
10108            "Counterfactual pairs generated: {} pairs from {} journal entries",
10109            pairs.len(),
10110            journal_entries.len()
10111        );
10112        self.check_resources_with_log("post-counterfactuals")?;
10113
10114        Ok(pairs)
10115    }
10116
10117    /// Phase 26: Inject fraud red-flag indicators onto P2P/O2C documents.
10118    ///
10119    /// Uses the anomaly labels (from Phase 8) to determine which documents are
10120    /// fraudulent, then generates probabilistic red flags on all chain documents.
10121    /// Non-fraud documents also receive red flags at a lower rate (false positives)
10122    /// to produce realistic ML training data.
10123    fn phase_red_flags(
10124        &self,
10125        anomaly_labels: &AnomalyLabels,
10126        document_flows: &DocumentFlowSnapshot,
10127        stats: &mut EnhancedGenerationStatistics,
10128    ) -> SynthResult<Vec<datasynth_generators::fraud::RedFlag>> {
10129        if !self.config.fraud.enabled {
10130            debug!("Phase 26: Skipped (fraud generation disabled)");
10131            return Ok(Vec::new());
10132        }
10133        info!("Phase 26: Generating Fraud Red-Flag Indicators");
10134
10135        use datasynth_generators::fraud::RedFlagGenerator;
10136
10137        let generator = RedFlagGenerator::new();
10138        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(self.seed + 120);
10139
10140        // Build a set of document IDs that are known-fraudulent from anomaly labels.
10141        let fraud_doc_ids: std::collections::HashSet<&str> = anomaly_labels
10142            .labels
10143            .iter()
10144            .filter(|label| label.anomaly_type.is_intentional())
10145            .map(|label| label.document_id.as_str())
10146            .collect();
10147
10148        let mut flags = Vec::new();
10149
10150        // Iterate P2P chains: use the purchase order document ID as the chain key.
10151        for chain in &document_flows.p2p_chains {
10152            let doc_id = &chain.purchase_order.header.document_id;
10153            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10154            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10155        }
10156
10157        // Iterate O2C chains: use the sales order document ID as the chain key.
10158        for chain in &document_flows.o2c_chains {
10159            let doc_id = &chain.sales_order.header.document_id;
10160            let is_fraud = fraud_doc_ids.contains(doc_id.as_str());
10161            flags.extend(generator.inject_flags(doc_id, is_fraud, &mut rng));
10162        }
10163
10164        stats.red_flag_count = flags.len();
10165        info!(
10166            "Red flags generated: {} flags across {} P2P + {} O2C chains ({} fraud docs)",
10167            flags.len(),
10168            document_flows.p2p_chains.len(),
10169            document_flows.o2c_chains.len(),
10170            fraud_doc_ids.len()
10171        );
10172        self.check_resources_with_log("post-red-flags")?;
10173
10174        Ok(flags)
10175    }
10176
10177    /// Phase 26b: Generate collusion rings from employee/vendor pools.
10178    ///
10179    /// Gated on `fraud.enabled && fraud.clustering_enabled`. Uses the
10180    /// `CollusionRingGenerator` to create 1-3 coordinated fraud networks and
10181    /// advance them over the simulation period.
10182    fn phase_collusion_rings(
10183        &mut self,
10184        stats: &mut EnhancedGenerationStatistics,
10185    ) -> SynthResult<Vec<datasynth_generators::fraud::CollusionRing>> {
10186        if !(self.config.fraud.enabled && self.config.fraud.clustering_enabled) {
10187            debug!("Phase 26b: Skipped (fraud collusion generation disabled)");
10188            return Ok(Vec::new());
10189        }
10190        info!("Phase 26b: Generating Collusion Rings");
10191
10192        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10193            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10194        let months = self.config.global.period_months;
10195
10196        let employee_ids: Vec<String> = self
10197            .master_data
10198            .employees
10199            .iter()
10200            .map(|e| e.employee_id.clone())
10201            .collect();
10202        let vendor_ids: Vec<String> = self
10203            .master_data
10204            .vendors
10205            .iter()
10206            .map(|v| v.vendor_id.clone())
10207            .collect();
10208
10209        let mut generator =
10210            datasynth_generators::fraud::CollusionRingGenerator::new(self.seed + 160);
10211        let rings = generator.generate(&employee_ids, &vendor_ids, start_date, months);
10212
10213        stats.collusion_ring_count = rings.len();
10214        info!(
10215            "Collusion rings generated: {} rings, total members: {}",
10216            rings.len(),
10217            rings
10218                .iter()
10219                .map(datasynth_generators::fraud::CollusionRing::size)
10220                .sum::<usize>()
10221        );
10222        self.check_resources_with_log("post-collusion-rings")?;
10223
10224        Ok(rings)
10225    }
10226
10227    /// Phase 27: Generate bi-temporal version chains for vendor entities.
10228    ///
10229    /// Creates `TemporalVersionChain<Vendor>` records that model how vendor
10230    /// master data changes over time, supporting bi-temporal audit queries.
10231    fn phase_temporal_attributes(
10232        &mut self,
10233        stats: &mut EnhancedGenerationStatistics,
10234    ) -> SynthResult<
10235        Vec<datasynth_core::models::TemporalVersionChain<datasynth_core::models::Vendor>>,
10236    > {
10237        if !self.config.temporal_attributes.enabled {
10238            debug!("Phase 27: Skipped (temporal attributes disabled)");
10239            return Ok(Vec::new());
10240        }
10241        info!("Phase 27: Generating Bi-Temporal Vendor Version Chains");
10242
10243        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10244            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10245
10246        // Build a TemporalAttributeConfig from the user's config.
10247        // Since Phase 27 is already gated on temporal_attributes.enabled,
10248        // default to enabling version chains so users get actual mutations.
10249        let generate_version_chains = self.config.temporal_attributes.generate_version_chains
10250            || self.config.temporal_attributes.enabled;
10251        let temporal_config = {
10252            let ta = &self.config.temporal_attributes;
10253            datasynth_generators::temporal::TemporalAttributeConfigBuilder::new()
10254                .enabled(ta.enabled)
10255                .closed_probability(ta.valid_time.closed_probability)
10256                .avg_validity_days(ta.valid_time.avg_validity_days)
10257                .avg_recording_delay(ta.transaction_time.avg_recording_delay_seconds)
10258                .with_version_chains(if generate_version_chains {
10259                    ta.avg_versions_per_entity
10260                } else {
10261                    1.0
10262                })
10263                .build()
10264        };
10265        // Apply backdating settings if configured
10266        let temporal_config = if self
10267            .config
10268            .temporal_attributes
10269            .transaction_time
10270            .allow_backdating
10271        {
10272            let mut c = temporal_config;
10273            c.transaction_time.allow_backdating = true;
10274            c.transaction_time.backdating_probability = self
10275                .config
10276                .temporal_attributes
10277                .transaction_time
10278                .backdating_probability;
10279            c.transaction_time.max_backdate_days = self
10280                .config
10281                .temporal_attributes
10282                .transaction_time
10283                .max_backdate_days;
10284            c
10285        } else {
10286            temporal_config
10287        };
10288        let mut gen = datasynth_generators::temporal::TemporalAttributeGenerator::new(
10289            temporal_config,
10290            self.seed + 130,
10291            start_date,
10292        );
10293
10294        let uuid_factory = datasynth_core::DeterministicUuidFactory::new(
10295            self.seed + 130,
10296            datasynth_core::GeneratorType::Vendor,
10297        );
10298
10299        let chains: Vec<_> = self
10300            .master_data
10301            .vendors
10302            .iter()
10303            .map(|vendor| {
10304                let id = uuid_factory.next();
10305                gen.generate_version_chain(vendor.clone(), id)
10306            })
10307            .collect();
10308
10309        stats.temporal_version_chain_count = chains.len();
10310        info!("Temporal version chains generated: {} chains", chains.len());
10311        self.check_resources_with_log("post-temporal-attributes")?;
10312
10313        Ok(chains)
10314    }
10315
10316    /// Phase 28: Build entity relationship graph and cross-process links.
10317    ///
10318    /// Part 1 (gated on `relationship_strength.enabled`): builds an
10319    /// `EntityGraph` from master-data vendor/customer entities and
10320    /// journal-entry-derived transaction summaries.
10321    ///
10322    /// Part 2 (gated on `cross_process_links.enabled`): extracts
10323    /// `GoodsReceiptRef` / `DeliveryRef` from document flow chains and
10324    /// generates inventory-movement cross-process links.
10325    fn phase_entity_relationships(
10326        &self,
10327        journal_entries: &[JournalEntry],
10328        document_flows: &DocumentFlowSnapshot,
10329        stats: &mut EnhancedGenerationStatistics,
10330    ) -> SynthResult<(
10331        Option<datasynth_core::models::EntityGraph>,
10332        Vec<datasynth_core::models::CrossProcessLink>,
10333    )> {
10334        use datasynth_generators::relationships::{
10335            DeliveryRef, EntityGraphConfig, EntityGraphGenerator, EntitySummary, GoodsReceiptRef,
10336            TransactionSummary,
10337        };
10338
10339        let rs_enabled = self.config.relationship_strength.enabled;
10340        let cpl_enabled = self.config.cross_process_links.enabled
10341            || (!document_flows.p2p_chains.is_empty() && !document_flows.o2c_chains.is_empty());
10342
10343        if !rs_enabled && !cpl_enabled {
10344            debug!(
10345                "Phase 28: Skipped (relationship_strength and cross_process_links both disabled)"
10346            );
10347            return Ok((None, Vec::new()));
10348        }
10349
10350        info!("Phase 28: Generating Entity Relationship Graph + Cross-Process Links");
10351
10352        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10353            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10354
10355        let company_code = self
10356            .config
10357            .companies
10358            .first()
10359            .map(|c| c.code.as_str())
10360            .unwrap_or("1000");
10361
10362        // Build the generator with matching config flags
10363        let gen_config = EntityGraphConfig {
10364            enabled: rs_enabled,
10365            cross_process: datasynth_generators::relationships::CrossProcessConfig {
10366                enable_inventory_links: self.config.cross_process_links.inventory_p2p_o2c,
10367                enable_return_flows: false,
10368                enable_payment_links: self.config.cross_process_links.payment_bank_reconciliation,
10369                enable_ic_bilateral: self.config.cross_process_links.intercompany_bilateral,
10370                // Use higher link rate for small datasets to avoid probabilistic empty results
10371                inventory_link_rate: if document_flows.p2p_chains.len() <= 10 {
10372                    1.0
10373                } else {
10374                    0.30
10375                },
10376                ..Default::default()
10377            },
10378            strength_config: datasynth_generators::relationships::StrengthConfig {
10379                transaction_volume_weight: self
10380                    .config
10381                    .relationship_strength
10382                    .calculation
10383                    .transaction_volume_weight,
10384                transaction_count_weight: self
10385                    .config
10386                    .relationship_strength
10387                    .calculation
10388                    .transaction_count_weight,
10389                duration_weight: self
10390                    .config
10391                    .relationship_strength
10392                    .calculation
10393                    .relationship_duration_weight,
10394                recency_weight: self.config.relationship_strength.calculation.recency_weight,
10395                mutual_connections_weight: self
10396                    .config
10397                    .relationship_strength
10398                    .calculation
10399                    .mutual_connections_weight,
10400                recency_half_life_days: self
10401                    .config
10402                    .relationship_strength
10403                    .calculation
10404                    .recency_half_life_days,
10405            },
10406            ..Default::default()
10407        };
10408
10409        let mut gen = EntityGraphGenerator::with_config(self.seed + 140, gen_config);
10410
10411        // --- Part 1: Entity Relationship Graph ---
10412        let entity_graph = if rs_enabled {
10413            // Build EntitySummary lists from master data
10414            let vendor_summaries: Vec<EntitySummary> = self
10415                .master_data
10416                .vendors
10417                .iter()
10418                .map(|v| {
10419                    EntitySummary::new(
10420                        &v.vendor_id,
10421                        &v.name,
10422                        datasynth_core::models::GraphEntityType::Vendor,
10423                        start_date,
10424                    )
10425                })
10426                .collect();
10427
10428            let customer_summaries: Vec<EntitySummary> = self
10429                .master_data
10430                .customers
10431                .iter()
10432                .map(|c| {
10433                    EntitySummary::new(
10434                        &c.customer_id,
10435                        &c.name,
10436                        datasynth_core::models::GraphEntityType::Customer,
10437                        start_date,
10438                    )
10439                })
10440                .collect();
10441
10442            // Build transaction summaries from journal entries.
10443            // Key = (company_code, trading_partner) for entries that have a
10444            // trading partner.  This captures intercompany flows and any JE
10445            // whose line items carry a trading_partner reference.
10446            let mut txn_summaries: std::collections::HashMap<(String, String), TransactionSummary> =
10447                std::collections::HashMap::new();
10448
10449            for je in journal_entries {
10450                let cc = je.header.company_code.clone();
10451                let posting_date = je.header.posting_date;
10452                for line in &je.lines {
10453                    if let Some(ref tp) = line.trading_partner {
10454                        let amount = if line.debit_amount > line.credit_amount {
10455                            line.debit_amount
10456                        } else {
10457                            line.credit_amount
10458                        };
10459                        let entry = txn_summaries
10460                            .entry((cc.clone(), tp.clone()))
10461                            .or_insert_with(|| TransactionSummary {
10462                                total_volume: rust_decimal::Decimal::ZERO,
10463                                transaction_count: 0,
10464                                first_transaction_date: posting_date,
10465                                last_transaction_date: posting_date,
10466                                related_entities: std::collections::HashSet::new(),
10467                            });
10468                        entry.total_volume += amount;
10469                        entry.transaction_count += 1;
10470                        if posting_date < entry.first_transaction_date {
10471                            entry.first_transaction_date = posting_date;
10472                        }
10473                        if posting_date > entry.last_transaction_date {
10474                            entry.last_transaction_date = posting_date;
10475                        }
10476                        entry.related_entities.insert(cc.clone());
10477                    }
10478                }
10479            }
10480
10481            // Also extract transaction relationships from document flow chains.
10482            // P2P chains: Company → Vendor relationships
10483            for chain in &document_flows.p2p_chains {
10484                let cc = chain.purchase_order.header.company_code.clone();
10485                let vendor_id = chain.purchase_order.vendor_id.clone();
10486                let po_date = chain.purchase_order.header.document_date;
10487                let amount = chain.purchase_order.total_net_amount;
10488
10489                let entry = txn_summaries
10490                    .entry((cc.clone(), vendor_id))
10491                    .or_insert_with(|| TransactionSummary {
10492                        total_volume: rust_decimal::Decimal::ZERO,
10493                        transaction_count: 0,
10494                        first_transaction_date: po_date,
10495                        last_transaction_date: po_date,
10496                        related_entities: std::collections::HashSet::new(),
10497                    });
10498                entry.total_volume += amount;
10499                entry.transaction_count += 1;
10500                if po_date < entry.first_transaction_date {
10501                    entry.first_transaction_date = po_date;
10502                }
10503                if po_date > entry.last_transaction_date {
10504                    entry.last_transaction_date = po_date;
10505                }
10506                entry.related_entities.insert(cc);
10507            }
10508
10509            // O2C chains: Company → Customer relationships
10510            for chain in &document_flows.o2c_chains {
10511                let cc = chain.sales_order.header.company_code.clone();
10512                let customer_id = chain.sales_order.customer_id.clone();
10513                let so_date = chain.sales_order.header.document_date;
10514                let amount = chain.sales_order.total_net_amount;
10515
10516                let entry = txn_summaries
10517                    .entry((cc.clone(), customer_id))
10518                    .or_insert_with(|| TransactionSummary {
10519                        total_volume: rust_decimal::Decimal::ZERO,
10520                        transaction_count: 0,
10521                        first_transaction_date: so_date,
10522                        last_transaction_date: so_date,
10523                        related_entities: std::collections::HashSet::new(),
10524                    });
10525                entry.total_volume += amount;
10526                entry.transaction_count += 1;
10527                if so_date < entry.first_transaction_date {
10528                    entry.first_transaction_date = so_date;
10529                }
10530                if so_date > entry.last_transaction_date {
10531                    entry.last_transaction_date = so_date;
10532                }
10533                entry.related_entities.insert(cc);
10534            }
10535
10536            let as_of_date = journal_entries
10537                .last()
10538                .map(|je| je.header.posting_date)
10539                .unwrap_or(start_date);
10540
10541            let graph = gen.generate_entity_graph(
10542                company_code,
10543                as_of_date,
10544                &vendor_summaries,
10545                &customer_summaries,
10546                &txn_summaries,
10547            );
10548
10549            info!(
10550                "Entity relationship graph: {} nodes, {} edges",
10551                graph.nodes.len(),
10552                graph.edges.len()
10553            );
10554            stats.entity_relationship_node_count = graph.nodes.len();
10555            stats.entity_relationship_edge_count = graph.edges.len();
10556            Some(graph)
10557        } else {
10558            None
10559        };
10560
10561        // --- Part 2: Cross-Process Links ---
10562        let cross_process_links = if cpl_enabled {
10563            // Build GoodsReceiptRef from P2P chains
10564            let gr_refs: Vec<GoodsReceiptRef> = document_flows
10565                .p2p_chains
10566                .iter()
10567                .flat_map(|chain| {
10568                    let vendor_id = chain.purchase_order.vendor_id.clone();
10569                    let cc = chain.purchase_order.header.company_code.clone();
10570                    chain.goods_receipts.iter().flat_map(move |gr| {
10571                        gr.items.iter().filter_map({
10572                            let doc_id = gr.header.document_id.clone();
10573                            let v_id = vendor_id.clone();
10574                            let company = cc.clone();
10575                            let receipt_date = gr.header.document_date;
10576                            move |item| {
10577                                item.base
10578                                    .material_id
10579                                    .as_ref()
10580                                    .map(|mat_id| GoodsReceiptRef {
10581                                        document_id: doc_id.clone(),
10582                                        material_id: mat_id.clone(),
10583                                        quantity: item.base.quantity,
10584                                        receipt_date,
10585                                        vendor_id: v_id.clone(),
10586                                        company_code: company.clone(),
10587                                    })
10588                            }
10589                        })
10590                    })
10591                })
10592                .collect();
10593
10594            // Build DeliveryRef from O2C chains
10595            let del_refs: Vec<DeliveryRef> = document_flows
10596                .o2c_chains
10597                .iter()
10598                .flat_map(|chain| {
10599                    let customer_id = chain.sales_order.customer_id.clone();
10600                    let cc = chain.sales_order.header.company_code.clone();
10601                    chain.deliveries.iter().flat_map(move |del| {
10602                        let delivery_date = del.actual_gi_date.unwrap_or(del.planned_gi_date);
10603                        del.items.iter().filter_map({
10604                            let doc_id = del.header.document_id.clone();
10605                            let c_id = customer_id.clone();
10606                            let company = cc.clone();
10607                            move |item| {
10608                                item.base.material_id.as_ref().map(|mat_id| DeliveryRef {
10609                                    document_id: doc_id.clone(),
10610                                    material_id: mat_id.clone(),
10611                                    quantity: item.base.quantity,
10612                                    delivery_date,
10613                                    customer_id: c_id.clone(),
10614                                    company_code: company.clone(),
10615                                })
10616                            }
10617                        })
10618                    })
10619                })
10620                .collect();
10621
10622            let links = gen.generate_cross_process_links(&gr_refs, &del_refs);
10623            info!("Cross-process links generated: {} links", links.len());
10624            stats.cross_process_link_count = links.len();
10625            links
10626        } else {
10627            Vec::new()
10628        };
10629
10630        self.check_resources_with_log("post-entity-relationships")?;
10631        Ok((entity_graph, cross_process_links))
10632    }
10633
10634    /// Phase 29: Generate industry-specific GL accounts via factory dispatch.
10635    fn phase_industry_data(
10636        &self,
10637        stats: &mut EnhancedGenerationStatistics,
10638    ) -> Option<datasynth_generators::industry::factory::IndustryOutput> {
10639        if !self.config.industry_specific.enabled {
10640            return None;
10641        }
10642        info!("Phase 29: Generating industry-specific data");
10643        let output = datasynth_generators::industry::factory::generate_industry_output(
10644            self.config.global.industry,
10645        );
10646        stats.industry_gl_account_count = output.gl_accounts.len();
10647        info!(
10648            "Industry data generated: {} GL accounts for {:?}",
10649            output.gl_accounts.len(),
10650            self.config.global.industry
10651        );
10652        Some(output)
10653    }
10654
10655    /// Phase 3b: Generate opening balances for each company.
10656    ///
10657    /// # Order of precedence
10658    ///
10659    /// 1. **v5.3 chain carryover** (ShardContext.opening_balances non-empty):
10660    ///    convert each EntityOpeningBalance into a
10661    ///    GeneratedOpeningBalance per company. This branch runs
10662    ///    UNCONDITIONALLY — even when `balance.generate_opening_balances`
10663    ///    is `false` — so a non-overlay preset that gets driven through
10664    ///    `group generate-chain` still applies the prior-year carry-
10665    ///    forward instead of silently dropping it.
10666    /// 2. **`generate_opening_balances` flag**: if off (and no carryover),
10667    ///    return empty Vec.
10668    /// 3. **OpeningBalanceGenerator**: industry-mix sampler for the
10669    ///    period-0 engagement.
10670    fn phase_opening_balances(
10671        &mut self,
10672        coa: &Arc<ChartOfAccounts>,
10673        stats: &mut EnhancedGenerationStatistics,
10674    ) -> SynthResult<Vec<GeneratedOpeningBalance>> {
10675        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10676            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10677        let fiscal_year = start_date.year();
10678
10679        // 1. v5.3 chain carryover — runs unconditionally when present.
10680        if let Some(ctx) = &self.shard_context {
10681            if !ctx.opening_balances.is_empty() {
10682                info!(
10683                    "Phase 3b: applying v5.3 opening-balance carryover ({} accounts × {} companies)",
10684                    ctx.opening_balances.len(),
10685                    self.config.companies.len(),
10686                );
10687                let mut results = Vec::new();
10688                for company in &self.config.companies {
10689                    let balances: std::collections::HashMap<String, rust_decimal::Decimal> = ctx
10690                        .opening_balances
10691                        .iter()
10692                        .map(|ob| (ob.account_code.clone(), ob.net_balance()))
10693                        .collect();
10694                    let total_assets = ctx
10695                        .opening_balances
10696                        .iter()
10697                        .filter(|ob| {
10698                            matches!(
10699                                ob.account_type,
10700                                AccountType::Asset | AccountType::ContraAsset
10701                            )
10702                        })
10703                        .map(|ob| ob.net_balance())
10704                        .sum::<rust_decimal::Decimal>();
10705                    let total_liabilities = ctx
10706                        .opening_balances
10707                        .iter()
10708                        .filter(|ob| {
10709                            matches!(
10710                                ob.account_type,
10711                                AccountType::Liability | AccountType::ContraLiability
10712                            )
10713                        })
10714                        .map(|ob| ob.net_balance())
10715                        .sum::<rust_decimal::Decimal>();
10716                    let total_equity = ctx
10717                        .opening_balances
10718                        .iter()
10719                        .filter(|ob| {
10720                            matches!(
10721                                ob.account_type,
10722                                AccountType::Equity | AccountType::ContraEquity
10723                            )
10724                        })
10725                        .map(|ob| ob.net_balance())
10726                        .sum::<rust_decimal::Decimal>();
10727                    let is_balanced = (total_assets - total_liabilities - total_equity).abs()
10728                        < rust_decimal::Decimal::ONE;
10729                    results.push(GeneratedOpeningBalance {
10730                        company_code: company.code.clone(),
10731                        as_of_date: start_date,
10732                        balances,
10733                        total_assets,
10734                        total_liabilities,
10735                        total_equity,
10736                        is_balanced,
10737                        calculated_ratios: datasynth_core::models::balance::CalculatedRatios {
10738                            current_ratio: None,
10739                            quick_ratio: None,
10740                            debt_to_equity: None,
10741                            working_capital: rust_decimal::Decimal::ZERO,
10742                        },
10743                    });
10744                }
10745                stats.opening_balance_count = results.len();
10746                self.check_resources_with_log("post-opening-balances")?;
10747                return Ok(results);
10748            }
10749        }
10750
10751        // 2. Generator path is opt-in via the config flag.
10752        if !self.config.balance.generate_opening_balances {
10753            debug!("Phase 3b: Skipped (opening balance generation disabled)");
10754            return Ok(Vec::new());
10755        }
10756        info!("Phase 3b: Generating Opening Balances");
10757
10758        // 3. OpeningBalanceGenerator — industry-mix sampler for period 0.
10759        let industry = match self.config.global.industry {
10760            IndustrySector::Manufacturing => IndustryType::Manufacturing,
10761            IndustrySector::Retail => IndustryType::Retail,
10762            IndustrySector::FinancialServices => IndustryType::Financial,
10763            IndustrySector::Healthcare => IndustryType::Healthcare,
10764            IndustrySector::Technology => IndustryType::Technology,
10765            _ => IndustryType::Manufacturing,
10766        };
10767
10768        let config = datasynth_generators::OpeningBalanceConfig {
10769            industry,
10770            ..Default::default()
10771        };
10772        let mut gen =
10773            datasynth_generators::OpeningBalanceGenerator::with_seed(config, self.seed + 200);
10774
10775        let mut results = Vec::new();
10776        for company in &self.config.companies {
10777            let spec = OpeningBalanceSpec::new(
10778                company.code.clone(),
10779                start_date,
10780                fiscal_year,
10781                company.currency.clone(),
10782                rust_decimal::Decimal::new(10_000_000, 0),
10783                industry,
10784            );
10785            let ob = gen.generate(&spec, coa, start_date, &company.code);
10786            results.push(ob);
10787        }
10788
10789        stats.opening_balance_count = results.len();
10790        info!("Opening balances generated: {} companies", results.len());
10791        self.check_resources_with_log("post-opening-balances")?;
10792
10793        Ok(results)
10794    }
10795
10796    /// Phase 9b: Reconcile GL control accounts to subledger balances.
10797    fn phase_subledger_reconciliation(
10798        &mut self,
10799        subledger: &SubledgerSnapshot,
10800        entries: &[JournalEntry],
10801        stats: &mut EnhancedGenerationStatistics,
10802    ) -> SynthResult<Vec<datasynth_generators::ReconciliationResult>> {
10803        if !self.config.balance.reconcile_subledgers {
10804            debug!("Phase 9b: Skipped (subledger reconciliation disabled)");
10805            return Ok(Vec::new());
10806        }
10807        info!("Phase 9b: Reconciling GL to subledger balances");
10808
10809        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
10810            .map(|d| d + chrono::Months::new(self.config.global.period_months))
10811            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
10812
10813        // Build GL balance map from journal entries using a balance tracker
10814        let tracker_config = BalanceTrackerConfig {
10815            validate_on_each_entry: false,
10816            track_history: false,
10817            fail_on_validation_error: false,
10818            ..Default::default()
10819        };
10820        let recon_currency = self
10821            .config
10822            .companies
10823            .first()
10824            .map(|c| c.currency.clone())
10825            .unwrap_or_else(|| "USD".to_string());
10826        let mut tracker = RunningBalanceTracker::new_with_currency(tracker_config, recon_currency);
10827        let validation_errors = tracker.apply_entries(entries);
10828        if !validation_errors.is_empty() {
10829            warn!(
10830                error_count = validation_errors.len(),
10831                "Balance tracker encountered validation errors during subledger reconciliation"
10832            );
10833            for err in &validation_errors {
10834                debug!("Balance validation error: {:?}", err);
10835            }
10836        }
10837
10838        let mut engine = datasynth_generators::ReconciliationEngine::new(
10839            datasynth_generators::ReconciliationConfig::default(),
10840        );
10841
10842        let mut results = Vec::new();
10843        let company_code = self
10844            .config
10845            .companies
10846            .first()
10847            .map(|c| c.code.as_str())
10848            .unwrap_or("1000");
10849
10850        // Reconcile AR
10851        if !subledger.ar_invoices.is_empty() {
10852            let gl_balance = tracker
10853                .get_account_balance(
10854                    company_code,
10855                    datasynth_core::accounts::control_accounts::AR_CONTROL,
10856                )
10857                .map(|b| b.closing_balance)
10858                .unwrap_or_default();
10859            let ar_refs: Vec<&ARInvoice> = subledger.ar_invoices.iter().collect();
10860            results.push(engine.reconcile_ar(company_code, end_date, gl_balance, &ar_refs));
10861        }
10862
10863        // Reconcile AP
10864        if !subledger.ap_invoices.is_empty() {
10865            let gl_balance = tracker
10866                .get_account_balance(
10867                    company_code,
10868                    datasynth_core::accounts::control_accounts::AP_CONTROL,
10869                )
10870                .map(|b| b.closing_balance)
10871                .unwrap_or_default();
10872            let ap_refs: Vec<&APInvoice> = subledger.ap_invoices.iter().collect();
10873            results.push(engine.reconcile_ap(company_code, end_date, gl_balance, &ap_refs));
10874        }
10875
10876        // Reconcile FA
10877        if !subledger.fa_records.is_empty() {
10878            let gl_asset_balance = tracker
10879                .get_account_balance(
10880                    company_code,
10881                    datasynth_core::accounts::control_accounts::FIXED_ASSETS,
10882                )
10883                .map(|b| b.closing_balance)
10884                .unwrap_or_default();
10885            let gl_accum_depr_balance = tracker
10886                .get_account_balance(
10887                    company_code,
10888                    datasynth_core::accounts::control_accounts::ACCUMULATED_DEPRECIATION,
10889                )
10890                .map(|b| b.closing_balance)
10891                .unwrap_or_default();
10892            let fa_refs: Vec<&datasynth_core::models::subledger::fa::FixedAssetRecord> =
10893                subledger.fa_records.iter().collect();
10894            let (asset_recon, depr_recon) = engine.reconcile_fa(
10895                company_code,
10896                end_date,
10897                gl_asset_balance,
10898                gl_accum_depr_balance,
10899                &fa_refs,
10900            );
10901            results.push(asset_recon);
10902            results.push(depr_recon);
10903        }
10904
10905        // Reconcile Inventory
10906        if !subledger.inventory_positions.is_empty() {
10907            let gl_balance = tracker
10908                .get_account_balance(
10909                    company_code,
10910                    datasynth_core::accounts::control_accounts::INVENTORY,
10911                )
10912                .map(|b| b.closing_balance)
10913                .unwrap_or_default();
10914            let inv_refs: Vec<&datasynth_core::models::subledger::inventory::InventoryPosition> =
10915                subledger.inventory_positions.iter().collect();
10916            results.push(engine.reconcile_inventory(company_code, end_date, gl_balance, &inv_refs));
10917        }
10918
10919        stats.subledger_reconciliation_count = results.len();
10920        let passed = results.iter().filter(|r| r.is_balanced()).count();
10921        let failed = results.len() - passed;
10922        info!(
10923            "Subledger reconciliation: {} checks, {} passed, {} failed",
10924            results.len(),
10925            passed,
10926            failed
10927        );
10928        self.check_resources_with_log("post-subledger-reconciliation")?;
10929
10930        Ok(results)
10931    }
10932
10933    /// Generate the chart of accounts.
10934    fn generate_coa(&mut self) -> SynthResult<Arc<ChartOfAccounts>> {
10935        let pb = self.create_progress_bar(1, "Generating Chart of Accounts");
10936
10937        let coa_framework = self.resolve_coa_framework();
10938
10939        let mut gen = ChartOfAccountsGenerator::new(
10940            self.config.chart_of_accounts.complexity,
10941            self.config.global.industry,
10942            self.seed,
10943        )
10944        .with_coa_framework(coa_framework)
10945        // v5.7.0 — honour the opt-in industry-pack expansion flag.
10946        .with_expand_industry_subaccounts(
10947            self.config.chart_of_accounts.expand_industry_subaccounts,
10948        );
10949
10950        let mut built = gen.generate();
10951        // v4.4.1: propagate the accounting framework label from config
10952        // onto the CoA struct so SDK consumers can read it without
10953        // cross-referencing the config (they previously saw null).
10954        if self.config.accounting_standards.enabled {
10955            use datasynth_config::schema::AccountingFrameworkConfig;
10956            built.accounting_framework = self.config.accounting_standards.framework.map(|f| {
10957                match f {
10958                    AccountingFrameworkConfig::UsGaap => "us_gaap",
10959                    AccountingFrameworkConfig::Ifrs => "ifrs",
10960                    AccountingFrameworkConfig::FrenchGaap => "french_gaap",
10961                    AccountingFrameworkConfig::GermanGaap => "german_gaap",
10962                    AccountingFrameworkConfig::DualReporting => "dual_reporting",
10963                }
10964                .to_string()
10965            });
10966        }
10967        // SP4.2 W8.2 + W7.1 — remap synthetic account numbers to corpus
10968        // ones first (W8.2), then enrich descriptions via the overlay (W7.1).
10969        // Applied before Arc::new so we only build one Arc (no clone needed).
10970        if let Some(ref cached) = self.cached_priors {
10971            if let Some(ref coa_prior) = cached.coa_semantic {
10972                use datasynth_generators::coa_generator::{
10973                    remap_account_numbers_to_prior, ChartOfAccountsGenerator,
10974                };
10975                // W8.2 — replace synthetic account numbers with corpus
10976                // ones so the W7.1 overlay fires at ~80% instead of ~16%.
10977                let mut rng =
10978                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_200));
10979                let remapped = remap_account_numbers_to_prior(&mut built, coa_prior, &mut rng);
10980                tracing::info!(
10981                    target: "datasynth_runtime::coa",
10982                    remapped,
10983                    total = built.accounts.len(),
10984                    "SP4.2 W8.2 — remapped synthetic account numbers to prior-matched corpus values"
10985                );
10986                // W7.1 — now overlay descriptions / class metadata for the
10987                // (now mostly corpus-numbered) accounts.
10988                let applied =
10989                    ChartOfAccountsGenerator::apply_coa_semantic_prior(&mut built, coa_prior);
10990                tracing::info!(
10991                    target: "datasynth_runtime::coa",
10992                    applied,
10993                    total = built.accounts.len(),
10994                    "SP4.2 W7.1 — overlaid real CoA semantic entries onto synthetic accounts"
10995                );
10996            }
10997            // SP6 — taxonomy overlay: run AFTER the semantic overlay so
10998            // taxonomy-templated accounts take precedence over verbatim
10999            // semantic descriptions.  Uses SyntheticExampleResolver because
11000            // the CoA is built before master-data pools are populated (so
11001            // vendor/customer names are not yet available).
11002            if let Some(tx) = cached.text_taxonomy.as_ref() {
11003                use datasynth_core::distributions::text_taxonomy::SyntheticExampleResolver;
11004                use datasynth_generators::coa_generator::overlay_coa_taxonomy;
11005                let mut resolver = SyntheticExampleResolver;
11006                let mut rng =
11007                    rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(88_201));
11008                overlay_coa_taxonomy(&mut built, tx, &mut resolver, &mut rng);
11009                tracing::info!(
11010                    target: "datasynth_runtime::coa",
11011                    total = built.accounts.len(),
11012                    "SP6 — overlaid text-taxonomy templates onto CoA descriptions"
11013                );
11014            }
11015        }
11016
11017        let coa = Arc::new(built);
11018        self.coa = Some(Arc::clone(&coa));
11019
11020        if let Some(pb) = pb {
11021            pb.finish_with_message("Chart of Accounts complete");
11022        }
11023
11024        Ok(coa)
11025    }
11026
11027    /// Generate master data entities.
11028    fn generate_master_data(&mut self) -> SynthResult<()> {
11029        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11030            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11031        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11032
11033        let total = self.config.companies.len() as u64 * 5; // 5 entity types
11034        let pb = self.create_progress_bar(total, "Generating Master Data");
11035
11036        // Resolve country pack once for all companies (uses primary company's country)
11037        let pack = self.primary_pack().clone();
11038
11039        // Capture config values needed inside the parallel closure
11040        let vendors_per_company = self.phase_config.vendors_per_company;
11041        let customers_per_company = self.phase_config.customers_per_company;
11042        let materials_per_company = self.phase_config.materials_per_company;
11043        let assets_per_company = self.phase_config.assets_per_company;
11044        let coa_framework = self.resolve_coa_framework();
11045
11046        // Generate all master data in parallel across companies.
11047        // Each company's data is independent, making this embarrassingly parallel.
11048        let per_company_results: Vec<_> = self
11049            .config
11050            .companies
11051            .par_iter()
11052            .enumerate()
11053            .map(|(i, company)| {
11054                let company_seed = self.seed.wrapping_add(i as u64 * 1000);
11055                let pack = pack.clone();
11056
11057                // Generate vendors (offset counter so IDs are globally unique across companies)
11058                let mut vendor_gen = VendorGenerator::new(company_seed);
11059                vendor_gen.set_country_pack(pack.clone());
11060                vendor_gen.set_coa_framework(coa_framework);
11061                vendor_gen.set_counter_offset(i * vendors_per_company);
11062                // v3.2.0+: user-supplied bank names (and future template
11063                // strings) flow through the shared provider.
11064                vendor_gen.set_template_provider(self.template_provider.clone());
11065                // Wire vendor network config when enabled
11066                if self.config.vendor_network.enabled {
11067                    let vn = &self.config.vendor_network;
11068                    vendor_gen.set_network_config(datasynth_generators::VendorNetworkConfig {
11069                        enabled: true,
11070                        depth: vn.depth,
11071                        tier1_count: datasynth_generators::TierCountConfig::new(
11072                            vn.tier1.min,
11073                            vn.tier1.max,
11074                        ),
11075                        tier2_per_parent: datasynth_generators::TierCountConfig::new(
11076                            vn.tier2_per_parent.min,
11077                            vn.tier2_per_parent.max,
11078                        ),
11079                        tier3_per_parent: datasynth_generators::TierCountConfig::new(
11080                            vn.tier3_per_parent.min,
11081                            vn.tier3_per_parent.max,
11082                        ),
11083                        cluster_distribution: datasynth_generators::ClusterDistribution {
11084                            reliable_strategic: vn.clusters.reliable_strategic,
11085                            standard_operational: vn.clusters.standard_operational,
11086                            transactional: vn.clusters.transactional,
11087                            problematic: vn.clusters.problematic,
11088                        },
11089                        concentration_limits: datasynth_generators::ConcentrationLimits {
11090                            max_single_vendor: vn.dependencies.max_single_vendor_concentration,
11091                            max_top5: vn.dependencies.top_5_concentration,
11092                        },
11093                        ..datasynth_generators::VendorNetworkConfig::default()
11094                    });
11095                }
11096                let vendor_pool =
11097                    vendor_gen.generate_vendor_pool(vendors_per_company, &company.code, start_date);
11098
11099                // Generate customers (offset counter so IDs are globally unique across companies)
11100                let mut customer_gen = CustomerGenerator::new(company_seed + 100);
11101                customer_gen.set_country_pack(pack.clone());
11102                customer_gen.set_coa_framework(coa_framework);
11103                customer_gen.set_counter_offset(i * customers_per_company);
11104                // v3.2.0+: user-supplied customer names flow through the shared provider.
11105                customer_gen.set_template_provider(self.template_provider.clone());
11106                // Wire customer segmentation config when enabled
11107                if self.config.customer_segmentation.enabled {
11108                    let cs = &self.config.customer_segmentation;
11109                    let seg_cfg = datasynth_generators::CustomerSegmentationConfig {
11110                        enabled: true,
11111                        segment_distribution: datasynth_generators::SegmentDistribution {
11112                            enterprise: cs.value_segments.enterprise.customer_share,
11113                            mid_market: cs.value_segments.mid_market.customer_share,
11114                            smb: cs.value_segments.smb.customer_share,
11115                            consumer: cs.value_segments.consumer.customer_share,
11116                        },
11117                        referral_config: datasynth_generators::ReferralConfig {
11118                            enabled: cs.networks.referrals.enabled,
11119                            referral_rate: cs.networks.referrals.referral_rate,
11120                            ..Default::default()
11121                        },
11122                        hierarchy_config: datasynth_generators::HierarchyConfig {
11123                            enabled: cs.networks.corporate_hierarchies.enabled,
11124                            hierarchy_rate: cs.networks.corporate_hierarchies.probability,
11125                            ..Default::default()
11126                        },
11127                        ..Default::default()
11128                    };
11129                    customer_gen.set_segmentation_config(seg_cfg);
11130                }
11131                let customer_pool = customer_gen.generate_customer_pool(
11132                    customers_per_company,
11133                    &company.code,
11134                    start_date,
11135                );
11136
11137                // Generate materials (offset counter so IDs are globally unique across companies)
11138                let mut material_gen = MaterialGenerator::new(company_seed + 200);
11139                material_gen.set_country_pack(pack.clone());
11140                material_gen.set_counter_offset(i * materials_per_company);
11141                // v3.2.1+: user-supplied material descriptions flow through shared provider
11142                material_gen.set_template_provider(self.template_provider.clone());
11143                let material_pool = material_gen.generate_material_pool(
11144                    materials_per_company,
11145                    &company.code,
11146                    start_date,
11147                );
11148
11149                // Generate fixed assets
11150                let mut asset_gen = AssetGenerator::new(company_seed + 300);
11151                // v3.2.1+: user-supplied asset descriptions flow through shared provider
11152                asset_gen.set_template_provider(self.template_provider.clone());
11153                let asset_pool = asset_gen.generate_asset_pool(
11154                    assets_per_company,
11155                    &company.code,
11156                    (start_date, end_date),
11157                );
11158
11159                // Generate employees
11160                let mut employee_gen = EmployeeGenerator::new(company_seed + 400);
11161                employee_gen.set_country_pack(pack);
11162                // v3.2.1+: user-supplied department names flow through shared provider
11163                employee_gen.set_template_provider(self.template_provider.clone());
11164                let employee_pool =
11165                    employee_gen.generate_company_pool(&company.code, (start_date, end_date));
11166
11167                // Generate employee change history (2-5 events per employee)
11168                let employee_change_history =
11169                    employee_gen.generate_all_change_history(&employee_pool, end_date);
11170
11171                // Generate cost center hierarchy (level-1 departments + level-2 sub-departments)
11172                let employee_ids: Vec<String> = employee_pool
11173                    .employees
11174                    .iter()
11175                    .map(|e| e.employee_id.clone())
11176                    .collect();
11177                let mut cc_gen = datasynth_generators::CostCenterGenerator::new(company_seed + 500);
11178                let cost_centers = cc_gen.generate_for_company(&company.code, &employee_ids);
11179
11180                // v5.1: profit centre hierarchy (two-level: top-level
11181                // segment / region / product-group nodes + sub-units).
11182                let mut pc_gen =
11183                    datasynth_generators::ProfitCenterGenerator::new(company_seed + 600);
11184                let profit_centers = pc_gen.generate_for_company(&company.code, &employee_ids);
11185
11186                (
11187                    vendor_pool.vendors,
11188                    customer_pool.customers,
11189                    material_pool.materials,
11190                    asset_pool.assets,
11191                    employee_pool.employees,
11192                    employee_change_history,
11193                    cost_centers,
11194                    profit_centers,
11195                )
11196            })
11197            .collect();
11198
11199        // Aggregate results from all companies
11200        for (
11201            vendors,
11202            customers,
11203            materials,
11204            assets,
11205            employees,
11206            change_history,
11207            cost_centers,
11208            profit_centers,
11209        ) in per_company_results
11210        {
11211            self.master_data.vendors.extend(vendors);
11212            self.master_data.customers.extend(customers);
11213            self.master_data.materials.extend(materials);
11214            self.master_data.assets.extend(assets);
11215            self.master_data.employees.extend(employees);
11216            self.master_data.cost_centers.extend(cost_centers);
11217            self.master_data.profit_centers.extend(profit_centers);
11218            self.master_data
11219                .employee_change_history
11220                .extend(change_history);
11221        }
11222
11223        // v3.3.0: one OrganizationalProfile per company. Cheap to
11224        // generate (derived from industry + company_code) so we
11225        // always emit when master data runs; no separate config flag.
11226        {
11227            use datasynth_core::models::IndustrySector;
11228            use datasynth_generators::organizational_profile_generator::OrganizationalProfileGenerator;
11229            let industry = match self.config.global.industry {
11230                IndustrySector::Manufacturing => "manufacturing",
11231                IndustrySector::Retail => "retail",
11232                IndustrySector::FinancialServices => "financial_services",
11233                IndustrySector::Technology => "technology",
11234                IndustrySector::Healthcare => "healthcare",
11235                _ => "other",
11236            };
11237            for (i, company) in self.config.companies.iter().enumerate() {
11238                let company_seed = self.seed.wrapping_add(i as u64 * 1000) + 500;
11239                let mut profile_gen = OrganizationalProfileGenerator::new(company_seed);
11240                let profile = profile_gen.generate(&company.code, industry);
11241                self.master_data.organizational_profiles.push(profile);
11242            }
11243        }
11244
11245        if let Some(pb) = &pb {
11246            pb.inc(total);
11247        }
11248        if let Some(pb) = pb {
11249            pb.finish_with_message("Master data generation complete");
11250        }
11251
11252        Ok(())
11253    }
11254
11255    /// Generate document flows (P2P and O2C).
11256    fn generate_document_flows(&mut self, flows: &mut DocumentFlowSnapshot) -> SynthResult<()> {
11257        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11258            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11259
11260        // Generate P2P chains
11261        // Cap at ~2 POs per vendor per month to keep spend concentration realistic
11262        let months = (self.config.global.period_months as usize).max(1);
11263        let p2p_count = self
11264            .phase_config
11265            .p2p_chains
11266            .min(self.master_data.vendors.len() * 2 * months);
11267        let pb = self.create_progress_bar(p2p_count as u64, "Generating P2P Document Flows");
11268
11269        // Convert P2P config from schema to generator config
11270        let p2p_config = convert_p2p_config(&self.config.document_flows.p2p);
11271        let mut p2p_gen = P2PGenerator::with_config(self.seed + 1000, p2p_config);
11272        p2p_gen.set_country_pack(self.primary_pack().clone());
11273        // v3.4.1: wire temporal context so PO/GR/invoice/payment dates snap
11274        // to business days. No-op when `temporal_patterns.business_days.
11275        // enabled = false`.
11276        if let Some(ctx) = &self.temporal_context {
11277            p2p_gen.set_temporal_context(Arc::clone(ctx));
11278        }
11279
11280        for i in 0..p2p_count {
11281            let vendor = &self.master_data.vendors[i % self.master_data.vendors.len()];
11282            let materials: Vec<&Material> = self
11283                .master_data
11284                .materials
11285                .iter()
11286                .skip(i % self.master_data.materials.len().max(1))
11287                .take(2.min(self.master_data.materials.len()))
11288                .collect();
11289
11290            if materials.is_empty() {
11291                continue;
11292            }
11293
11294            let company = &self.config.companies[i % self.config.companies.len()];
11295            let po_date = start_date + chrono::Duration::days((i * 3) as i64 % 365);
11296            let fiscal_period = po_date.month() as u8;
11297            let created_by = if self.master_data.employees.is_empty() {
11298                "SYSTEM"
11299            } else {
11300                self.master_data.employees[i % self.master_data.employees.len()]
11301                    .user_id
11302                    .as_str()
11303            };
11304
11305            let chain = p2p_gen.generate_chain(
11306                &company.code,
11307                vendor,
11308                &materials,
11309                po_date,
11310                start_date.year() as u16,
11311                fiscal_period,
11312                created_by,
11313            );
11314
11315            // Flatten documents
11316            flows.purchase_orders.push(chain.purchase_order.clone());
11317            flows.goods_receipts.extend(chain.goods_receipts.clone());
11318            if let Some(vi) = &chain.vendor_invoice {
11319                flows.vendor_invoices.push(vi.clone());
11320            }
11321            if let Some(payment) = &chain.payment {
11322                flows.payments.push(payment.clone());
11323            }
11324            for remainder in &chain.remainder_payments {
11325                flows.payments.push(remainder.clone());
11326            }
11327            flows.p2p_chains.push(chain);
11328
11329            if let Some(pb) = &pb {
11330                pb.inc(1);
11331            }
11332        }
11333
11334        if let Some(pb) = pb {
11335            pb.finish_with_message("P2P document flows complete");
11336        }
11337
11338        // Generate O2C chains
11339        // Cap at ~2 SOs per customer per month to keep order volume realistic
11340        let o2c_count = self
11341            .phase_config
11342            .o2c_chains
11343            .min(self.master_data.customers.len() * 2 * months);
11344        let pb = self.create_progress_bar(o2c_count as u64, "Generating O2C Document Flows");
11345
11346        // Convert O2C config from schema to generator config
11347        let o2c_config = convert_o2c_config(&self.config.document_flows.o2c);
11348        let mut o2c_gen = O2CGenerator::with_config(self.seed + 2000, o2c_config);
11349        o2c_gen.set_country_pack(self.primary_pack().clone());
11350        // v3.4.1: wire temporal context (no-op when business_days disabled).
11351        if let Some(ctx) = &self.temporal_context {
11352            o2c_gen.set_temporal_context(Arc::clone(ctx));
11353        }
11354
11355        for i in 0..o2c_count {
11356            let customer = &self.master_data.customers[i % self.master_data.customers.len()];
11357            let materials: Vec<&Material> = self
11358                .master_data
11359                .materials
11360                .iter()
11361                .skip(i % self.master_data.materials.len().max(1))
11362                .take(2.min(self.master_data.materials.len()))
11363                .collect();
11364
11365            if materials.is_empty() {
11366                continue;
11367            }
11368
11369            let company = &self.config.companies[i % self.config.companies.len()];
11370            let so_date = start_date + chrono::Duration::days((i * 2) as i64 % 365);
11371            let fiscal_period = so_date.month() as u8;
11372            let created_by = if self.master_data.employees.is_empty() {
11373                "SYSTEM"
11374            } else {
11375                self.master_data.employees[i % self.master_data.employees.len()]
11376                    .user_id
11377                    .as_str()
11378            };
11379
11380            let chain = o2c_gen.generate_chain(
11381                &company.code,
11382                customer,
11383                &materials,
11384                so_date,
11385                start_date.year() as u16,
11386                fiscal_period,
11387                created_by,
11388            );
11389
11390            // Flatten documents
11391            flows.sales_orders.push(chain.sales_order.clone());
11392            flows.deliveries.extend(chain.deliveries.clone());
11393            if let Some(ci) = &chain.customer_invoice {
11394                flows.customer_invoices.push(ci.clone());
11395            }
11396            if let Some(receipt) = &chain.customer_receipt {
11397                flows.payments.push(receipt.clone());
11398            }
11399            // Extract remainder receipts (follow-up to partial payments)
11400            for receipt in &chain.remainder_receipts {
11401                flows.payments.push(receipt.clone());
11402            }
11403            flows.o2c_chains.push(chain);
11404
11405            if let Some(pb) = &pb {
11406                pb.inc(1);
11407            }
11408        }
11409
11410        if let Some(pb) = pb {
11411            pb.finish_with_message("O2C document flows complete");
11412        }
11413
11414        // Collect all document cross-references from document headers.
11415        // Each document embeds references to its predecessor(s) via add_reference(); here we
11416        // denormalise them into a flat list for the document_references.json output file.
11417        {
11418            let mut refs = Vec::new();
11419            for doc in &flows.purchase_orders {
11420                refs.extend(doc.header.document_references.iter().cloned());
11421            }
11422            for doc in &flows.goods_receipts {
11423                refs.extend(doc.header.document_references.iter().cloned());
11424            }
11425            for doc in &flows.vendor_invoices {
11426                refs.extend(doc.header.document_references.iter().cloned());
11427            }
11428            for doc in &flows.sales_orders {
11429                refs.extend(doc.header.document_references.iter().cloned());
11430            }
11431            for doc in &flows.deliveries {
11432                refs.extend(doc.header.document_references.iter().cloned());
11433            }
11434            for doc in &flows.customer_invoices {
11435                refs.extend(doc.header.document_references.iter().cloned());
11436            }
11437            for doc in &flows.payments {
11438                refs.extend(doc.header.document_references.iter().cloned());
11439            }
11440            debug!(
11441                "Collected {} document cross-references from document headers",
11442                refs.len()
11443            );
11444            flows.document_references = refs;
11445        }
11446
11447        Ok(())
11448    }
11449
11450    /// Generate journal entries using parallel generation across multiple cores.
11451    fn generate_journal_entries(
11452        &mut self,
11453        coa: &Arc<ChartOfAccounts>,
11454    ) -> SynthResult<Vec<JournalEntry>> {
11455        use datasynth_core::traits::ParallelGenerator;
11456
11457        let total = self.calculate_total_transactions();
11458        let pb = self.create_progress_bar(total, "Generating Journal Entries");
11459
11460        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11461            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
11462        let end_date = start_date + chrono::Months::new(self.config.global.period_months);
11463
11464        let company_codes: Vec<String> = self
11465            .config
11466            .companies
11467            .iter()
11468            .map(|c| c.code.clone())
11469            .collect();
11470
11471        let mut generator = JournalEntryGenerator::new_with_params(
11472            self.config.transactions.clone(),
11473            Arc::clone(coa),
11474            company_codes,
11475            start_date,
11476            end_date,
11477            self.seed,
11478        );
11479        // Wire the `business_processes.*_weight` config through (phantom knob
11480        // until now — the JE generator hard-coded 0.35/0.30/0.20/0.10/0.05).
11481        let bp = &self.config.business_processes;
11482        generator.set_business_process_weights(
11483            bp.o2c_weight,
11484            bp.p2p_weight,
11485            bp.r2r_weight,
11486            bp.h2r_weight,
11487            bp.a2r_weight,
11488        );
11489        // v3.4.0: wire advanced distributions (mixture models + industry
11490        // profiles). No-op when `distributions.enabled = false` or
11491        // `distributions.amounts.enabled = false`, preserving v3.3.2
11492        // byte-identical output on default configs.
11493        generator
11494            .set_advanced_distributions(&self.config.distributions, self.seed + 400)
11495            .map_err(|e| SynthError::config(format!("invalid distributions config: {e}")))?;
11496
11497        // SP3: load and wire industry priors when the config opts in via
11498        //   distributions.industry_profile.priors.enabled = true
11499        // When disabled (or when using the legacy bare-name form), this block
11500        // is a no-op and generation behavior is identical to v5.11.
11501        if let Some(profile) = &self.config.distributions.industry_profile {
11502            if let Some(priors_cfg) = profile.priors() {
11503                if priors_cfg.enabled {
11504                    use datasynth_config::schema::PriorsSource;
11505                    use datasynth_generators::priors_loader::LoadedPriors;
11506
11507                    let mut priors_rng =
11508                        rand_chacha::ChaCha8Rng::seed_from_u64(self.seed.wrapping_add(500));
11509                    let period_days = i64::from(self.config.global.period_months) * 30;
11510                    let industry_slug = profile.profile_type().slug();
11511
11512                    let loaded = match priors_cfg.source {
11513                        PriorsSource::Bundled => {
11514                            LoadedPriors::load_bundled(industry_slug, &mut priors_rng, period_days)
11515                                .map_err(|e| {
11516                                    SynthError::config(format!(
11517                                "SP3: failed to load bundled priors for '{industry_slug}': {e}"
11518                            ))
11519                                })?
11520                        }
11521                        PriorsSource::File => {
11522                            let path = priors_cfg.path.as_ref().ok_or_else(|| {
11523                                SynthError::config(
11524                                    "SP3: industry_profile.priors.path required when source = file"
11525                                        .to_string(),
11526                                )
11527                            })?;
11528                            LoadedPriors::load_from_path(
11529                                path,
11530                                &mut priors_rng,
11531                                period_days,
11532                                Some(industry_slug),
11533                            )
11534                            .map_err(|e| {
11535                                SynthError::config(format!(
11536                                    "SP3: failed to load priors from '{}': {e}",
11537                                    path.display()
11538                                ))
11539                            })?
11540                        }
11541                    };
11542
11543                    // SP3.12 — cache priors in Arc so document-flow generator
11544                    // can also apply lines-per-JE padding without re-loading.
11545                    let loaded = std::sync::Arc::new(loaded);
11546                    self.cached_priors = Some(loaded.clone());
11547                    generator.loaded_priors = Some((*loaded).clone());
11548
11549                    // SP3.4 — instantiate VelocityCalibrator when the config
11550                    // opts in.  Default target rates (R7/R9) are a sensible
11551                    // baseline; they can be derived from the loaded priors in
11552                    // a future hardening pass.
11553                    if priors_cfg.velocity_calibration {
11554                        use datasynth_generators::velocity_calibrator::VelocityCalibrator;
11555                        let mut targets = std::collections::HashMap::new();
11556                        targets.insert("R7".to_string(), 0.10);
11557                        targets.insert("R9".to_string(), 0.10);
11558                        let calibrator = VelocityCalibrator::new(targets, 10_000);
11559                        generator.velocity_calibrator = Some(calibrator);
11560                    }
11561                }
11562            }
11563        }
11564
11565        let generator = generator;
11566
11567        // Connect generated master data to ensure JEs reference real entities
11568        // Enable persona-based error injection for realistic human behavior
11569        // Pass fraud configuration for fraud injection
11570        let je_pack = self.primary_pack();
11571
11572        // Master-data CC / PC pools so JE.cost_center and
11573        // JE.profit_center join back to `cost_centers.id` and
11574        // `profit_centers.id` (closes the v5.9.0 linkage gap that
11575        // had `JE.cost_center = "CC1000"` while master used
11576        // `CC-1000-FIN` etc.).  Empty when no master is present —
11577        // the generator falls back to its hardcoded constants.
11578        let cc_pool: Vec<String> = self
11579            .master_data
11580            .cost_centers
11581            .iter()
11582            .map(|c| c.id.clone())
11583            .collect();
11584        let pc_pool: Vec<String> = self
11585            .master_data
11586            .profit_centers
11587            .iter()
11588            .map(|p| p.id.clone())
11589            .collect();
11590
11591        // Build a UserPool from the generated employee master so
11592        // JE.created_by lines join back to `employees.user_id`.  v5.9.0:
11593        // closes the third linkage gap (the previous behaviour had
11594        // JeGenerator generate its own UserPool internally with
11595        // ids disjoint from the employee master).
11596        let user_pool_from_employees =
11597            datasynth_core::models::UserPool::from_employees(&self.master_data.employees);
11598
11599        let mut generator = generator
11600            .with_master_data(
11601                &self.master_data.vendors,
11602                &self.master_data.customers,
11603                &self.master_data.materials,
11604            )
11605            .with_cost_center_pool(cc_pool)
11606            .with_profit_center_pool(pc_pool)
11607            .with_country_pack_names(je_pack)
11608            .with_user_pool(user_pool_from_employees)
11609            .with_country_pack_temporal(
11610                self.config.temporal_patterns.clone(),
11611                self.seed + 200,
11612                je_pack,
11613            )
11614            .with_persona_errors(true)
11615            .with_fraud_config(self.config.fraud.clone());
11616
11617        // Apply temporal drift if configured. v3.5.2+: also merge
11618        // `distributions.regime_changes` (regime events, economic
11619        // cycles, parameter drifts) into the same DriftConfig so both
11620        // knobs flow through the shared DriftController.
11621        let temporal_enabled = self.config.temporal.enabled;
11622        let regimes_enabled = self.config.distributions.regime_changes.enabled;
11623        if temporal_enabled || regimes_enabled {
11624            let mut drift_config = if temporal_enabled {
11625                self.config.temporal.to_core_config()
11626            } else {
11627                // regime-changes only: start from default (drift OFF),
11628                // apply_to flips `enabled = true`.
11629                datasynth_core::distributions::DriftConfig::default()
11630            };
11631            if regimes_enabled {
11632                self.config
11633                    .distributions
11634                    .regime_changes
11635                    .apply_to(&mut drift_config, start_date);
11636            }
11637            generator = generator.with_drift_config(drift_config, self.seed + 100);
11638        }
11639
11640        // Check memory limit at start
11641        self.check_memory_limit()?;
11642
11643        // Determine parallelism: use available cores, but cap at total entries
11644        let num_threads = num_cpus::get().max(1).min(total as usize).max(1);
11645
11646        // Use parallel generation for datasets with 10K+ entries.
11647        // Below this threshold, the statistical properties of a single-seeded
11648        // generator (e.g. Benford compliance) are better preserved.
11649        let entries = if total >= 10_000 && num_threads > 1 {
11650            // Parallel path: split the generator across cores and generate in parallel.
11651            // Each sub-generator gets a unique seed for deterministic, independent generation.
11652            let sub_generators = generator.split(num_threads);
11653            let entries_per_thread = total as usize / num_threads;
11654            let remainder = total as usize % num_threads;
11655
11656            let batches: Vec<Vec<JournalEntry>> = sub_generators
11657                .into_par_iter()
11658                .enumerate()
11659                .map(|(i, mut gen)| {
11660                    let count = entries_per_thread + if i < remainder { 1 } else { 0 };
11661                    gen.generate_batch(count)
11662                })
11663                .collect();
11664
11665            // Merge all batches into a single Vec
11666            let entries = JournalEntryGenerator::merge_results(batches);
11667
11668            if let Some(pb) = &pb {
11669                pb.inc(total);
11670            }
11671            entries
11672        } else {
11673            // Sequential path for small datasets (< 1000 entries)
11674            let mut entries = Vec::with_capacity(total as usize);
11675            for _ in 0..total {
11676                let entry = generator.generate();
11677                entries.push(entry);
11678                if let Some(pb) = &pb {
11679                    pb.inc(1);
11680                }
11681            }
11682            entries
11683        };
11684
11685        if let Some(pb) = pb {
11686            pb.finish_with_message("Journal entries complete");
11687        }
11688
11689        Ok(entries)
11690    }
11691
11692    /// Generate journal entries from document flows.
11693    ///
11694    /// This creates proper GL entries for each document in the P2P and O2C flows,
11695    /// ensuring that document activity is reflected in the general ledger.
11696    fn generate_jes_from_document_flows(
11697        &mut self,
11698        flows: &DocumentFlowSnapshot,
11699    ) -> SynthResult<Vec<JournalEntry>> {
11700        let total_chains = flows.p2p_chains.len() + flows.o2c_chains.len();
11701        let pb = self.create_progress_bar(total_chains as u64, "Generating Document Flow JEs");
11702
11703        let je_config = match self.resolve_coa_framework() {
11704            CoAFramework::FrenchPcg => DocumentFlowJeConfig::french_gaap(),
11705            CoAFramework::GermanSkr04 => {
11706                let fa = datasynth_core::FrameworkAccounts::german_gaap();
11707                DocumentFlowJeConfig::from(&fa)
11708            }
11709            CoAFramework::UsGaap => DocumentFlowJeConfig::default(),
11710        };
11711
11712        let populate_fec = je_config.populate_fec_fields;
11713        let mut generator = DocumentFlowJeGenerator::with_config_and_seed(je_config, self.seed);
11714
11715        // SP3.12 — propagate cached priors so document-flow JEs receive
11716        // the same lines-per-JE padding as standalone JEs.
11717        if let Some(ref priors) = self.cached_priors {
11718            generator.set_loaded_priors(priors.clone());
11719        }
11720
11721        // Master-data CC / PC pools so document-flow-derived JEs
11722        // (P2P / O2C postings) reference IDs that join back to the
11723        // cost-centers / profit-centers masters.  Same plumbing as
11724        // for `JeGenerator` above; falls back to hardcoded const
11725        // pools when masters are absent.
11726        let cc_pool: Vec<String> = self
11727            .master_data
11728            .cost_centers
11729            .iter()
11730            .map(|c| c.id.clone())
11731            .collect();
11732        let pc_pool: Vec<String> = self
11733            .master_data
11734            .profit_centers
11735            .iter()
11736            .map(|p| p.id.clone())
11737            .collect();
11738        if !cc_pool.is_empty() {
11739            generator.set_cost_center_pool(cc_pool);
11740        }
11741        if !pc_pool.is_empty() {
11742            generator.set_profit_center_pool(pc_pool);
11743        }
11744
11745        // Build auxiliary account lookup from vendor/customer master data so that
11746        // FEC auxiliary_account_number uses framework-specific GL accounts (e.g.,
11747        // PCG "4010001") instead of raw partner IDs.
11748        if populate_fec {
11749            let mut aux_lookup = std::collections::HashMap::new();
11750            for vendor in &self.master_data.vendors {
11751                if let Some(ref aux) = vendor.auxiliary_gl_account {
11752                    aux_lookup.insert(vendor.vendor_id.clone(), aux.clone());
11753                }
11754            }
11755            for customer in &self.master_data.customers {
11756                if let Some(ref aux) = customer.auxiliary_gl_account {
11757                    aux_lookup.insert(customer.customer_id.clone(), aux.clone());
11758                }
11759            }
11760            if !aux_lookup.is_empty() {
11761                generator.set_auxiliary_account_lookup(aux_lookup);
11762            }
11763        }
11764
11765        let mut entries = Vec::new();
11766
11767        // Generate JEs from P2P chains
11768        for chain in &flows.p2p_chains {
11769            let chain_entries = generator.generate_from_p2p_chain(chain);
11770            entries.extend(chain_entries);
11771            if let Some(pb) = &pb {
11772                pb.inc(1);
11773            }
11774        }
11775
11776        // Generate JEs from O2C chains
11777        for chain in &flows.o2c_chains {
11778            let chain_entries = generator.generate_from_o2c_chain(chain);
11779            entries.extend(chain_entries);
11780            if let Some(pb) = &pb {
11781                pb.inc(1);
11782            }
11783        }
11784
11785        if let Some(pb) = pb {
11786            pb.finish_with_message(format!(
11787                "Generated {} JEs from document flows",
11788                entries.len()
11789            ));
11790        }
11791
11792        Ok(entries)
11793    }
11794
11795    /// Generate journal entries from payroll runs.
11796    ///
11797    /// Creates one JE per payroll run:
11798    /// - DR Salaries & Wages (6100) for gross pay
11799    /// - CR Payroll Clearing (9100) for gross pay
11800    fn generate_payroll_jes(payroll_runs: &[PayrollRun]) -> Vec<JournalEntry> {
11801        use datasynth_core::accounts::{expense_accounts, suspense_accounts};
11802
11803        let mut jes = Vec::with_capacity(payroll_runs.len());
11804
11805        for run in payroll_runs {
11806            let mut je = JournalEntry::new_simple(
11807                format!("JE-PAYROLL-{}", run.payroll_id),
11808                run.company_code.clone(),
11809                run.run_date,
11810                format!("Payroll {}", run.payroll_id),
11811            );
11812
11813            // Debit Salaries & Wages for gross pay
11814            je.add_line(JournalEntryLine {
11815                line_number: 1,
11816                gl_account: expense_accounts::SALARIES_WAGES.to_string(),
11817                debit_amount: run.total_gross,
11818                reference: Some(run.payroll_id.clone()),
11819                text: Some(format!(
11820                    "Payroll {} ({} employees)",
11821                    run.payroll_id, run.employee_count
11822                )),
11823                ..Default::default()
11824            });
11825
11826            // Credit Payroll Clearing for gross pay
11827            je.add_line(JournalEntryLine {
11828                line_number: 2,
11829                gl_account: suspense_accounts::PAYROLL_CLEARING.to_string(),
11830                credit_amount: run.total_gross,
11831                reference: Some(run.payroll_id.clone()),
11832                ..Default::default()
11833            });
11834
11835            jes.push(je);
11836        }
11837
11838        jes
11839    }
11840
11841    /// Link document flows to subledger records.
11842    ///
11843    /// Creates AP invoices from vendor invoices and AR invoices from customer invoices,
11844    /// ensuring subledger data is coherent with document flow data.
11845    fn link_document_flows_to_subledgers(
11846        &mut self,
11847        flows: &DocumentFlowSnapshot,
11848    ) -> SynthResult<SubledgerSnapshot> {
11849        let total = flows.vendor_invoices.len() + flows.customer_invoices.len();
11850        let pb = self.create_progress_bar(total as u64, "Linking Subledgers");
11851
11852        // Build vendor/customer name maps from master data for realistic subledger names
11853        let vendor_names: std::collections::HashMap<String, String> = self
11854            .master_data
11855            .vendors
11856            .iter()
11857            .map(|v| (v.vendor_id.clone(), v.name.clone()))
11858            .collect();
11859        let customer_names: std::collections::HashMap<String, String> = self
11860            .master_data
11861            .customers
11862            .iter()
11863            .map(|c| (c.customer_id.clone(), c.name.clone()))
11864            .collect();
11865
11866        let mut linker = DocumentFlowLinker::new()
11867            .with_vendor_names(vendor_names)
11868            .with_customer_names(customer_names);
11869
11870        // Convert vendor invoices to AP invoices
11871        let ap_invoices = linker.batch_create_ap_invoices(&flows.vendor_invoices);
11872        if let Some(pb) = &pb {
11873            pb.inc(flows.vendor_invoices.len() as u64);
11874        }
11875
11876        // Convert customer invoices to AR invoices
11877        let ar_invoices = linker.batch_create_ar_invoices(&flows.customer_invoices);
11878        if let Some(pb) = &pb {
11879            pb.inc(flows.customer_invoices.len() as u64);
11880        }
11881
11882        if let Some(pb) = pb {
11883            pb.finish_with_message(format!(
11884                "Linked {} AP and {} AR invoices",
11885                ap_invoices.len(),
11886                ar_invoices.len()
11887            ));
11888        }
11889
11890        Ok(SubledgerSnapshot {
11891            ap_invoices,
11892            ar_invoices,
11893            fa_records: Vec::new(),
11894            inventory_positions: Vec::new(),
11895            inventory_movements: Vec::new(),
11896            // Aging reports are computed after payment settlement in phase_document_flows.
11897            ar_aging_reports: Vec::new(),
11898            ap_aging_reports: Vec::new(),
11899            // Depreciation runs and inventory valuations are populated after FA/inventory generation.
11900            depreciation_runs: Vec::new(),
11901            inventory_valuations: Vec::new(),
11902            // Dunning runs and letters are populated in phase_document_flows after AR aging.
11903            dunning_runs: Vec::new(),
11904            dunning_letters: Vec::new(),
11905        })
11906    }
11907
11908    /// Generate OCPM events from document flows.
11909    ///
11910    /// Creates OCEL 2.0 compliant event logs from P2P and O2C document flows,
11911    /// capturing the object-centric process perspective.
11912    #[allow(clippy::too_many_arguments)]
11913    fn generate_ocpm_events(
11914        &mut self,
11915        flows: &DocumentFlowSnapshot,
11916        sourcing: &SourcingSnapshot,
11917        hr: &HrSnapshot,
11918        manufacturing: &ManufacturingSnapshot,
11919        banking: &BankingSnapshot,
11920        audit: &AuditSnapshot,
11921        financial_reporting: &FinancialReportingSnapshot,
11922    ) -> SynthResult<OcpmSnapshot> {
11923        let total_chains = flows.p2p_chains.len()
11924            + flows.o2c_chains.len()
11925            + sourcing.sourcing_projects.len()
11926            + hr.payroll_runs.len()
11927            + manufacturing.production_orders.len()
11928            + banking.customers.len()
11929            + audit.engagements.len()
11930            + financial_reporting.bank_reconciliations.len();
11931        let pb = self.create_progress_bar(total_chains as u64, "Generating OCPM Events");
11932
11933        // Create OCPM event log with standard types
11934        let metadata = EventLogMetadata::new("SyntheticData OCPM Log");
11935        let mut event_log = OcpmEventLog::with_metadata(metadata).with_standard_types();
11936
11937        // Configure the OCPM generator
11938        let ocpm_config = OcpmGeneratorConfig {
11939            generate_p2p: true,
11940            generate_o2c: true,
11941            generate_s2c: !sourcing.sourcing_projects.is_empty(),
11942            generate_h2r: !hr.payroll_runs.is_empty(),
11943            generate_mfg: !manufacturing.production_orders.is_empty(),
11944            generate_bank_recon: !financial_reporting.bank_reconciliations.is_empty(),
11945            generate_bank: !banking.customers.is_empty(),
11946            generate_audit: !audit.engagements.is_empty(),
11947            happy_path_rate: 0.75,
11948            exception_path_rate: 0.20,
11949            error_path_rate: 0.05,
11950            add_duration_variability: true,
11951            duration_std_dev_factor: 0.3,
11952        };
11953        let mut ocpm_gen = OcpmEventGenerator::with_config(self.seed + 3000, ocpm_config);
11954        let ocpm_uuid_factory = OcpmUuidFactory::new(self.seed + 3001);
11955
11956        // Get available users for resource assignment
11957        let available_users: Vec<String> = self
11958            .master_data
11959            .employees
11960            .iter()
11961            .take(20)
11962            .map(|e| e.user_id.clone())
11963            .collect();
11964
11965        // Deterministic base date from config (avoids Utc::now() non-determinism)
11966        let fallback_date =
11967            NaiveDate::from_ymd_opt(2024, 1, 1).expect("static date 2024-01-01 is always valid");
11968        let base_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
11969            .unwrap_or(fallback_date);
11970        let base_midnight = base_date
11971            .and_hms_opt(0, 0, 0)
11972            .expect("midnight is always valid");
11973        let base_datetime =
11974            chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(base_midnight, chrono::Utc);
11975
11976        // Helper closure to add case results to event log
11977        let add_result = |event_log: &mut OcpmEventLog,
11978                          result: datasynth_ocpm::CaseGenerationResult| {
11979            for event in result.events {
11980                event_log.add_event(event);
11981            }
11982            for object in result.objects {
11983                event_log.add_object(object);
11984            }
11985            for relationship in result.relationships {
11986                event_log.add_relationship(relationship);
11987            }
11988            for corr in result.correlation_events {
11989                event_log.add_correlation_event(corr);
11990            }
11991            event_log.add_case(result.case_trace);
11992        };
11993
11994        // Generate events from P2P chains
11995        for chain in &flows.p2p_chains {
11996            let po = &chain.purchase_order;
11997            let documents = P2pDocuments::new(
11998                &po.header.document_id,
11999                &po.vendor_id,
12000                &po.header.company_code,
12001                po.total_net_amount,
12002                &po.header.currency,
12003                &ocpm_uuid_factory,
12004            )
12005            .with_goods_receipt(
12006                chain
12007                    .goods_receipts
12008                    .first()
12009                    .map(|gr| gr.header.document_id.as_str())
12010                    .unwrap_or(""),
12011                &ocpm_uuid_factory,
12012            )
12013            .with_invoice(
12014                chain
12015                    .vendor_invoice
12016                    .as_ref()
12017                    .map(|vi| vi.header.document_id.as_str())
12018                    .unwrap_or(""),
12019                &ocpm_uuid_factory,
12020            )
12021            .with_payment(
12022                chain
12023                    .payment
12024                    .as_ref()
12025                    .map(|p| p.header.document_id.as_str())
12026                    .unwrap_or(""),
12027                &ocpm_uuid_factory,
12028            );
12029
12030            let start_time =
12031                chrono::DateTime::from_naive_utc_and_offset(po.header.entry_timestamp, chrono::Utc);
12032            let result = ocpm_gen.generate_p2p_case(&documents, start_time, &available_users);
12033            add_result(&mut event_log, result);
12034
12035            if let Some(pb) = &pb {
12036                pb.inc(1);
12037            }
12038        }
12039
12040        // Generate events from O2C chains
12041        for chain in &flows.o2c_chains {
12042            let so = &chain.sales_order;
12043            let documents = O2cDocuments::new(
12044                &so.header.document_id,
12045                &so.customer_id,
12046                &so.header.company_code,
12047                so.total_net_amount,
12048                &so.header.currency,
12049                &ocpm_uuid_factory,
12050            )
12051            .with_delivery(
12052                chain
12053                    .deliveries
12054                    .first()
12055                    .map(|d| d.header.document_id.as_str())
12056                    .unwrap_or(""),
12057                &ocpm_uuid_factory,
12058            )
12059            .with_invoice(
12060                chain
12061                    .customer_invoice
12062                    .as_ref()
12063                    .map(|ci| ci.header.document_id.as_str())
12064                    .unwrap_or(""),
12065                &ocpm_uuid_factory,
12066            )
12067            .with_receipt(
12068                chain
12069                    .customer_receipt
12070                    .as_ref()
12071                    .map(|r| r.header.document_id.as_str())
12072                    .unwrap_or(""),
12073                &ocpm_uuid_factory,
12074            );
12075
12076            let start_time =
12077                chrono::DateTime::from_naive_utc_and_offset(so.header.entry_timestamp, chrono::Utc);
12078            let result = ocpm_gen.generate_o2c_case(&documents, start_time, &available_users);
12079            add_result(&mut event_log, result);
12080
12081            if let Some(pb) = &pb {
12082                pb.inc(1);
12083            }
12084        }
12085
12086        // Generate events from S2C sourcing projects
12087        for project in &sourcing.sourcing_projects {
12088            // Find vendor from contracts or qualifications
12089            let vendor_id = sourcing
12090                .contracts
12091                .iter()
12092                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12093                .map(|c| c.vendor_id.clone())
12094                .or_else(|| sourcing.qualifications.first().map(|q| q.vendor_id.clone()))
12095                .or_else(|| {
12096                    self.master_data
12097                        .vendors
12098                        .first()
12099                        .map(|v| v.vendor_id.clone())
12100                })
12101                .unwrap_or_else(|| "V000".to_string());
12102            let mut docs = S2cDocuments::new(
12103                &project.project_id,
12104                &vendor_id,
12105                &project.company_code,
12106                project.estimated_annual_spend,
12107                &ocpm_uuid_factory,
12108            );
12109            // Link RFx if available
12110            if let Some(rfx) = sourcing
12111                .rfx_events
12112                .iter()
12113                .find(|r| r.sourcing_project_id == project.project_id)
12114            {
12115                docs = docs.with_rfx(&rfx.rfx_id, &ocpm_uuid_factory);
12116                // Link winning bid (status == Accepted)
12117                if let Some(bid) = sourcing.bids.iter().find(|b| {
12118                    b.rfx_id == rfx.rfx_id
12119                        && b.status == datasynth_core::models::sourcing::BidStatus::Accepted
12120                }) {
12121                    docs = docs.with_winning_bid(&bid.bid_id, &ocpm_uuid_factory);
12122                }
12123            }
12124            // Link contract
12125            if let Some(contract) = sourcing
12126                .contracts
12127                .iter()
12128                .find(|c| c.sourcing_project_id.as_deref() == Some(&project.project_id))
12129            {
12130                docs = docs.with_contract(&contract.contract_id, &ocpm_uuid_factory);
12131            }
12132            let start_time = base_datetime - chrono::Duration::days(90);
12133            let result = ocpm_gen.generate_s2c_case(&docs, start_time, &available_users);
12134            add_result(&mut event_log, result);
12135
12136            if let Some(pb) = &pb {
12137                pb.inc(1);
12138            }
12139        }
12140
12141        // Generate events from H2R payroll runs
12142        for run in &hr.payroll_runs {
12143            // Use first matching payroll line item's employee, or fallback
12144            let employee_id = hr
12145                .payroll_line_items
12146                .iter()
12147                .find(|li| li.payroll_id == run.payroll_id)
12148                .map(|li| li.employee_id.as_str())
12149                .unwrap_or("EMP000");
12150            let docs = H2rDocuments::new(
12151                &run.payroll_id,
12152                employee_id,
12153                &run.company_code,
12154                run.total_gross,
12155                &ocpm_uuid_factory,
12156            )
12157            .with_time_entries(
12158                hr.time_entries
12159                    .iter()
12160                    .filter(|t| t.date >= run.pay_period_start && t.date <= run.pay_period_end)
12161                    .take(5)
12162                    .map(|t| t.entry_id.as_str())
12163                    .collect(),
12164            );
12165            let start_time = base_datetime - chrono::Duration::days(30);
12166            let result = ocpm_gen.generate_h2r_case(&docs, start_time, &available_users);
12167            add_result(&mut event_log, result);
12168
12169            if let Some(pb) = &pb {
12170                pb.inc(1);
12171            }
12172        }
12173
12174        // Generate events from MFG production orders
12175        for order in &manufacturing.production_orders {
12176            let mut docs = MfgDocuments::new(
12177                &order.order_id,
12178                &order.material_id,
12179                &order.company_code,
12180                order.planned_quantity,
12181                &ocpm_uuid_factory,
12182            )
12183            .with_operations(
12184                order
12185                    .operations
12186                    .iter()
12187                    .map(|o| format!("OP-{:04}", o.operation_number))
12188                    .collect::<Vec<_>>()
12189                    .iter()
12190                    .map(std::string::String::as_str)
12191                    .collect(),
12192            );
12193            // Link quality inspection if available (via reference_id matching order_id)
12194            if let Some(insp) = manufacturing
12195                .quality_inspections
12196                .iter()
12197                .find(|i| i.reference_id == order.order_id)
12198            {
12199                docs = docs.with_inspection(&insp.inspection_id, &ocpm_uuid_factory);
12200            }
12201            // Link cycle count if available (match by material_id in items)
12202            if let Some(cc) = manufacturing.cycle_counts.iter().find(|cc| {
12203                cc.items
12204                    .iter()
12205                    .any(|item| item.material_id == order.material_id)
12206            }) {
12207                docs = docs.with_cycle_count(&cc.count_id, &ocpm_uuid_factory);
12208            }
12209            let start_time = base_datetime - chrono::Duration::days(60);
12210            let result = ocpm_gen.generate_mfg_case(&docs, start_time, &available_users);
12211            add_result(&mut event_log, result);
12212
12213            if let Some(pb) = &pb {
12214                pb.inc(1);
12215            }
12216        }
12217
12218        // Generate events from Banking customers
12219        for customer in &banking.customers {
12220            let customer_id_str = customer.customer_id.to_string();
12221            let mut docs = BankDocuments::new(&customer_id_str, "1000", &ocpm_uuid_factory);
12222            // Link accounts (primary_owner_id matches customer_id)
12223            if let Some(account) = banking
12224                .accounts
12225                .iter()
12226                .find(|a| a.primary_owner_id == customer.customer_id)
12227            {
12228                let account_id_str = account.account_id.to_string();
12229                docs = docs.with_account(&account_id_str, &ocpm_uuid_factory);
12230                // Link transactions for this account
12231                let txn_strs: Vec<String> = banking
12232                    .transactions
12233                    .iter()
12234                    .filter(|t| t.account_id == account.account_id)
12235                    .take(10)
12236                    .map(|t| t.transaction_id.to_string())
12237                    .collect();
12238                let txn_ids: Vec<&str> = txn_strs.iter().map(std::string::String::as_str).collect();
12239                let txn_amounts: Vec<rust_decimal::Decimal> = banking
12240                    .transactions
12241                    .iter()
12242                    .filter(|t| t.account_id == account.account_id)
12243                    .take(10)
12244                    .map(|t| t.amount)
12245                    .collect();
12246                if !txn_ids.is_empty() {
12247                    docs = docs.with_transactions(txn_ids, txn_amounts);
12248                }
12249            }
12250            let start_time = base_datetime - chrono::Duration::days(180);
12251            let result = ocpm_gen.generate_bank_case(&docs, start_time, &available_users);
12252            add_result(&mut event_log, result);
12253
12254            if let Some(pb) = &pb {
12255                pb.inc(1);
12256            }
12257        }
12258
12259        // Generate events from Audit engagements
12260        for engagement in &audit.engagements {
12261            let engagement_id_str = engagement.engagement_id.to_string();
12262            let docs = AuditDocuments::new(
12263                &engagement_id_str,
12264                &engagement.client_entity_id,
12265                &ocpm_uuid_factory,
12266            )
12267            .with_workpapers(
12268                audit
12269                    .workpapers
12270                    .iter()
12271                    .filter(|w| w.engagement_id == engagement.engagement_id)
12272                    .take(10)
12273                    .map(|w| w.workpaper_id.to_string())
12274                    .collect::<Vec<_>>()
12275                    .iter()
12276                    .map(std::string::String::as_str)
12277                    .collect(),
12278            )
12279            .with_evidence(
12280                audit
12281                    .evidence
12282                    .iter()
12283                    .filter(|e| e.engagement_id == engagement.engagement_id)
12284                    .take(10)
12285                    .map(|e| e.evidence_id.to_string())
12286                    .collect::<Vec<_>>()
12287                    .iter()
12288                    .map(std::string::String::as_str)
12289                    .collect(),
12290            )
12291            .with_risks(
12292                audit
12293                    .risk_assessments
12294                    .iter()
12295                    .filter(|r| r.engagement_id == engagement.engagement_id)
12296                    .take(5)
12297                    .map(|r| r.risk_id.to_string())
12298                    .collect::<Vec<_>>()
12299                    .iter()
12300                    .map(std::string::String::as_str)
12301                    .collect(),
12302            )
12303            .with_findings(
12304                audit
12305                    .findings
12306                    .iter()
12307                    .filter(|f| f.engagement_id == engagement.engagement_id)
12308                    .take(5)
12309                    .map(|f| f.finding_id.to_string())
12310                    .collect::<Vec<_>>()
12311                    .iter()
12312                    .map(std::string::String::as_str)
12313                    .collect(),
12314            )
12315            .with_judgments(
12316                audit
12317                    .judgments
12318                    .iter()
12319                    .filter(|j| j.engagement_id == engagement.engagement_id)
12320                    .take(5)
12321                    .map(|j| j.judgment_id.to_string())
12322                    .collect::<Vec<_>>()
12323                    .iter()
12324                    .map(std::string::String::as_str)
12325                    .collect(),
12326            );
12327            let start_time = base_datetime - chrono::Duration::days(120);
12328            let result = ocpm_gen.generate_audit_case(&docs, start_time, &available_users);
12329            add_result(&mut event_log, result);
12330
12331            if let Some(pb) = &pb {
12332                pb.inc(1);
12333            }
12334        }
12335
12336        // Generate events from Bank Reconciliations
12337        for recon in &financial_reporting.bank_reconciliations {
12338            let docs = BankReconDocuments::new(
12339                &recon.reconciliation_id,
12340                &recon.bank_account_id,
12341                &recon.company_code,
12342                recon.bank_ending_balance,
12343                &ocpm_uuid_factory,
12344            )
12345            .with_statement_lines(
12346                recon
12347                    .statement_lines
12348                    .iter()
12349                    .take(20)
12350                    .map(|l| l.line_id.as_str())
12351                    .collect(),
12352            )
12353            .with_reconciling_items(
12354                recon
12355                    .reconciling_items
12356                    .iter()
12357                    .take(10)
12358                    .map(|i| i.item_id.as_str())
12359                    .collect(),
12360            );
12361            let start_time = base_datetime - chrono::Duration::days(30);
12362            let result = ocpm_gen.generate_bank_recon_case(&docs, start_time, &available_users);
12363            add_result(&mut event_log, result);
12364
12365            if let Some(pb) = &pb {
12366                pb.inc(1);
12367            }
12368        }
12369
12370        // Compute process variants
12371        event_log.compute_variants();
12372
12373        let summary = event_log.summary();
12374
12375        if let Some(pb) = pb {
12376            pb.finish_with_message(format!(
12377                "Generated {} OCPM events, {} objects",
12378                summary.event_count, summary.object_count
12379            ));
12380        }
12381
12382        Ok(OcpmSnapshot {
12383            event_count: summary.event_count,
12384            object_count: summary.object_count,
12385            case_count: summary.case_count,
12386            event_log: Some(event_log),
12387        })
12388    }
12389
12390    /// Inject anomalies into journal entries.
12391    fn inject_anomalies(&mut self, entries: &mut [JournalEntry]) -> SynthResult<AnomalyLabels> {
12392        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Anomalies");
12393
12394        // Read anomaly rates from config instead of using hardcoded values.
12395        // Priority: anomaly_injection config > fraud config > default 0.02
12396        let total_rate = if self.config.anomaly_injection.enabled {
12397            self.config.anomaly_injection.rates.total_rate
12398        } else if self.config.fraud.enabled {
12399            self.config.fraud.fraud_rate
12400        } else {
12401            0.02
12402        };
12403
12404        let fraud_rate = if self.config.anomaly_injection.enabled {
12405            self.config.anomaly_injection.rates.fraud_rate
12406        } else {
12407            AnomalyRateConfig::default().fraud_rate
12408        };
12409
12410        let error_rate = if self.config.anomaly_injection.enabled {
12411            self.config.anomaly_injection.rates.error_rate
12412        } else {
12413            AnomalyRateConfig::default().error_rate
12414        };
12415
12416        let process_issue_rate = if self.config.anomaly_injection.enabled {
12417            self.config.anomaly_injection.rates.process_rate
12418        } else {
12419            AnomalyRateConfig::default().process_issue_rate
12420        };
12421
12422        let anomaly_config = AnomalyInjectorConfig {
12423            rates: AnomalyRateConfig {
12424                total_rate,
12425                fraud_rate,
12426                error_rate,
12427                process_issue_rate,
12428                ..Default::default()
12429            },
12430            // Fraud behavioral-bias signatures now flow from config (the subtlety lever); defaults
12431            // match the engine's historical hardcoded values, so output is unchanged unless overridden.
12432            enhanced: EnhancedInjectionConfig {
12433                fraud_behavioral_bias: self.config.fraud.effective_bias().to_core(),
12434                // Persistent fraud campaigns (A1) — off unless config opts in.
12435                fraud_campaign: self.config.fraud.campaigns.clone(),
12436                ..Default::default()
12437            },
12438            seed: self.seed + 5000,
12439            ..Default::default()
12440        };
12441
12442        let mut injector = AnomalyInjector::new(anomaly_config);
12443        let result = injector.process_entries(entries);
12444
12445        // Central concentration abstraction (#143, Phase 1): run the post-process
12446        // pipeline AFTER per-entry strategies. The pipeline merges the SOTA-12
12447        // tagger + new passes (trading-partner pool, Phase-2 account substitution)
12448        // through a single integration point — see
12449        // docs/superpowers/specs/2026-05-23-concentration-pass-INDEX.md.
12450        //
12451        // Back-compat: the legacy `anomaly_injection.source_conditional_rarity_rate`
12452        // key remains honored. If `concentration.source_conditional_rarity` is also
12453        // set in the same config, the unified DSL field wins.
12454        let (sota12_tagged, consolidation_outlier_expanded): (usize, usize) = {
12455            use datasynth_config::schema::{
12456                ConcentrationConfig, ConsolidationOutlierPassConfig,
12457                SourceConditionalRarityPassConfig,
12458            };
12459            use datasynth_generators::concentration::ConcentrationPipeline;
12460
12461            // Decide effective ConcentrationConfig: start from user config, then
12462            // back-fill from the legacy SOTA-12 key if the unified DSL didn't set it.
12463            let mut effective: ConcentrationConfig = self.config.concentration.clone();
12464            if effective.source_conditional_rarity.is_none() {
12465                if let Some(rate) = self.config.anomaly_injection.source_conditional_rarity_rate {
12466                    effective.enabled = true;
12467                    effective.source_conditional_rarity = Some(SourceConditionalRarityPassConfig {
12468                        rate,
12469                        min_surprise: None,
12470                        min_per_source_lines: None,
12471                    });
12472                }
12473            }
12474            // v5.30 B2 (#154) — back-compat: surface
12475            // `anomaly_injection.rates.consolidation_outlier_rate` as a
12476            // `ConsolidationOutlierPassConfig` if the unified DSL didn't
12477            // set one. Default 0.001 baseline shipped via the schema's
12478            // `default_consolidation_outlier_rate` — only synthesise the
12479            // pass when the rate is > 0, otherwise it's a no-op anyway.
12480            if effective.consolidation_outlier.is_none() {
12481                let rate = self
12482                    .config
12483                    .anomaly_injection
12484                    .rates
12485                    .consolidation_outlier_rate;
12486                if rate > 0.0 {
12487                    effective.enabled = true;
12488                    effective.consolidation_outlier = Some(ConsolidationOutlierPassConfig {
12489                        rate,
12490                        ..Default::default()
12491                    });
12492                }
12493            }
12494
12495            if !effective.enabled {
12496                (0, 0)
12497            } else {
12498                let pipeline = ConcentrationPipeline::from_config(&effective).map_err(|e| {
12499                    SynthError::generation(format!(
12500                        "ConcentrationPipeline construction failed: {e}"
12501                    ))
12502                })?;
12503                if !pipeline.is_active() {
12504                    (0, 0)
12505                } else {
12506                    // Per-pipeline seed disjoint from every other generator stream.
12507                    const CONCENTRATION_SEED_OFFSET: u64 = 0xC0_C3_E1_47_10_43_77_3B;
12508                    let stats =
12509                        pipeline.run(entries, self.seed.wrapping_add(CONCENTRATION_SEED_OFFSET));
12510                    let sota12: usize = stats
12511                        .iter()
12512                        .filter(|s| s.pass == "source_conditional_rarity")
12513                        .map(|s| s.entries_modified)
12514                        .sum();
12515                    let consol: usize = stats
12516                        .iter()
12517                        .filter(|s| s.pass == "consolidation_outlier")
12518                        .map(|s| s.entries_modified)
12519                        .sum();
12520                    (sota12, consol)
12521                }
12522            }
12523        };
12524
12525        if let Some(pb) = &pb {
12526            pb.inc(entries.len() as u64);
12527            pb.finish_with_message("Anomaly injection complete");
12528        }
12529
12530        let mut by_type = HashMap::new();
12531        for label in &result.labels {
12532            *by_type
12533                .entry(format!("{:?}", label.anomaly_type))
12534                .or_insert(0) += 1;
12535        }
12536        if sota12_tagged > 0 {
12537            *by_type
12538                .entry("SourceConditionalRarity".to_string())
12539                .or_insert(0) += sota12_tagged;
12540        }
12541        // v5.30 B2 (#154): record the consolidation-outlier expansion
12542        // count under a stable label key so the orchestrator's run
12543        // report surfaces the heavy-tail emission rate alongside the
12544        // other anomaly buckets.
12545        if consolidation_outlier_expanded > 0 {
12546            *by_type
12547                .entry("ConsolidationOutlier".to_string())
12548                .or_insert(0) += consolidation_outlier_expanded;
12549        }
12550
12551        Ok(AnomalyLabels {
12552            labels: result.labels,
12553            summary: Some(result.summary),
12554            by_type,
12555            carry_forward: result.carry_forward,
12556        })
12557    }
12558
12559    /// Validate journal entries using running balance tracker.
12560    ///
12561    /// Applies all entries to the balance tracker and validates:
12562    /// - Each entry is internally balanced (debits = credits)
12563    /// - Balance sheet equation holds (Assets = Liabilities + Equity + Net Income)
12564    ///
12565    /// Note: Entries with human errors (marked with [HUMAN_ERROR:*] tags) are
12566    /// excluded from balance validation as they may be intentionally unbalanced.
12567    fn validate_journal_entries(
12568        &mut self,
12569        entries: &[JournalEntry],
12570    ) -> SynthResult<BalanceValidationResult> {
12571        // Filter out entries with human errors as they may be intentionally unbalanced
12572        let clean_entries: Vec<&JournalEntry> = entries
12573            .iter()
12574            .filter(|e| {
12575                e.header
12576                    .header_text
12577                    .as_ref()
12578                    .map(|t| !t.contains("[HUMAN_ERROR:"))
12579                    .unwrap_or(true)
12580            })
12581            .collect();
12582
12583        let pb = self.create_progress_bar(clean_entries.len() as u64, "Validating Balances");
12584
12585        // Configure tracker to not fail on errors (collect them instead)
12586        let config = BalanceTrackerConfig {
12587            validate_on_each_entry: false,   // We'll validate at the end
12588            track_history: false,            // Skip history for performance
12589            fail_on_validation_error: false, // Collect errors, don't fail
12590            ..Default::default()
12591        };
12592        let validation_currency = self
12593            .config
12594            .companies
12595            .first()
12596            .map(|c| c.currency.clone())
12597            .unwrap_or_else(|| "USD".to_string());
12598
12599        let mut tracker = RunningBalanceTracker::new_with_currency(config, validation_currency);
12600
12601        // Apply clean entries (without human errors)
12602        let clean_refs: Vec<JournalEntry> = clean_entries.into_iter().cloned().collect();
12603        let errors = tracker.apply_entries(&clean_refs);
12604
12605        if let Some(pb) = &pb {
12606            pb.inc(entries.len() as u64);
12607        }
12608
12609        // Check if any entries were unbalanced
12610        // Note: When fail_on_validation_error is false, errors are stored in tracker
12611        let has_unbalanced = tracker
12612            .get_validation_errors()
12613            .iter()
12614            .any(|e| e.error_type == datasynth_generators::ValidationErrorType::UnbalancedEntry);
12615
12616        // Validate balance sheet for each company
12617        // Include both returned errors and collected validation errors
12618        let mut all_errors = errors;
12619        all_errors.extend(tracker.get_validation_errors().iter().cloned());
12620        let company_codes: Vec<String> = self
12621            .config
12622            .companies
12623            .iter()
12624            .map(|c| c.code.clone())
12625            .collect();
12626
12627        let end_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12628            .map(|d| d + chrono::Months::new(self.config.global.period_months))
12629            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12630
12631        for company_code in &company_codes {
12632            if let Err(e) = tracker.validate_balance_sheet(company_code, end_date, None) {
12633                all_errors.push(e);
12634            }
12635        }
12636
12637        // Get statistics after all mutable operations are done
12638        let stats = tracker.get_statistics();
12639
12640        // Determine if balanced overall
12641        let is_balanced = all_errors.is_empty();
12642
12643        if let Some(pb) = pb {
12644            let msg = if is_balanced {
12645                "Balance validation passed"
12646            } else {
12647                "Balance validation completed with errors"
12648            };
12649            pb.finish_with_message(msg);
12650        }
12651
12652        Ok(BalanceValidationResult {
12653            validated: true,
12654            is_balanced,
12655            entries_processed: stats.entries_processed,
12656            total_debits: stats.total_debits,
12657            total_credits: stats.total_credits,
12658            accounts_tracked: stats.accounts_tracked,
12659            companies_tracked: stats.companies_tracked,
12660            validation_errors: all_errors,
12661            has_unbalanced_entries: has_unbalanced,
12662        })
12663    }
12664
12665    /// Inject data quality variations into journal entries.
12666    ///
12667    /// Applies typos, missing values, and format variations to make
12668    /// the synthetic data more realistic for testing data cleaning pipelines.
12669    fn inject_data_quality(
12670        &mut self,
12671        entries: &mut [JournalEntry],
12672    ) -> SynthResult<(DataQualityStats, Vec<datasynth_generators::QualityIssue>)> {
12673        let pb = self.create_progress_bar(entries.len() as u64, "Injecting Data Quality Issues");
12674
12675        // Build config from user-specified schema settings when data_quality is enabled;
12676        // otherwise fall back to the low-rate minimal() preset.
12677        let config = if self.config.data_quality.enabled {
12678            let dq = &self.config.data_quality;
12679            // Propagate per-field rates and protected fields from the schema
12680            // so users can dial in real-production NULL profiles per field
12681            // (e.g. CostCenter 96.5% NULL, Invoice_Reference 100% NULL).
12682            let field_rates = dq.missing_values.field_rates.clone();
12683            let mut required_fields: std::collections::HashSet<String> =
12684                dq.missing_values.protected_fields.iter().cloned().collect();
12685            // Always preserve audit-critical identifiers regardless of
12686            // user config — losing these breaks downstream joins.
12687            for f in [
12688                "document_id",
12689                "company_code",
12690                "posting_date",
12691                "fiscal_year",
12692                "fiscal_period",
12693                "gl_account",
12694                "line_number",
12695                "transaction_id",
12696            ] {
12697                required_fields.insert(f.to_string());
12698            }
12699            DataQualityConfig {
12700                enable_missing_values: dq.missing_values.enabled,
12701                missing_values: datasynth_generators::MissingValueConfig {
12702                    global_rate: dq.effective_missing_rate(),
12703                    field_rates,
12704                    required_fields,
12705                    ..Default::default()
12706                },
12707                enable_format_variations: dq.format_variations.enabled,
12708                format_variations: datasynth_generators::FormatVariationConfig {
12709                    date_variation_rate: dq.format_variations.dates.rate,
12710                    amount_variation_rate: dq.format_variations.amounts.rate,
12711                    identifier_variation_rate: dq.format_variations.identifiers.rate,
12712                    ..Default::default()
12713                },
12714                enable_duplicates: dq.duplicates.enabled,
12715                duplicates: datasynth_generators::DuplicateConfig {
12716                    duplicate_rate: dq.effective_duplicate_rate(),
12717                    ..Default::default()
12718                },
12719                enable_typos: dq.typos.enabled,
12720                typos: datasynth_generators::TypoConfig {
12721                    char_error_rate: dq.effective_typo_rate(),
12722                    ..Default::default()
12723                },
12724                enable_encoding_issues: dq.encoding_issues.enabled,
12725                encoding_issue_rate: dq.encoding_issues.rate,
12726                seed: self.seed.wrapping_add(77), // deterministic offset for DQ phase
12727                track_statistics: true,
12728            }
12729        } else {
12730            DataQualityConfig::minimal()
12731        };
12732        let mut injector = DataQualityInjector::new(config);
12733
12734        // Wire country pack for locale-aware format baselines
12735        injector.set_country_pack(self.primary_pack().clone());
12736
12737        // Build context for missing value decisions
12738        let context = HashMap::new();
12739
12740        for entry in entries.iter_mut() {
12741            // Process header_text field (common target for typos)
12742            if let Some(text) = &entry.header.header_text {
12743                let processed = injector.process_text_field(
12744                    "header_text",
12745                    text,
12746                    &entry.header.document_id.to_string(),
12747                    &context,
12748                );
12749                match processed {
12750                    Some(new_text) if new_text != *text => {
12751                        entry.header.header_text = Some(new_text);
12752                    }
12753                    None => {
12754                        entry.header.header_text = None; // Missing value
12755                    }
12756                    _ => {}
12757                }
12758            }
12759
12760            // Process reference field
12761            if let Some(ref_text) = &entry.header.reference {
12762                let processed = injector.process_text_field(
12763                    "reference",
12764                    ref_text,
12765                    &entry.header.document_id.to_string(),
12766                    &context,
12767                );
12768                match processed {
12769                    Some(new_text) if new_text != *ref_text => {
12770                        entry.header.reference = Some(new_text);
12771                    }
12772                    None => {
12773                        entry.header.reference = None;
12774                    }
12775                    _ => {}
12776                }
12777            }
12778
12779            // Process user_persona field (potential for typos in user IDs)
12780            let user_persona = entry.header.user_persona.clone();
12781            if let Some(processed) = injector.process_text_field(
12782                "user_persona",
12783                &user_persona,
12784                &entry.header.document_id.to_string(),
12785                &context,
12786            ) {
12787                if processed != user_persona {
12788                    entry.header.user_persona = processed;
12789                }
12790            }
12791
12792            // Process line items
12793            for line in &mut entry.lines {
12794                // Process line description if present
12795                if let Some(ref text) = line.line_text {
12796                    let processed = injector.process_text_field(
12797                        "line_text",
12798                        text,
12799                        &entry.header.document_id.to_string(),
12800                        &context,
12801                    );
12802                    match processed {
12803                        Some(new_text) if new_text != *text => {
12804                            line.line_text = Some(new_text);
12805                        }
12806                        None => {
12807                            line.line_text = None;
12808                        }
12809                        _ => {}
12810                    }
12811                }
12812
12813                // Process cost_center if present
12814                if let Some(cc) = &line.cost_center {
12815                    let processed = injector.process_text_field(
12816                        "cost_center",
12817                        cc,
12818                        &entry.header.document_id.to_string(),
12819                        &context,
12820                    );
12821                    match processed {
12822                        Some(new_cc) if new_cc != *cc => {
12823                            line.cost_center = Some(new_cc);
12824                        }
12825                        None => {
12826                            line.cost_center = None;
12827                        }
12828                        _ => {}
12829                    }
12830                }
12831
12832                // Extended field coverage (v5.6+): apply NULL injection to
12833                // every Option<String> on the line so users can match
12834                // arbitrary real-production NULL profiles via
12835                // `data_quality.missing_values.field_rates`.
12836                //
12837                // Local macro helper: `process_text_field` returns the new value
12838                // (a changed `Some`, `None` for missing, or unchanged) and we apply it back.
12839                macro_rules! process_opt_field {
12840                    ($field_name:expr, $opt:expr) => {
12841                        if let Some(val) = $opt.as_ref() {
12842                            match injector.process_text_field(
12843                                $field_name,
12844                                val,
12845                                &entry.header.document_id.to_string(),
12846                                &context,
12847                            ) {
12848                                Some(new_val) if new_val != *val => {
12849                                    *$opt = Some(new_val);
12850                                }
12851                                None => {
12852                                    *$opt = None;
12853                                }
12854                                _ => {}
12855                            }
12856                        }
12857                    };
12858                }
12859
12860                process_opt_field!("profit_center", &mut line.profit_center);
12861                process_opt_field!("assignment", &mut line.assignment);
12862                process_opt_field!("tax_code", &mut line.tax_code);
12863                process_opt_field!("account_description", &mut line.account_description);
12864                process_opt_field!(
12865                    "auxiliary_account_number",
12866                    &mut line.auxiliary_account_number
12867                );
12868                process_opt_field!("auxiliary_account_label", &mut line.auxiliary_account_label);
12869                process_opt_field!("lettrage", &mut line.lettrage);
12870            }
12871
12872            if let Some(pb) = &pb {
12873                pb.inc(1);
12874            }
12875        }
12876
12877        if let Some(pb) = pb {
12878            pb.finish_with_message("Data quality injection complete");
12879        }
12880
12881        let quality_issues = injector.issues().to_vec();
12882        Ok((injector.stats().clone(), quality_issues))
12883    }
12884
12885    /// Generate audit data (engagements, workpapers, evidence, risks, findings, judgments).
12886    ///
12887    /// Creates complete audit documentation for each company in the configuration,
12888    /// following ISA standards:
12889    /// - ISA 210/220: Engagement acceptance and terms
12890    /// - ISA 230: Audit documentation (workpapers)
12891    /// - ISA 265: Control deficiencies (findings)
12892    /// - ISA 315/330: Risk assessment and response
12893    /// - ISA 500: Audit evidence
12894    /// - ISA 200: Professional judgment
12895    fn generate_audit_data(&mut self, entries: &[JournalEntry]) -> SynthResult<AuditSnapshot> {
12896        // Check if FSM-driven audit generation is enabled
12897        let use_fsm = self
12898            .config
12899            .audit
12900            .fsm
12901            .as_ref()
12902            .map(|f| f.enabled)
12903            .unwrap_or(false);
12904
12905        if use_fsm {
12906            return self.generate_audit_data_with_fsm(entries);
12907        }
12908
12909        // --- Legacy (non-FSM) audit generation follows ---
12910        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
12911            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
12912        let fiscal_year = start_date.year() as u16;
12913        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
12914
12915        // Calculate rough total revenue from entries for materiality
12916        let total_revenue: rust_decimal::Decimal = entries
12917            .iter()
12918            .flat_map(|e| e.lines.iter())
12919            .filter(|l| l.credit_amount > rust_decimal::Decimal::ZERO)
12920            .map(|l| l.credit_amount)
12921            .sum();
12922
12923        let total_items = (self.phase_config.audit_engagements * 50) as u64; // Approximate items
12924        let pb = self.create_progress_bar(total_items, "Generating Audit Data");
12925
12926        let mut snapshot = AuditSnapshot::default();
12927
12928        // Initialize generators
12929        let mut engagement_gen = AuditEngagementGenerator::new(self.seed + 7000);
12930        // v3.3.2: thread the user-facing audit schema config into the
12931        // engagement generator (team size range).
12932        engagement_gen.set_team_config(&self.config.audit.team);
12933
12934        let mut workpaper_gen = WorkpaperGenerator::new(self.seed + 7100);
12935        // v3.3.2: thread workpaper + review workflow schema config into
12936        // the workpaper generator (per-section count range + review
12937        // delay ranges).
12938        workpaper_gen.set_schema_configs(&self.config.audit.workpapers, &self.config.audit.review);
12939        let mut evidence_gen = EvidenceGenerator::new(self.seed + 7200);
12940        let mut risk_gen = RiskAssessmentGenerator::new(self.seed + 7300);
12941        let mut finding_gen = FindingGenerator::new(self.seed + 7400);
12942        // v3.2.1+: user-supplied finding titles + narratives flow through shared provider
12943        finding_gen.set_template_provider(self.template_provider.clone());
12944        let mut judgment_gen = JudgmentGenerator::new(self.seed + 7500);
12945        let mut confirmation_gen = ConfirmationGenerator::new(self.seed + 7600);
12946        let mut procedure_step_gen = ProcedureStepGenerator::new(self.seed + 7700);
12947        let mut sample_gen = SampleGenerator::new(self.seed + 7800);
12948        let mut analytical_gen = AnalyticalProcedureGenerator::new(self.seed + 7900);
12949        let mut ia_gen = InternalAuditGenerator::new(self.seed + 8000);
12950        let mut related_party_gen = RelatedPartyGenerator::new(self.seed + 8100);
12951
12952        // Get list of accounts from CoA for risk assessment
12953        let accounts: Vec<String> = self
12954            .coa
12955            .as_ref()
12956            .map(|coa| {
12957                coa.get_postable_accounts()
12958                    .iter()
12959                    .map(|acc| acc.account_code().to_string())
12960                    .collect()
12961            })
12962            .unwrap_or_default();
12963
12964        // Generate engagements for each company
12965        for (i, company) in self.config.companies.iter().enumerate() {
12966            // Calculate company-specific revenue (proportional to volume weight)
12967            let company_revenue = total_revenue
12968                * rust_decimal::Decimal::try_from(company.volume_weight).unwrap_or_default();
12969
12970            // Generate engagements for this company
12971            let engagements_for_company =
12972                self.phase_config.audit_engagements / self.config.companies.len().max(1);
12973            let extra = if i < self.phase_config.audit_engagements % self.config.companies.len() {
12974                1
12975            } else {
12976                0
12977            };
12978
12979            for _eng_idx in 0..(engagements_for_company + extra) {
12980                // v3.3.2: draw engagement type from the user-configured
12981                // distribution instead of always using the default
12982                // (AnnualAudit). Falls back to the default when all
12983                // probabilities are zero.
12984                let eng_type =
12985                    engagement_gen.draw_engagement_type(&self.config.audit.engagement_types);
12986
12987                // Generate the engagement
12988                let mut engagement = engagement_gen.generate_engagement(
12989                    &company.code,
12990                    &company.name,
12991                    fiscal_year,
12992                    period_end,
12993                    company_revenue,
12994                    Some(eng_type),
12995                );
12996
12997                // Replace synthetic team IDs with real employee IDs from master data
12998                if !self.master_data.employees.is_empty() {
12999                    let emp_count = self.master_data.employees.len();
13000                    // Use employee IDs deterministically based on engagement index
13001                    let base = (i * 10 + _eng_idx) % emp_count;
13002                    engagement.engagement_partner_id = self.master_data.employees[base % emp_count]
13003                        .employee_id
13004                        .clone();
13005                    engagement.engagement_manager_id = self.master_data.employees
13006                        [(base + 1) % emp_count]
13007                        .employee_id
13008                        .clone();
13009                    let real_team: Vec<String> = engagement
13010                        .team_member_ids
13011                        .iter()
13012                        .enumerate()
13013                        .map(|(j, _)| {
13014                            self.master_data.employees[(base + 2 + j) % emp_count]
13015                                .employee_id
13016                                .clone()
13017                        })
13018                        .collect();
13019                    engagement.team_member_ids = real_team;
13020                }
13021
13022                if let Some(pb) = &pb {
13023                    pb.inc(1);
13024                }
13025
13026                // Get team members from the engagement
13027                let team_members: Vec<String> = engagement.team_member_ids.clone();
13028
13029                // Generate workpapers for the engagement.
13030                // v3.3.2: honor `audit.generate_workpapers` — when false,
13031                // workpapers (and dependent evidence) are skipped while
13032                // the engagement itself, risk assessments, findings, etc.
13033                // still generate normally.
13034                let workpapers = if self.config.audit.generate_workpapers {
13035                    workpaper_gen.generate_complete_workpaper_set(&engagement, &team_members)
13036                } else {
13037                    Vec::new()
13038                };
13039
13040                for wp in &workpapers {
13041                    if let Some(pb) = &pb {
13042                        pb.inc(1);
13043                    }
13044
13045                    // Generate evidence for each workpaper
13046                    let evidence = evidence_gen.generate_evidence_for_workpaper(
13047                        wp,
13048                        &team_members,
13049                        wp.preparer_date,
13050                    );
13051
13052                    for _ in &evidence {
13053                        if let Some(pb) = &pb {
13054                            pb.inc(1);
13055                        }
13056                    }
13057
13058                    snapshot.evidence.extend(evidence);
13059                }
13060
13061                // Generate risk assessments for the engagement
13062                let risks =
13063                    risk_gen.generate_risks_for_engagement(&engagement, &team_members, &accounts);
13064
13065                for _ in &risks {
13066                    if let Some(pb) = &pb {
13067                        pb.inc(1);
13068                    }
13069                }
13070                snapshot.risk_assessments.extend(risks);
13071
13072                // Generate findings for the engagement
13073                let findings = finding_gen.generate_findings_for_engagement(
13074                    &engagement,
13075                    &workpapers,
13076                    &team_members,
13077                );
13078
13079                for _ in &findings {
13080                    if let Some(pb) = &pb {
13081                        pb.inc(1);
13082                    }
13083                }
13084                snapshot.findings.extend(findings);
13085
13086                // Generate professional judgments for the engagement
13087                let judgments =
13088                    judgment_gen.generate_judgments_for_engagement(&engagement, &team_members);
13089
13090                for _ in &judgments {
13091                    if let Some(pb) = &pb {
13092                        pb.inc(1);
13093                    }
13094                }
13095                snapshot.judgments.extend(judgments);
13096
13097                // ISA 505: External confirmations and responses
13098                let (confs, resps) =
13099                    confirmation_gen.generate_confirmations(&engagement, &workpapers, &accounts);
13100                snapshot.confirmations.extend(confs);
13101                snapshot.confirmation_responses.extend(resps);
13102
13103                // ISA 330: Procedure steps per workpaper
13104                let team_pairs: Vec<(String, String)> = team_members
13105                    .iter()
13106                    .map(|id| {
13107                        let name = self
13108                            .master_data
13109                            .employees
13110                            .iter()
13111                            .find(|e| e.employee_id == *id)
13112                            .map(|e| e.display_name.clone())
13113                            .unwrap_or_else(|| format!("Employee {}", &id[..8.min(id.len())]));
13114                        (id.clone(), name)
13115                    })
13116                    .collect();
13117                for wp in &workpapers {
13118                    let steps = procedure_step_gen.generate_steps(wp, &team_pairs);
13119                    snapshot.procedure_steps.extend(steps);
13120                }
13121
13122                // ISA 530: Samples per workpaper
13123                for wp in &workpapers {
13124                    if let Some(sample) = sample_gen.generate_sample(wp, engagement.engagement_id) {
13125                        snapshot.samples.push(sample);
13126                    }
13127                }
13128
13129                // ISA 520: Analytical procedures
13130                let analytical = analytical_gen.generate_procedures(&engagement, &accounts);
13131                snapshot.analytical_results.extend(analytical);
13132
13133                // ISA 610: Internal audit function and reports
13134                let (ia_func, ia_reports) = ia_gen.generate(&engagement);
13135                snapshot.ia_functions.push(ia_func);
13136                snapshot.ia_reports.extend(ia_reports);
13137
13138                // ISA 550: Related parties and transactions
13139                let vendor_names: Vec<String> = self
13140                    .master_data
13141                    .vendors
13142                    .iter()
13143                    .map(|v| v.name.clone())
13144                    .collect();
13145                let customer_names: Vec<String> = self
13146                    .master_data
13147                    .customers
13148                    .iter()
13149                    .map(|c| c.name.clone())
13150                    .collect();
13151                let (parties, rp_txns) =
13152                    related_party_gen.generate(&engagement, &vendor_names, &customer_names);
13153                snapshot.related_parties.extend(parties);
13154                snapshot.related_party_transactions.extend(rp_txns);
13155
13156                // Add workpapers after findings since findings need them
13157                snapshot.workpapers.extend(workpapers);
13158
13159                // Generate audit scope record for this engagement (one per engagement)
13160                {
13161                    let scope_id = format!(
13162                        "SCOPE-{}-{}",
13163                        engagement.engagement_id.simple(),
13164                        &engagement.client_entity_id
13165                    );
13166                    let scope = datasynth_core::models::audit::AuditScope::new(
13167                        scope_id.clone(),
13168                        engagement.engagement_id.to_string(),
13169                        engagement.client_entity_id.clone(),
13170                        engagement.materiality,
13171                    );
13172                    // Wire scope_id back to engagement
13173                    let mut eng = engagement;
13174                    eng.scope_id = Some(scope_id);
13175                    snapshot.audit_scopes.push(scope);
13176                    snapshot.engagements.push(eng);
13177                }
13178            }
13179        }
13180
13181        // ----------------------------------------------------------------
13182        // ISA 600: Group audit — component auditors, plan, instructions, reports
13183        // ----------------------------------------------------------------
13184        if self.config.companies.len() > 1 {
13185            // Use materiality from the first engagement if available, otherwise
13186            // derive a reasonable figure from total revenue.
13187            let group_materiality = snapshot
13188                .engagements
13189                .first()
13190                .map(|e| e.materiality)
13191                .unwrap_or_else(|| {
13192                    let pct = rust_decimal::Decimal::try_from(0.005_f64).unwrap_or_default();
13193                    total_revenue * pct
13194                });
13195
13196            let mut component_gen = ComponentAuditGenerator::new(self.seed + 8200);
13197            let group_engagement_id = snapshot
13198                .engagements
13199                .first()
13200                .map(|e| e.engagement_id.to_string())
13201                .unwrap_or_else(|| "GROUP-ENG".to_string());
13202
13203            let component_snapshot = component_gen.generate(
13204                &self.config.companies,
13205                group_materiality,
13206                &group_engagement_id,
13207                period_end,
13208            );
13209
13210            snapshot.component_auditors = component_snapshot.component_auditors;
13211            snapshot.group_audit_plan = component_snapshot.group_audit_plan;
13212            snapshot.component_instructions = component_snapshot.component_instructions;
13213            snapshot.component_reports = component_snapshot.component_reports;
13214
13215            info!(
13216                "ISA 600 group audit: {} component auditors, {} instructions, {} reports",
13217                snapshot.component_auditors.len(),
13218                snapshot.component_instructions.len(),
13219                snapshot.component_reports.len(),
13220            );
13221        }
13222
13223        // ----------------------------------------------------------------
13224        // ISA 210: Engagement letters — one per engagement
13225        // ----------------------------------------------------------------
13226        {
13227            let applicable_framework = self
13228                .config
13229                .accounting_standards
13230                .framework
13231                .as_ref()
13232                .map(|f| format!("{f:?}"))
13233                .unwrap_or_else(|| "IFRS".to_string());
13234
13235            let mut letter_gen = EngagementLetterGenerator::new(self.seed + 8300);
13236            let entity_count = self.config.companies.len();
13237
13238            for engagement in &snapshot.engagements {
13239                let company = self
13240                    .config
13241                    .companies
13242                    .iter()
13243                    .find(|c| c.code == engagement.client_entity_id);
13244                let currency = company.map(|c| c.currency.as_str()).unwrap_or("USD");
13245                let letter_date = engagement.planning_start;
13246                let letter = letter_gen.generate(
13247                    &engagement.engagement_id.to_string(),
13248                    &engagement.client_name,
13249                    entity_count,
13250                    engagement.period_end_date,
13251                    currency,
13252                    &applicable_framework,
13253                    letter_date,
13254                );
13255                snapshot.engagement_letters.push(letter);
13256            }
13257
13258            info!(
13259                "ISA 210 engagement letters: {} generated",
13260                snapshot.engagement_letters.len()
13261            );
13262        }
13263
13264        // ----------------------------------------------------------------
13265        // v3.3.0: Legal documents per engagement (WI: LegalDocumentGenerator)
13266        // ----------------------------------------------------------------
13267        if self.phase_config.generate_legal_documents {
13268            use datasynth_generators::legal_document_generator::LegalDocumentGenerator;
13269            let mut legal_gen = LegalDocumentGenerator::new(self.seed + 8400);
13270            for engagement in &snapshot.engagements {
13271                // Build an employee name list for signatory drawing —
13272                // prefer employees from the engaged entity, fall back to
13273                // the first 10 employees overall.
13274                let employee_names: Vec<String> = self
13275                    .master_data
13276                    .employees
13277                    .iter()
13278                    .filter(|e| e.company_code == engagement.client_entity_id)
13279                    .map(|e| e.display_name.clone())
13280                    .collect();
13281                let names_to_use = if !employee_names.is_empty() {
13282                    employee_names
13283                } else {
13284                    self.master_data
13285                        .employees
13286                        .iter()
13287                        .take(10)
13288                        .map(|e| e.display_name.clone())
13289                        .collect()
13290                };
13291                let docs = legal_gen.generate(
13292                    &engagement.client_entity_id,
13293                    engagement.fiscal_year as i32,
13294                    &names_to_use,
13295                );
13296                snapshot.legal_documents.extend(docs);
13297            }
13298            info!(
13299                "v3.3.0 legal documents: {} emitted across {} engagements",
13300                snapshot.legal_documents.len(),
13301                snapshot.engagements.len()
13302            );
13303        }
13304
13305        // ----------------------------------------------------------------
13306        // v3.3.0: IT general controls — access logs + change records
13307        //
13308        // `ItControlsGenerator` runs one pass per company (not per
13309        // engagement) so employee sets and system catalogs stay
13310        // coherent. We derive the period from the earliest engagement's
13311        // planning_start through the latest engagement's period_end_date
13312        // for each company.
13313        // ----------------------------------------------------------------
13314        if self.phase_config.generate_it_controls {
13315            use datasynth_generators::it_controls_generator::ItControlsGenerator;
13316            use std::collections::HashMap;
13317            let mut it_gen = ItControlsGenerator::new(self.seed + 8500);
13318
13319            // Group engagements by company to produce one IT-controls
13320            // window per entity.
13321            let mut by_company: HashMap<String, (chrono::NaiveDate, chrono::NaiveDate)> =
13322                HashMap::new();
13323            for engagement in &snapshot.engagements {
13324                let entry = by_company
13325                    .entry(engagement.client_entity_id.clone())
13326                    .or_insert((engagement.planning_start, engagement.period_end_date));
13327                if engagement.planning_start < entry.0 {
13328                    entry.0 = engagement.planning_start;
13329                }
13330                if engagement.period_end_date > entry.1 {
13331                    entry.1 = engagement.period_end_date;
13332                }
13333            }
13334
13335            // Standard system catalog — populated from known ERP / app
13336            // names. Keeps the generator's data shape stable when the
13337            // user hasn't configured IT-system naming separately.
13338            let systems: Vec<String> = vec![
13339                "SAP ECC",
13340                "SAP S/4 HANA",
13341                "Oracle EBS",
13342                "Workday",
13343                "NetSuite",
13344                "Active Directory",
13345                "SharePoint",
13346                "Salesforce",
13347                "ServiceNow",
13348                "Jira",
13349                "GitHub Enterprise",
13350                "AWS Console",
13351                "Okta",
13352            ]
13353            .into_iter()
13354            .map(String::from)
13355            .collect();
13356
13357            for (company_code, (start, end)) in by_company {
13358                let emps: Vec<(String, String)> = self
13359                    .master_data
13360                    .employees
13361                    .iter()
13362                    .filter(|e| e.company_code == company_code)
13363                    .map(|e| (e.employee_id.clone(), e.display_name.clone()))
13364                    .collect();
13365                if emps.is_empty() {
13366                    continue;
13367                }
13368                // Approximate the period in 30-day months: days / 30 (integer
13369                // division) + 1, min 1 — an exact multiple of 30 days gains a month.
13370                let months = ((end.signed_duration_since(start).num_days() / 30) + 1).max(1) as u32;
13371                let access_logs = it_gen.generate_access_logs(&emps, &systems, start, months);
13372                let change_records = it_gen.generate_change_records(&emps, &systems, start, months);
13373                snapshot.it_controls_access_logs.extend(access_logs);
13374                snapshot.it_controls_change_records.extend(change_records);
13375            }
13376
13377            info!(
13378                "v3.3.0 IT controls: {} access logs, {} change records",
13379                snapshot.it_controls_access_logs.len(),
13380                snapshot.it_controls_change_records.len()
13381            );
13382        }
13383
13384        // ----------------------------------------------------------------
13385        // ISA 560 / IAS 10: Subsequent events
13386        // ----------------------------------------------------------------
13387        {
13388            let mut event_gen = SubsequentEventGenerator::new(self.seed + 8400);
13389            let entity_codes: Vec<String> = self
13390                .config
13391                .companies
13392                .iter()
13393                .map(|c| c.code.clone())
13394                .collect();
13395            let subsequent = event_gen.generate_for_entities(&entity_codes, period_end);
13396            info!(
13397                "ISA 560 subsequent events: {} generated ({} adjusting, {} non-adjusting)",
13398                subsequent.len(),
13399                subsequent
13400                    .iter()
13401                    .filter(|e| matches!(
13402                        e.classification,
13403                        datasynth_core::models::audit::subsequent_events::EventClassification::Adjusting
13404                    ))
13405                    .count(),
13406                subsequent
13407                    .iter()
13408                    .filter(|e| matches!(
13409                        e.classification,
13410                        datasynth_core::models::audit::subsequent_events::EventClassification::NonAdjusting
13411                    ))
13412                    .count(),
13413            );
13414            snapshot.subsequent_events = subsequent;
13415        }
13416
13417        // ----------------------------------------------------------------
13418        // ISA 402: Service organization controls
13419        // ----------------------------------------------------------------
13420        {
13421            let mut soc_gen = ServiceOrgGenerator::new(self.seed + 8500);
13422            let entity_codes: Vec<String> = self
13423                .config
13424                .companies
13425                .iter()
13426                .map(|c| c.code.clone())
13427                .collect();
13428            let soc_snapshot = soc_gen.generate(&entity_codes, period_end);
13429            info!(
13430                "ISA 402 service orgs: {} orgs, {} SOC reports, {} user entity controls",
13431                soc_snapshot.service_organizations.len(),
13432                soc_snapshot.soc_reports.len(),
13433                soc_snapshot.user_entity_controls.len(),
13434            );
13435            snapshot.service_organizations = soc_snapshot.service_organizations;
13436            snapshot.soc_reports = soc_snapshot.soc_reports;
13437            snapshot.user_entity_controls = soc_snapshot.user_entity_controls;
13438        }
13439
13440        // ----------------------------------------------------------------
13441        // ISA 570: Going concern assessments
13442        // ----------------------------------------------------------------
13443        {
13444            use datasynth_generators::audit::going_concern_generator::{
13445                GoingConcernGenerator, GoingConcernInput,
13446            };
13447            let mut gc_gen = GoingConcernGenerator::new(self.seed + 8570);
13448            let entity_codes: Vec<String> = self
13449                .config
13450                .companies
13451                .iter()
13452                .map(|c| c.code.clone())
13453                .collect();
13454            // Assessment date = period end + 75 days (typical sign-off window).
13455            let assessment_date = period_end + chrono::Duration::days(75);
13456            let period_label = format!("FY{}", period_end.year());
13457
13458            // Build financial inputs from actual journal entries.
13459            //
13460            // We derive approximate P&L, working capital, and operating cash flow
13461            // by aggregating GL account balances from the journal entry population.
13462            // Account ranges used (standard chart):
13463            //   Revenue:         4xxx (credit-normal → negate for positive revenue)
13464            //   Expenses:        6xxx (debit-normal)
13465            //   Current assets:  1xxx (AR=1100, cash=1000, inventory=1300)
13466            //   Current liabs:   2xxx up to 2499 (AP=2000, accruals=2100)
13467            //   Operating CF:    net income adjusted for D&A (rough proxy)
13468            let gc_inputs: Vec<GoingConcernInput> = self
13469                .config
13470                .companies
13471                .iter()
13472                .map(|company| {
13473                    let code = &company.code;
13474                    let mut revenue = rust_decimal::Decimal::ZERO;
13475                    let mut expenses = rust_decimal::Decimal::ZERO;
13476                    let mut current_assets = rust_decimal::Decimal::ZERO;
13477                    let mut current_liabs = rust_decimal::Decimal::ZERO;
13478                    let mut total_debt = rust_decimal::Decimal::ZERO;
13479
13480                    for je in entries.iter().filter(|je| &je.header.company_code == code) {
13481                        for line in &je.lines {
13482                            let acct = line.gl_account.as_str();
13483                            let net = line.debit_amount - line.credit_amount;
13484                            if acct.starts_with('4') {
13485                                // Revenue accounts: credit-normal, so negative net = revenue earned
13486                                revenue -= net;
13487                            } else if acct.starts_with('6') {
13488                                // Expense accounts: debit-normal
13489                                expenses += net;
13490                            }
13491                            // Balance sheet accounts for working capital
13492                            if acct.starts_with('1') {
13493                                // Current asset accounts (1000–1499)
13494                                if let Ok(n) = acct.parse::<u32>() {
13495                                    if (1000..=1499).contains(&n) {
13496                                        current_assets += net;
13497                                    }
13498                                }
13499                            } else if acct.starts_with('2') {
13500                                if let Ok(n) = acct.parse::<u32>() {
13501                                    if (2000..=2499).contains(&n) {
13502                                        // Current liabilities
13503                                        current_liabs -= net; // credit-normal
13504                                    } else if (2500..=2999).contains(&n) {
13505                                        // Long-term debt
13506                                        total_debt -= net;
13507                                    }
13508                                }
13509                            }
13510                        }
13511                    }
13512
13513                    let net_income = revenue - expenses;
13514                    let working_capital = current_assets - current_liabs;
13515                    // Rough operating CF proxy: net income (full accrual CF calculation
13516                    // is done separately in the cash flow statement generator)
13517                    let operating_cash_flow = net_income;
13518
13519                    GoingConcernInput {
13520                        entity_code: code.clone(),
13521                        net_income,
13522                        working_capital,
13523                        operating_cash_flow,
13524                        total_debt: total_debt.max(rust_decimal::Decimal::ZERO),
13525                        assessment_date,
13526                    }
13527                })
13528                .collect();
13529
13530            let assessments = if gc_inputs.is_empty() {
13531                gc_gen.generate_for_entities(&entity_codes, assessment_date, &period_label)
13532            } else {
13533                gc_gen.generate_for_entities_with_inputs(
13534                    &entity_codes,
13535                    &gc_inputs,
13536                    assessment_date,
13537                    &period_label,
13538                )
13539            };
13540            info!(
13541                "ISA 570 going concern: {} assessments ({} clean, {} material uncertainty, {} doubt)",
13542                assessments.len(),
13543                assessments.iter().filter(|a| matches!(
13544                    a.auditor_conclusion,
13545                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::NoMaterialUncertainty
13546                )).count(),
13547                assessments.iter().filter(|a| matches!(
13548                    a.auditor_conclusion,
13549                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::MaterialUncertaintyExists
13550                )).count(),
13551                assessments.iter().filter(|a| matches!(
13552                    a.auditor_conclusion,
13553                    datasynth_core::models::audit::going_concern::GoingConcernConclusion::GoingConcernDoubt
13554                )).count(),
13555            );
13556            snapshot.going_concern_assessments = assessments;
13557        }
13558
13559        // ----------------------------------------------------------------
13560        // ISA 540: Accounting estimates
13561        // ----------------------------------------------------------------
13562        {
13563            use datasynth_generators::audit::accounting_estimate_generator::AccountingEstimateGenerator;
13564            let mut est_gen = AccountingEstimateGenerator::new(self.seed + 8540);
13565            let entity_codes: Vec<String> = self
13566                .config
13567                .companies
13568                .iter()
13569                .map(|c| c.code.clone())
13570                .collect();
13571            let estimates = est_gen.generate_for_entities(&entity_codes);
13572            info!(
13573                "ISA 540 accounting estimates: {} estimates across {} entities \
13574                 ({} with retrospective reviews, {} with auditor point estimates)",
13575                estimates.len(),
13576                entity_codes.len(),
13577                estimates
13578                    .iter()
13579                    .filter(|e| e.retrospective_review.is_some())
13580                    .count(),
13581                estimates
13582                    .iter()
13583                    .filter(|e| e.auditor_point_estimate.is_some())
13584                    .count(),
13585            );
13586            snapshot.accounting_estimates = estimates;
13587        }
13588
13589        // ----------------------------------------------------------------
13590        // ISA 700/701/705/706: Audit opinions (one per engagement)
13591        // ----------------------------------------------------------------
13592        {
13593            use datasynth_generators::audit::audit_opinion_generator::{
13594                AuditOpinionGenerator, AuditOpinionInput,
13595            };
13596
13597            let mut opinion_gen = AuditOpinionGenerator::new(self.seed + 8700);
13598
13599            // Build inputs — one per engagement, linking findings and going concern.
13600            let opinion_inputs: Vec<AuditOpinionInput> = snapshot
13601                .engagements
13602                .iter()
13603                .map(|eng| {
13604                    // Collect findings for this engagement.
13605                    let eng_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13606                        .findings
13607                        .iter()
13608                        .filter(|f| f.engagement_id == eng.engagement_id)
13609                        .cloned()
13610                        .collect();
13611
13612                    // Going concern for this entity.
13613                    let gc = snapshot
13614                        .going_concern_assessments
13615                        .iter()
13616                        .find(|g| g.entity_code == eng.client_entity_id)
13617                        .cloned();
13618
13619                    // Component reports relevant to this engagement.
13620                    let comp_reports: Vec<datasynth_core::models::audit::ComponentAuditorReport> =
13621                        snapshot.component_reports.clone();
13622
13623                    let auditor = self
13624                        .master_data
13625                        .employees
13626                        .first()
13627                        .map(|e| e.display_name.clone())
13628                        .unwrap_or_else(|| "Global Audit LLP".into());
13629
13630                    let partner = self
13631                        .master_data
13632                        .employees
13633                        .get(1)
13634                        .map(|e| e.display_name.clone())
13635                        .unwrap_or_else(|| eng.engagement_partner_id.clone());
13636
13637                    AuditOpinionInput {
13638                        entity_code: eng.client_entity_id.clone(),
13639                        entity_name: eng.client_name.clone(),
13640                        engagement_id: eng.engagement_id,
13641                        period_end: eng.period_end_date,
13642                        findings: eng_findings,
13643                        going_concern: gc,
13644                        component_reports: comp_reports,
13645                        // Mark as US-listed when audit standards include PCAOB.
13646                        is_us_listed: {
13647                            let fw = &self.config.audit_standards.isa_compliance.framework;
13648                            fw.eq_ignore_ascii_case("pcaob") || fw.eq_ignore_ascii_case("dual")
13649                        },
13650                        auditor_name: auditor,
13651                        engagement_partner: partner,
13652                    }
13653                })
13654                .collect();
13655
13656            let generated_opinions = opinion_gen.generate_batch(&opinion_inputs);
13657
13658            for go in &generated_opinions {
13659                snapshot
13660                    .key_audit_matters
13661                    .extend(go.key_audit_matters.clone());
13662            }
13663            snapshot.audit_opinions = generated_opinions
13664                .into_iter()
13665                .map(|go| go.opinion)
13666                .collect();
13667
13668            info!(
13669                "ISA 700 audit opinions: {} generated ({} unmodified, {} qualified, {} adverse, {} disclaimer)",
13670                snapshot.audit_opinions.len(),
13671                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Unmodified)).count(),
13672                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Qualified)).count(),
13673                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Adverse)).count(),
13674                snapshot.audit_opinions.iter().filter(|o| matches!(o.opinion_type, datasynth_standards::audit::opinion::OpinionType::Disclaimer)).count(),
13675            );
13676        }
13677
13678        // ----------------------------------------------------------------
13679        // SOX 302 / 404 assessments
13680        // ----------------------------------------------------------------
13681        {
13682            use datasynth_generators::audit::sox_generator::{SoxGenerator, SoxGeneratorInput};
13683
13684            let mut sox_gen = SoxGenerator::new(self.seed + 8302);
13685
13686            for (i, company) in self.config.companies.iter().enumerate() {
13687                // Collect findings for this company's engagements.
13688                let company_engagement_ids: Vec<uuid::Uuid> = snapshot
13689                    .engagements
13690                    .iter()
13691                    .filter(|e| e.client_entity_id == company.code)
13692                    .map(|e| e.engagement_id)
13693                    .collect();
13694
13695                let company_findings: Vec<datasynth_core::models::audit::AuditFinding> = snapshot
13696                    .findings
13697                    .iter()
13698                    .filter(|f| company_engagement_ids.contains(&f.engagement_id))
13699                    .cloned()
13700                    .collect();
13701
13702                // Derive executive names from employee list.
13703                let emp_count = self.master_data.employees.len();
13704                let ceo_name = if emp_count > 0 {
13705                    self.master_data.employees[i % emp_count]
13706                        .display_name
13707                        .clone()
13708                } else {
13709                    format!("CEO of {}", company.name)
13710                };
13711                let cfo_name = if emp_count > 1 {
13712                    self.master_data.employees[(i + 1) % emp_count]
13713                        .display_name
13714                        .clone()
13715                } else {
13716                    format!("CFO of {}", company.name)
13717                };
13718
13719                // Use engagement materiality if available.
13720                let materiality = snapshot
13721                    .engagements
13722                    .iter()
13723                    .find(|e| e.client_entity_id == company.code)
13724                    .map(|e| e.materiality)
13725                    .unwrap_or_else(|| rust_decimal::Decimal::from(100_000));
13726
13727                let input = SoxGeneratorInput {
13728                    company_code: company.code.clone(),
13729                    company_name: company.name.clone(),
13730                    fiscal_year,
13731                    period_end,
13732                    findings: company_findings,
13733                    ceo_name,
13734                    cfo_name,
13735                    materiality_threshold: materiality,
13736                    revenue_percent: rust_decimal::Decimal::from(100),
13737                    assets_percent: rust_decimal::Decimal::from(100),
13738                    significant_accounts: vec![
13739                        "Revenue".into(),
13740                        "Accounts Receivable".into(),
13741                        "Inventory".into(),
13742                        "Fixed Assets".into(),
13743                        "Accounts Payable".into(),
13744                    ],
13745                };
13746
13747                let (certs, assessment) = sox_gen.generate(&input);
13748                snapshot.sox_302_certifications.extend(certs);
13749                snapshot.sox_404_assessments.push(assessment);
13750            }
13751
13752            info!(
13753                "SOX 302/404: {} certifications, {} assessments ({} effective, {} ineffective)",
13754                snapshot.sox_302_certifications.len(),
13755                snapshot.sox_404_assessments.len(),
13756                snapshot
13757                    .sox_404_assessments
13758                    .iter()
13759                    .filter(|a| a.icfr_effective)
13760                    .count(),
13761                snapshot
13762                    .sox_404_assessments
13763                    .iter()
13764                    .filter(|a| !a.icfr_effective)
13765                    .count(),
13766            );
13767        }
13768
13769        // ----------------------------------------------------------------
13770        // ISA 320: Materiality calculations (one per entity)
13771        // ----------------------------------------------------------------
13772        {
13773            use datasynth_generators::audit::materiality_generator::{
13774                MaterialityGenerator, MaterialityInput,
13775            };
13776
13777            let mut mat_gen = MaterialityGenerator::new(self.seed + 8320);
13778
13779            // Compute per-company financials from JEs.
13780            // Asset accounts start with '1', revenue with '4',
13781            // expense accounts with '5' or '6'.
13782            let mut materiality_inputs: Vec<MaterialityInput> = Vec::new();
13783
13784            for company in &self.config.companies {
13785                let company_code = company.code.clone();
13786
13787                // Revenue: credit-side entries on 4xxx accounts
13788                let company_revenue: rust_decimal::Decimal = entries
13789                    .iter()
13790                    .filter(|e| e.company_code() == company_code)
13791                    .flat_map(|e| e.lines.iter())
13792                    .filter(|l| l.account_code.starts_with('4'))
13793                    .map(|l| l.credit_amount)
13794                    .sum();
13795
13796                // Total assets: debit balances on 1xxx accounts
13797                let total_assets: rust_decimal::Decimal = entries
13798                    .iter()
13799                    .filter(|e| e.company_code() == company_code)
13800                    .flat_map(|e| e.lines.iter())
13801                    .filter(|l| l.account_code.starts_with('1'))
13802                    .map(|l| l.debit_amount)
13803                    .sum();
13804
13805                // Expenses: debit-side entries on 5xxx/6xxx accounts
13806                let total_expenses: rust_decimal::Decimal = entries
13807                    .iter()
13808                    .filter(|e| e.company_code() == company_code)
13809                    .flat_map(|e| e.lines.iter())
13810                    .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
13811                    .map(|l| l.debit_amount)
13812                    .sum();
13813
13814                // Equity: credit balances on 3xxx accounts
13815                let equity: rust_decimal::Decimal = entries
13816                    .iter()
13817                    .filter(|e| e.company_code() == company_code)
13818                    .flat_map(|e| e.lines.iter())
13819                    .filter(|l| l.account_code.starts_with('3'))
13820                    .map(|l| l.credit_amount)
13821                    .sum();
13822
13823                let pretax_income = company_revenue - total_expenses;
13824
13825                // If no company-specific data, fall back to proportional share
13826                let (rev, assets, pti, eq) = if company_revenue == rust_decimal::Decimal::ZERO {
13827                    let w = rust_decimal::Decimal::try_from(company.volume_weight)
13828                        .unwrap_or(rust_decimal::Decimal::ONE);
13829                    (
13830                        total_revenue * w,
13831                        total_revenue * w * rust_decimal::Decimal::from(3),
13832                        total_revenue * w * rust_decimal::Decimal::new(1, 1),
13833                        total_revenue * w * rust_decimal::Decimal::from(2),
13834                    )
13835                } else {
13836                    (company_revenue, total_assets, pretax_income, equity)
13837                };
13838
13839                let gross_profit = rev * rust_decimal::Decimal::new(35, 2); // 35% assumed
13840
13841                materiality_inputs.push(MaterialityInput {
13842                    entity_code: company_code,
13843                    period: format!("FY{}", fiscal_year),
13844                    revenue: rev,
13845                    pretax_income: pti,
13846                    total_assets: assets,
13847                    equity: eq,
13848                    gross_profit,
13849                });
13850            }
13851
13852            snapshot.materiality_calculations = mat_gen.generate_batch(&materiality_inputs);
13853
13854            info!(
13855                "Materiality: {} calculations generated ({} pre-tax income, {} revenue, \
13856                 {} total assets, {} equity benchmarks)",
13857                snapshot.materiality_calculations.len(),
13858                snapshot
13859                    .materiality_calculations
13860                    .iter()
13861                    .filter(|m| matches!(
13862                        m.benchmark,
13863                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::PretaxIncome
13864                    ))
13865                    .count(),
13866                snapshot
13867                    .materiality_calculations
13868                    .iter()
13869                    .filter(|m| matches!(
13870                        m.benchmark,
13871                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Revenue
13872                    ))
13873                    .count(),
13874                snapshot
13875                    .materiality_calculations
13876                    .iter()
13877                    .filter(|m| matches!(
13878                        m.benchmark,
13879                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::TotalAssets
13880                    ))
13881                    .count(),
13882                snapshot
13883                    .materiality_calculations
13884                    .iter()
13885                    .filter(|m| matches!(
13886                        m.benchmark,
13887                        datasynth_core::models::audit::materiality_calculation::MaterialityBenchmark::Equity
13888                    ))
13889                    .count(),
13890            );
13891        }
13892
13893        // ----------------------------------------------------------------
13894        // ISA 315: Combined Risk Assessments (per entity, per account area)
13895        // ----------------------------------------------------------------
13896        {
13897            use datasynth_generators::audit::cra_generator::CraGenerator;
13898
13899            let mut cra_gen = CraGenerator::new(self.seed + 8315);
13900
13901            // Build entity → scope_id map from already-generated scopes
13902            let entity_scope_map: std::collections::HashMap<String, String> = snapshot
13903                .audit_scopes
13904                .iter()
13905                .map(|s| (s.entity_code.clone(), s.id.clone()))
13906                .collect();
13907
13908            for company in &self.config.companies {
13909                let cras = cra_gen.generate_for_entity(&company.code, None);
13910                let scope_id = entity_scope_map.get(&company.code).cloned();
13911                let cras_with_scope: Vec<_> = cras
13912                    .into_iter()
13913                    .map(|mut cra| {
13914                        cra.scope_id = scope_id.clone();
13915                        cra
13916                    })
13917                    .collect();
13918                snapshot.combined_risk_assessments.extend(cras_with_scope);
13919            }
13920
13921            let significant_count = snapshot
13922                .combined_risk_assessments
13923                .iter()
13924                .filter(|c| c.significant_risk)
13925                .count();
13926            let high_cra_count = snapshot
13927                .combined_risk_assessments
13928                .iter()
13929                .filter(|c| {
13930                    matches!(
13931                        c.combined_risk,
13932                        datasynth_core::models::audit::risk_assessment_cra::CraLevel::High
13933                    )
13934                })
13935                .count();
13936
13937            info!(
13938                "CRA: {} combined risk assessments ({} significant, {} high CRA)",
13939                snapshot.combined_risk_assessments.len(),
13940                significant_count,
13941                high_cra_count,
13942            );
13943        }
13944
13945        // ----------------------------------------------------------------
13946        // ISA 530: Sampling Plans (per CRA at Moderate or High level)
13947        // ----------------------------------------------------------------
13948        {
13949            use datasynth_generators::audit::sampling_plan_generator::SamplingPlanGenerator;
13950
13951            let mut sp_gen = SamplingPlanGenerator::new(self.seed + 8530);
13952
13953            // Group CRAs by entity and use per-entity tolerable error from materiality
13954            for company in &self.config.companies {
13955                let entity_code = company.code.clone();
13956
13957                // Find tolerable error for this entity (= performance materiality)
13958                let tolerable_error = snapshot
13959                    .materiality_calculations
13960                    .iter()
13961                    .find(|m| m.entity_code == entity_code)
13962                    .map(|m| m.tolerable_error);
13963
13964                // Collect CRAs for this entity
13965                let entity_cras: Vec<_> = snapshot
13966                    .combined_risk_assessments
13967                    .iter()
13968                    .filter(|c| c.entity_code == entity_code)
13969                    .cloned()
13970                    .collect();
13971
13972                if !entity_cras.is_empty() {
13973                    let (plans, items) = sp_gen.generate_for_cras(&entity_cras, tolerable_error);
13974                    snapshot.sampling_plans.extend(plans);
13975                    snapshot.sampled_items.extend(items);
13976                }
13977            }
13978
13979            let misstatement_count = snapshot
13980                .sampled_items
13981                .iter()
13982                .filter(|i| i.misstatement_found)
13983                .count();
13984
13985            info!(
13986                "ISA 530: {} sampling plans, {} sampled items ({} misstatements found)",
13987                snapshot.sampling_plans.len(),
13988                snapshot.sampled_items.len(),
13989                misstatement_count,
13990            );
13991        }
13992
13993        // ----------------------------------------------------------------
13994        // ISA 315: Significant Classes of Transactions (SCOTS)
13995        // ----------------------------------------------------------------
13996        {
13997            use datasynth_generators::audit::scots_generator::{
13998                ScotsGenerator, ScotsGeneratorConfig,
13999            };
14000
14001            let ic_enabled = self.config.intercompany.enabled;
14002
14003            let config = ScotsGeneratorConfig {
14004                intercompany_enabled: ic_enabled,
14005                ..ScotsGeneratorConfig::default()
14006            };
14007            let mut scots_gen = ScotsGenerator::with_config(self.seed + 83_150, config);
14008
14009            for company in &self.config.companies {
14010                let entity_scots = scots_gen.generate_for_entity(&company.code, entries);
14011                snapshot
14012                    .significant_transaction_classes
14013                    .extend(entity_scots);
14014            }
14015
14016            let estimation_count = snapshot
14017                .significant_transaction_classes
14018                .iter()
14019                .filter(|s| {
14020                    matches!(
14021                        s.transaction_type,
14022                        datasynth_core::models::audit::scots::ScotTransactionType::Estimation
14023                    )
14024                })
14025                .count();
14026
14027            info!(
14028                "ISA 315 SCOTS: {} significant transaction classes ({} estimation SCOTs)",
14029                snapshot.significant_transaction_classes.len(),
14030                estimation_count,
14031            );
14032        }
14033
14034        // ----------------------------------------------------------------
14035        // ISA 520: Unusual Item Markers
14036        // ----------------------------------------------------------------
14037        {
14038            use datasynth_generators::audit::unusual_item_generator::UnusualItemGenerator;
14039
14040            let mut unusual_gen = UnusualItemGenerator::new(self.seed + 83_200);
14041            let entity_codes: Vec<String> = self
14042                .config
14043                .companies
14044                .iter()
14045                .map(|c| c.code.clone())
14046                .collect();
14047            let unusual_flags =
14048                unusual_gen.generate_for_entities(&entity_codes, entries, period_end);
14049            info!(
14050                "ISA 520 unusual items: {} flags ({} significant, {} moderate, {} minor)",
14051                unusual_flags.len(),
14052                unusual_flags
14053                    .iter()
14054                    .filter(|f| matches!(
14055                        f.severity,
14056                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Significant
14057                    ))
14058                    .count(),
14059                unusual_flags
14060                    .iter()
14061                    .filter(|f| matches!(
14062                        f.severity,
14063                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Moderate
14064                    ))
14065                    .count(),
14066                unusual_flags
14067                    .iter()
14068                    .filter(|f| matches!(
14069                        f.severity,
14070                        datasynth_core::models::audit::unusual_items::UnusualSeverity::Minor
14071                    ))
14072                    .count(),
14073            );
14074            snapshot.unusual_items = unusual_flags;
14075        }
14076
14077        // ----------------------------------------------------------------
14078        // ISA 520: Analytical Relationships
14079        // ----------------------------------------------------------------
14080        {
14081            use datasynth_generators::audit::analytical_relationship_generator::AnalyticalRelationshipGenerator;
14082
14083            let mut ar_gen = AnalyticalRelationshipGenerator::new(self.seed + 83_201);
14084            let entity_codes: Vec<String> = self
14085                .config
14086                .companies
14087                .iter()
14088                .map(|c| c.code.clone())
14089                .collect();
14090            let current_period_label = format!("FY{fiscal_year}");
14091            let prior_period_label = format!("FY{}", fiscal_year - 1);
14092            let analytical_rels = ar_gen.generate_for_entities(
14093                &entity_codes,
14094                entries,
14095                &current_period_label,
14096                &prior_period_label,
14097            );
14098            let out_of_range = analytical_rels
14099                .iter()
14100                .filter(|r| !r.within_expected_range)
14101                .count();
14102            info!(
14103                "ISA 520 analytical relationships: {} relationships ({} out of expected range)",
14104                analytical_rels.len(),
14105                out_of_range,
14106            );
14107            snapshot.analytical_relationships = analytical_rels;
14108        }
14109
14110        if let Some(pb) = pb {
14111            pb.finish_with_message(format!(
14112                "Audit data: {} engagements, {} workpapers, {} evidence, \
14113                 {} confirmations, {} procedure steps, {} samples, \
14114                 {} analytical, {} IA funcs, {} related parties, \
14115                 {} component auditors, {} letters, {} subsequent events, \
14116                 {} service orgs, {} going concern, {} accounting estimates, \
14117                 {} opinions, {} KAMs, {} SOX 302 certs, {} SOX 404 assessments, \
14118                 {} materiality calcs, {} CRAs, {} sampling plans, {} SCOTS, \
14119                 {} unusual items, {} analytical relationships",
14120                snapshot.engagements.len(),
14121                snapshot.workpapers.len(),
14122                snapshot.evidence.len(),
14123                snapshot.confirmations.len(),
14124                snapshot.procedure_steps.len(),
14125                snapshot.samples.len(),
14126                snapshot.analytical_results.len(),
14127                snapshot.ia_functions.len(),
14128                snapshot.related_parties.len(),
14129                snapshot.component_auditors.len(),
14130                snapshot.engagement_letters.len(),
14131                snapshot.subsequent_events.len(),
14132                snapshot.service_organizations.len(),
14133                snapshot.going_concern_assessments.len(),
14134                snapshot.accounting_estimates.len(),
14135                snapshot.audit_opinions.len(),
14136                snapshot.key_audit_matters.len(),
14137                snapshot.sox_302_certifications.len(),
14138                snapshot.sox_404_assessments.len(),
14139                snapshot.materiality_calculations.len(),
14140                snapshot.combined_risk_assessments.len(),
14141                snapshot.sampling_plans.len(),
14142                snapshot.significant_transaction_classes.len(),
14143                snapshot.unusual_items.len(),
14144                snapshot.analytical_relationships.len(),
14145            ));
14146        }
14147
14148        // ----------------------------------------------------------------
14149        // PCAOB-ISA cross-reference mappings
14150        // ----------------------------------------------------------------
14151        // Always include the standard PCAOB-ISA mappings when audit generation is
14152        // enabled. These are static reference data (no randomness required) so we
14153        // call standard_mappings() directly.
14154        {
14155            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14156            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14157            debug!(
14158                "PCAOB-ISA mappings generated: {} mappings",
14159                snapshot.isa_pcaob_mappings.len()
14160            );
14161        }
14162
14163        // ----------------------------------------------------------------
14164        // ISA standard reference entries
14165        // ----------------------------------------------------------------
14166        // Emit flat ISA standard reference data (number, title, series) so
14167        // consumers get a machine-readable listing of all 34 ISA standards in
14168        // audit/isa_mappings.json alongside the PCAOB cross-reference file.
14169        {
14170            use datasynth_standards::audit::isa_reference::IsaStandard;
14171            snapshot.isa_mappings = IsaStandard::standard_entries();
14172            debug!(
14173                "ISA standard entries generated: {} standards",
14174                snapshot.isa_mappings.len()
14175            );
14176        }
14177
14178        // Populate RelatedPartyTransaction.journal_entry_id by matching on date and company.
14179        // For each RPT, find the chronologically closest JE for the engagement's entity.
14180        {
14181            let engagement_by_id: std::collections::HashMap<String, &str> = snapshot
14182                .engagements
14183                .iter()
14184                .map(|e| (e.engagement_id.to_string(), e.client_entity_id.as_str()))
14185                .collect();
14186
14187            for rpt in &mut snapshot.related_party_transactions {
14188                if rpt.journal_entry_id.is_some() {
14189                    continue; // already set
14190                }
14191                let entity = engagement_by_id
14192                    .get(&rpt.engagement_id.to_string())
14193                    .copied()
14194                    .unwrap_or("");
14195
14196                // Find closest JE by date in the entity's company
14197                let best_je = entries
14198                    .iter()
14199                    .filter(|je| je.header.company_code == entity)
14200                    .min_by_key(|je| {
14201                        (je.header.posting_date - rpt.transaction_date)
14202                            .num_days()
14203                            .abs()
14204                    });
14205
14206                if let Some(je) = best_je {
14207                    rpt.journal_entry_id = Some(je.header.document_id.to_string());
14208                }
14209            }
14210
14211            let linked = snapshot
14212                .related_party_transactions
14213                .iter()
14214                .filter(|t| t.journal_entry_id.is_some())
14215                .count();
14216            debug!(
14217                "Linked {}/{} related party transactions to journal entries",
14218                linked,
14219                snapshot.related_party_transactions.len()
14220            );
14221        }
14222
14223        // --- ISA 700 / 701 / 705 / 706: audit opinion + key audit matters.
14224        // One opinion per engagement, derived from that engagement's findings,
14225        // going-concern assessment, and any component-auditor reports. Fills
14226        // `audit_opinions` + a flattened `key_audit_matters` for downstream
14227        // export.
14228        if !snapshot.engagements.is_empty() {
14229            use datasynth_generators::audit_opinion_generator::{
14230                AuditOpinionGenerator, AuditOpinionInput,
14231            };
14232
14233            let mut opinion_gen = AuditOpinionGenerator::new(self.seed.wrapping_add(0x700));
14234            let inputs: Vec<AuditOpinionInput> = snapshot
14235                .engagements
14236                .iter()
14237                .map(|eng| {
14238                    let findings = snapshot
14239                        .findings
14240                        .iter()
14241                        .filter(|f| f.engagement_id == eng.engagement_id)
14242                        .cloned()
14243                        .collect();
14244                    let going_concern = snapshot
14245                        .going_concern_assessments
14246                        .iter()
14247                        .find(|gc| gc.entity_code == eng.client_entity_id)
14248                        .cloned();
14249                    // ComponentAuditorReport doesn't carry an engagement id, but
14250                    // component scope is keyed by `entity_code`, so filter on that.
14251                    let component_reports = snapshot
14252                        .component_reports
14253                        .iter()
14254                        .filter(|r| r.entity_code == eng.client_entity_id)
14255                        .cloned()
14256                        .collect();
14257
14258                    AuditOpinionInput {
14259                        entity_code: eng.client_entity_id.clone(),
14260                        entity_name: eng.client_name.clone(),
14261                        engagement_id: eng.engagement_id,
14262                        period_end: eng.period_end_date,
14263                        findings,
14264                        going_concern,
14265                        component_reports,
14266                        is_us_listed: matches!(
14267                            eng.engagement_type,
14268                            datasynth_core::audit::EngagementType::IntegratedAudit
14269                                | datasynth_core::audit::EngagementType::Sox404
14270                        ),
14271                        auditor_name: "DataSynth Audit LLP".to_string(),
14272                        engagement_partner: "Engagement Partner".to_string(),
14273                    }
14274                })
14275                .collect();
14276
14277            let generated = opinion_gen.generate_batch(&inputs);
14278            for g in generated {
14279                snapshot.key_audit_matters.extend(g.key_audit_matters);
14280                snapshot.audit_opinions.push(g.opinion);
14281            }
14282            debug!(
14283                "Generated {} audit opinions with {} key audit matters",
14284                snapshot.audit_opinions.len(),
14285                snapshot.key_audit_matters.len()
14286            );
14287        }
14288
14289        Ok(snapshot)
14290    }
14291
14292    /// Generate audit data using the FSM engine (called when `audit.fsm.enabled: true`).
14293    ///
14294    /// Loads the configured blueprint and overlay, builds an [`EngagementContext`]
14295    /// from the current orchestrator state, runs the FSM engine, and maps the
14296    /// resulting [`ArtifactBag`] into an [`AuditSnapshot`].  The FSM event trail
14297    /// is stored in [`AuditSnapshot::fsm_event_trail`] for downstream export.
14298    fn generate_audit_data_with_fsm(
14299        &mut self,
14300        entries: &[JournalEntry],
14301    ) -> SynthResult<AuditSnapshot> {
14302        use datasynth_audit_fsm::{
14303            context::EngagementContext,
14304            engine::AuditFsmEngine,
14305            loader::{load_overlay, BlueprintWithPreconditions, BuiltinOverlay, OverlaySource},
14306        };
14307        use rand::SeedableRng;
14308        use rand_chacha::ChaCha8Rng;
14309
14310        info!("Audit FSM: generating audit data via FSM engine");
14311
14312        let fsm_config = self
14313            .config
14314            .audit
14315            .fsm
14316            .as_ref()
14317            .expect("FSM config must be present when FSM is enabled");
14318
14319        // 1. Load blueprint from config string.
14320        let bwp = match fsm_config.blueprint.as_str() {
14321            "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
14322            "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
14323            _ => {
14324                warn!(
14325                    "Unknown FSM blueprint '{}', falling back to builtin:fsa",
14326                    fsm_config.blueprint
14327                );
14328                BlueprintWithPreconditions::load_builtin_fsa()
14329            }
14330        }
14331        .map_err(|e| SynthError::generation(format!("FSM blueprint load failed: {e}")))?;
14332
14333        // 2. Load overlay from config string.
14334        let overlay = match fsm_config.overlay.as_str() {
14335            "builtin:default" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default)),
14336            "builtin:thorough" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough)),
14337            "builtin:rushed" => load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed)),
14338            _ => {
14339                warn!(
14340                    "Unknown FSM overlay '{}', falling back to builtin:default",
14341                    fsm_config.overlay
14342                );
14343                load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
14344            }
14345        }
14346        .map_err(|e| SynthError::generation(format!("FSM overlay load failed: {e}")))?;
14347
14348        // 3. Build EngagementContext from orchestrator state.
14349        let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
14350            .map_err(|e| SynthError::config(format!("Invalid start_date: {e}")))?;
14351        let period_end = start_date + chrono::Months::new(self.config.global.period_months);
14352
14353        // Determine the engagement entity early so we can filter JEs.
14354        let company = self.config.companies.first();
14355        let company_code = company
14356            .map(|c| c.code.clone())
14357            .unwrap_or_else(|| "UNKNOWN".to_string());
14358        let company_name = company
14359            .map(|c| c.name.clone())
14360            .unwrap_or_else(|| "Unknown Company".to_string());
14361        let currency = company
14362            .map(|c| c.currency.clone())
14363            .unwrap_or_else(|| "USD".to_string());
14364
14365        // Filter JEs to the engagement entity for single-company coherence.
14366        let entity_entries: Vec<_> = entries
14367            .iter()
14368            .filter(|e| company_code == "UNKNOWN" || e.header.company_code == company_code)
14369            .cloned()
14370            .collect();
14371        let entries = &entity_entries; // Shadow the parameter for remaining usage
14372
14373        // Financial aggregates from journal entries.
14374        let total_revenue: rust_decimal::Decimal = entries
14375            .iter()
14376            .flat_map(|e| e.lines.iter())
14377            .filter(|l| l.account_code.starts_with('4'))
14378            .map(|l| l.credit_amount - l.debit_amount)
14379            .sum();
14380
14381        let total_assets: rust_decimal::Decimal = entries
14382            .iter()
14383            .flat_map(|e| e.lines.iter())
14384            .filter(|l| l.account_code.starts_with('1'))
14385            .map(|l| l.debit_amount - l.credit_amount)
14386            .sum();
14387
14388        let total_expenses: rust_decimal::Decimal = entries
14389            .iter()
14390            .flat_map(|e| e.lines.iter())
14391            .filter(|l| l.account_code.starts_with('5') || l.account_code.starts_with('6'))
14392            .map(|l| l.debit_amount)
14393            .sum();
14394
14395        let equity: rust_decimal::Decimal = entries
14396            .iter()
14397            .flat_map(|e| e.lines.iter())
14398            .filter(|l| l.account_code.starts_with('3'))
14399            .map(|l| l.credit_amount - l.debit_amount)
14400            .sum();
14401
14402        let total_debt: rust_decimal::Decimal = entries
14403            .iter()
14404            .flat_map(|e| e.lines.iter())
14405            .filter(|l| l.account_code.starts_with('2'))
14406            .map(|l| l.credit_amount - l.debit_amount)
14407            .sum();
14408
14409        let pretax_income = total_revenue - total_expenses;
14410
14411        let cogs: rust_decimal::Decimal = entries
14412            .iter()
14413            .flat_map(|e| e.lines.iter())
14414            .filter(|l| l.account_code.starts_with('5'))
14415            .map(|l| l.debit_amount)
14416            .sum();
14417        let gross_profit = total_revenue - cogs;
14418
14419        let current_assets: rust_decimal::Decimal = entries
14420            .iter()
14421            .flat_map(|e| e.lines.iter())
14422            .filter(|l| {
14423                l.account_code.starts_with("10")
14424                    || l.account_code.starts_with("11")
14425                    || l.account_code.starts_with("12")
14426                    || l.account_code.starts_with("13")
14427            })
14428            .map(|l| l.debit_amount - l.credit_amount)
14429            .sum();
14430        let current_liabilities: rust_decimal::Decimal = entries
14431            .iter()
14432            .flat_map(|e| e.lines.iter())
14433            .filter(|l| {
14434                l.account_code.starts_with("20")
14435                    || l.account_code.starts_with("21")
14436                    || l.account_code.starts_with("22")
14437            })
14438            .map(|l| l.credit_amount - l.debit_amount)
14439            .sum();
14440        let working_capital = current_assets - current_liabilities;
14441
14442        let depreciation: rust_decimal::Decimal = entries
14443            .iter()
14444            .flat_map(|e| e.lines.iter())
14445            .filter(|l| l.account_code.starts_with("60"))
14446            .map(|l| l.debit_amount)
14447            .sum();
14448        let operating_cash_flow = pretax_income + depreciation;
14449
14450        // GL accounts for reference data.
14451        let accounts: Vec<String> = self
14452            .coa
14453            .as_ref()
14454            .map(|coa| {
14455                coa.get_postable_accounts()
14456                    .iter()
14457                    .map(|acc| acc.account_code().to_string())
14458                    .collect()
14459            })
14460            .unwrap_or_default();
14461
14462        // Team member IDs and display names from master data.
14463        let team_member_ids: Vec<String> = self
14464            .master_data
14465            .employees
14466            .iter()
14467            .take(8) // Cap team size
14468            .map(|e| e.employee_id.clone())
14469            .collect();
14470        let team_member_pairs: Vec<(String, String)> = self
14471            .master_data
14472            .employees
14473            .iter()
14474            .take(8)
14475            .map(|e| (e.employee_id.clone(), e.display_name.clone()))
14476            .collect();
14477
14478        let vendor_names: Vec<String> = self
14479            .master_data
14480            .vendors
14481            .iter()
14482            .map(|v| v.name.clone())
14483            .collect();
14484        let customer_names: Vec<String> = self
14485            .master_data
14486            .customers
14487            .iter()
14488            .map(|c| c.name.clone())
14489            .collect();
14490
14491        let entity_codes: Vec<String> = self
14492            .config
14493            .companies
14494            .iter()
14495            .map(|c| c.code.clone())
14496            .collect();
14497
14498        // Journal entry IDs for evidence tracing (sample up to 50).
14499        let journal_entry_ids: Vec<String> = entries
14500            .iter()
14501            .take(50)
14502            .map(|e| e.header.document_id.to_string())
14503            .collect();
14504
14505        // Account balances for risk weighting (aggregate debit - credit per account).
14506        let mut account_balances = std::collections::HashMap::<String, f64>::new();
14507        for entry in entries {
14508            for line in &entry.lines {
14509                let debit_f64: f64 = line.debit_amount.to_string().parse().unwrap_or(0.0);
14510                let credit_f64: f64 = line.credit_amount.to_string().parse().unwrap_or(0.0);
14511                *account_balances
14512                    .entry(line.account_code.clone())
14513                    .or_insert(0.0) += debit_f64 - credit_f64;
14514            }
14515        }
14516
14517        // Internal control IDs and anomaly refs are populated by the
14518        // caller when available; here we default to empty because the
14519        // orchestrator state may not have generated controls/anomalies
14520        // yet at this point in the pipeline.
14521        let control_ids: Vec<String> = Vec::new();
14522        let anomaly_refs: Vec<String> = Vec::new();
14523
14524        let mut context = EngagementContext {
14525            company_code,
14526            company_name,
14527            fiscal_year: start_date.year(),
14528            currency,
14529            total_revenue,
14530            total_assets,
14531            engagement_start: start_date,
14532            report_date: period_end,
14533            pretax_income,
14534            equity,
14535            gross_profit,
14536            working_capital,
14537            operating_cash_flow,
14538            total_debt,
14539            team_member_ids,
14540            team_member_pairs,
14541            accounts,
14542            vendor_names,
14543            customer_names,
14544            journal_entry_ids,
14545            account_balances,
14546            control_ids,
14547            anomaly_refs,
14548            journal_entries: entries.to_vec(),
14549            is_us_listed: false,
14550            entity_codes,
14551            auditor_firm_name: "DataSynth Audit LLP".into(),
14552            accounting_framework: self
14553                .config
14554                .accounting_standards
14555                .framework
14556                .map(|f| match f {
14557                    datasynth_config::schema::AccountingFrameworkConfig::UsGaap => "US GAAP",
14558                    datasynth_config::schema::AccountingFrameworkConfig::Ifrs => "IFRS",
14559                    datasynth_config::schema::AccountingFrameworkConfig::FrenchGaap => {
14560                        "French GAAP"
14561                    }
14562                    datasynth_config::schema::AccountingFrameworkConfig::GermanGaap => {
14563                        "German GAAP"
14564                    }
14565                    datasynth_config::schema::AccountingFrameworkConfig::DualReporting => {
14566                        "Dual Reporting"
14567                    }
14568                })
14569                .unwrap_or("IFRS")
14570                .into(),
14571        };
14572
14573        // 4. Create and run the FSM engine.
14574        let seed = fsm_config.seed.unwrap_or(self.seed + 8000);
14575        let rng = ChaCha8Rng::seed_from_u64(seed);
14576        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
14577
14578        let mut result = engine
14579            .run_engagement(&context)
14580            .map_err(|e| SynthError::generation(format!("FSM engine failed: {e}")))?;
14581
14582        info!(
14583            "Audit FSM: engine produced {} events, {} artifacts, {} anomalies, \
14584             {} phases completed, duration {:.1}h",
14585            result.event_log.len(),
14586            result.artifacts.total_artifacts(),
14587            result.anomalies.len(),
14588            result.phases_completed.len(),
14589            result.total_duration_hours,
14590        );
14591
14592        // 4b. Populate financial data in the artifact bag for downstream consumers.
14593        let tb_entity = context.company_code.clone();
14594        let tb_fy = context.fiscal_year;
14595        result.artifacts.journal_entries = std::mem::take(&mut context.journal_entries);
14596        result.artifacts.trial_balance_entries = compute_trial_balance_entries(
14597            entries,
14598            &tb_entity,
14599            tb_fy,
14600            self.coa.as_ref().map(|c| c.as_ref()),
14601        );
14602
14603        // 5. Map ArtifactBag fields to AuditSnapshot.
14604        let bag = result.artifacts;
14605        let mut snapshot = AuditSnapshot {
14606            engagements: bag.engagements,
14607            engagement_letters: bag.engagement_letters,
14608            materiality_calculations: bag.materiality_calculations,
14609            risk_assessments: bag.risk_assessments,
14610            combined_risk_assessments: bag.combined_risk_assessments,
14611            workpapers: bag.workpapers,
14612            evidence: bag.evidence,
14613            findings: bag.findings,
14614            judgments: bag.judgments,
14615            sampling_plans: bag.sampling_plans,
14616            sampled_items: bag.sampled_items,
14617            analytical_results: bag.analytical_results,
14618            going_concern_assessments: bag.going_concern_assessments,
14619            subsequent_events: bag.subsequent_events,
14620            audit_opinions: bag.audit_opinions,
14621            key_audit_matters: bag.key_audit_matters,
14622            procedure_steps: bag.procedure_steps,
14623            samples: bag.samples,
14624            confirmations: bag.confirmations,
14625            confirmation_responses: bag.confirmation_responses,
14626            // Store the event trail for downstream export.
14627            fsm_event_trail: Some(result.event_log),
14628            // Fields not produced by the FSM engine remain at their defaults.
14629            ..Default::default()
14630        };
14631
14632        // 6. Add static reference data (same as legacy path).
14633        {
14634            use datasynth_standards::audit::pcaob::PcaobIsaMapping;
14635            snapshot.isa_pcaob_mappings = PcaobIsaMapping::standard_mappings();
14636        }
14637        {
14638            use datasynth_standards::audit::isa_reference::IsaStandard;
14639            snapshot.isa_mappings = IsaStandard::standard_entries();
14640        }
14641
14642        info!(
14643            "Audit FSM: snapshot contains {} engagements, {} workpapers, {} evidence, \
14644             {} risk assessments, {} findings, {} materiality calcs",
14645            snapshot.engagements.len(),
14646            snapshot.workpapers.len(),
14647            snapshot.evidence.len(),
14648            snapshot.risk_assessments.len(),
14649            snapshot.findings.len(),
14650            snapshot.materiality_calculations.len(),
14651        );
14652
14653        Ok(snapshot)
14654    }
14655
14656    /// Export journal entries as graph data for ML training and network reconstruction.
14657    ///
14658    /// Builds a transaction graph where:
14659    /// - Nodes are GL accounts
14660    /// - Edges are money flows from credit to debit accounts
14661    /// - Edge attributes include amount, date, business process, anomaly flags
14662    fn export_graphs(
14663        &mut self,
14664        entries: &[JournalEntry],
14665        _coa: &Arc<ChartOfAccounts>,
14666        stats: &mut EnhancedGenerationStatistics,
14667    ) -> SynthResult<GraphExportSnapshot> {
14668        let pb = self.create_progress_bar(100, "Exporting Graphs");
14669
14670        let mut snapshot = GraphExportSnapshot::default();
14671
14672        // Get output directory
14673        let output_dir = self
14674            .output_path
14675            .clone()
14676            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14677        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14678
14679        // Process each graph type configuration
14680        for graph_type in &self.config.graph_export.graph_types {
14681            if let Some(pb) = &pb {
14682                pb.inc(10);
14683            }
14684
14685            // Build transaction graph
14686            let graph_config = TransactionGraphConfig {
14687                include_vendors: false,
14688                include_customers: false,
14689                create_debit_credit_edges: true,
14690                include_document_nodes: graph_type.include_document_nodes,
14691                min_edge_weight: graph_type.min_edge_weight,
14692                aggregate_parallel_edges: graph_type.aggregate_edges,
14693                framework: None,
14694            };
14695
14696            let mut builder = TransactionGraphBuilder::new(graph_config);
14697            builder.add_journal_entries(entries);
14698            let graph = builder.build();
14699
14700            // Update stats
14701            stats.graph_node_count += graph.node_count();
14702            stats.graph_edge_count += graph.edge_count();
14703
14704            if let Some(pb) = &pb {
14705                pb.inc(40);
14706            }
14707
14708            // Export to each configured format
14709            for format in &self.config.graph_export.formats {
14710                let format_dir = graph_dir.join(&graph_type.name).join(format_name(*format));
14711
14712                // Create output directory
14713                if let Err(e) = std::fs::create_dir_all(&format_dir) {
14714                    warn!("Failed to create graph output directory: {}", e);
14715                    continue;
14716                }
14717
14718                match format {
14719                    datasynth_config::schema::GraphExportFormat::PytorchGeometric => {
14720                        let pyg_config = PyGExportConfig {
14721                            common: datasynth_graph::CommonExportConfig {
14722                                export_node_features: true,
14723                                export_edge_features: true,
14724                                export_node_labels: true,
14725                                export_edge_labels: true,
14726                                export_masks: true,
14727                                train_ratio: self.config.graph_export.train_ratio,
14728                                val_ratio: self.config.graph_export.validation_ratio,
14729                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14730                            },
14731                            one_hot_categoricals: false,
14732                        };
14733
14734                        let exporter = PyGExporter::new(pyg_config);
14735                        match exporter.export(&graph, &format_dir) {
14736                            Ok(metadata) => {
14737                                snapshot.exports.insert(
14738                                    format!("{}_{}", graph_type.name, "pytorch_geometric"),
14739                                    GraphExportInfo {
14740                                        name: graph_type.name.clone(),
14741                                        format: "pytorch_geometric".to_string(),
14742                                        output_path: format_dir.clone(),
14743                                        node_count: metadata.num_nodes,
14744                                        edge_count: metadata.num_edges,
14745                                    },
14746                                );
14747                                snapshot.graph_count += 1;
14748                            }
14749                            Err(e) => {
14750                                warn!("Failed to export PyTorch Geometric graph: {}", e);
14751                            }
14752                        }
14753                    }
14754                    datasynth_config::schema::GraphExportFormat::Neo4j => {
14755                        use datasynth_graph::{Neo4jExportConfig, Neo4jExporter};
14756
14757                        let neo4j_config = Neo4jExportConfig {
14758                            export_node_properties: true,
14759                            export_edge_properties: true,
14760                            export_features: true,
14761                            generate_cypher: true,
14762                            generate_admin_import: true,
14763                            database_name: "synth".to_string(),
14764                            cypher_batch_size: 1000,
14765                        };
14766
14767                        let exporter = Neo4jExporter::new(neo4j_config);
14768                        match exporter.export(&graph, &format_dir) {
14769                            Ok(metadata) => {
14770                                snapshot.exports.insert(
14771                                    format!("{}_{}", graph_type.name, "neo4j"),
14772                                    GraphExportInfo {
14773                                        name: graph_type.name.clone(),
14774                                        format: "neo4j".to_string(),
14775                                        output_path: format_dir.clone(),
14776                                        node_count: metadata.num_nodes,
14777                                        edge_count: metadata.num_edges,
14778                                    },
14779                                );
14780                                snapshot.graph_count += 1;
14781                            }
14782                            Err(e) => {
14783                                warn!("Failed to export Neo4j graph: {}", e);
14784                            }
14785                        }
14786                    }
14787                    datasynth_config::schema::GraphExportFormat::Dgl => {
14788                        use datasynth_graph::{DGLExportConfig, DGLExporter};
14789
14790                        let dgl_config = DGLExportConfig {
14791                            common: datasynth_graph::CommonExportConfig {
14792                                export_node_features: true,
14793                                export_edge_features: true,
14794                                export_node_labels: true,
14795                                export_edge_labels: true,
14796                                export_masks: true,
14797                                train_ratio: self.config.graph_export.train_ratio,
14798                                val_ratio: self.config.graph_export.validation_ratio,
14799                                seed: self.config.graph_export.split_seed.unwrap_or(self.seed),
14800                            },
14801                            heterogeneous: self.config.graph_export.dgl.heterogeneous,
14802                            include_pickle_script: true, // DGL ecosystem standard helper
14803                        };
14804
14805                        let exporter = DGLExporter::new(dgl_config);
14806                        match exporter.export(&graph, &format_dir) {
14807                            Ok(metadata) => {
14808                                snapshot.exports.insert(
14809                                    format!("{}_{}", graph_type.name, "dgl"),
14810                                    GraphExportInfo {
14811                                        name: graph_type.name.clone(),
14812                                        format: "dgl".to_string(),
14813                                        output_path: format_dir.clone(),
14814                                        node_count: metadata.common.num_nodes,
14815                                        edge_count: metadata.common.num_edges,
14816                                    },
14817                                );
14818                                snapshot.graph_count += 1;
14819                            }
14820                            Err(e) => {
14821                                warn!("Failed to export DGL graph: {}", e);
14822                            }
14823                        }
14824                    }
14825                    datasynth_config::schema::GraphExportFormat::RustGraph => {
14826                        use datasynth_graph::{
14827                            RustGraphExportConfig, RustGraphExporter, RustGraphOutputFormat,
14828                        };
14829
14830                        let rustgraph_config = RustGraphExportConfig {
14831                            include_features: true,
14832                            include_temporal: true,
14833                            include_labels: true,
14834                            source_name: "datasynth".to_string(),
14835                            batch_id: None,
14836                            output_format: RustGraphOutputFormat::JsonLines,
14837                            export_node_properties: true,
14838                            export_edge_properties: true,
14839                            pretty_print: false,
14840                        };
14841
14842                        let exporter = RustGraphExporter::new(rustgraph_config);
14843                        match exporter.export(&graph, &format_dir) {
14844                            Ok(metadata) => {
14845                                snapshot.exports.insert(
14846                                    format!("{}_{}", graph_type.name, "rustgraph"),
14847                                    GraphExportInfo {
14848                                        name: graph_type.name.clone(),
14849                                        format: "rustgraph".to_string(),
14850                                        output_path: format_dir.clone(),
14851                                        node_count: metadata.num_nodes,
14852                                        edge_count: metadata.num_edges,
14853                                    },
14854                                );
14855                                snapshot.graph_count += 1;
14856                            }
14857                            Err(e) => {
14858                                warn!("Failed to export RustGraph: {}", e);
14859                            }
14860                        }
14861                    }
14862                    datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => {
14863                        // Hypergraph export is handled separately in Phase 10b
14864                        debug!("RustGraphHypergraph format is handled in Phase 10b (hypergraph export)");
14865                    }
14866                }
14867            }
14868
14869            if let Some(pb) = &pb {
14870                pb.inc(40);
14871            }
14872        }
14873
14874        stats.graph_export_count = snapshot.graph_count;
14875        snapshot.exported = snapshot.graph_count > 0;
14876
14877        if let Some(pb) = pb {
14878            pb.finish_with_message(format!(
14879                "Graphs exported: {} graphs ({} nodes, {} edges)",
14880                snapshot.graph_count, stats.graph_node_count, stats.graph_edge_count
14881            ));
14882        }
14883
14884        Ok(snapshot)
14885    }
14886
14887    /// Build additional graph types (banking, approval, entity) when relevant data
14888    /// is available. These run as a late phase because the data they need (banking
14889    /// snapshot, intercompany snapshot) is only generated after the main graph
14890    /// export phase.
14891    fn build_additional_graphs(
14892        &self,
14893        banking: &BankingSnapshot,
14894        intercompany: &IntercompanySnapshot,
14895        entries: &[JournalEntry],
14896        stats: &mut EnhancedGenerationStatistics,
14897    ) {
14898        let output_dir = self
14899            .output_path
14900            .clone()
14901            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
14902        let graph_dir = output_dir.join(&self.config.graph_export.output_subdirectory);
14903
14904        // Banking graph: build when banking customers and transactions exist
14905        if !banking.customers.is_empty() && !banking.transactions.is_empty() {
14906            info!("Phase 10c: Building banking network graph");
14907            let config = BankingGraphConfig::default();
14908            let mut builder = BankingGraphBuilder::new(config);
14909            builder.add_customers(&banking.customers);
14910            builder.add_accounts(&banking.accounts, &banking.customers);
14911            builder.add_transactions(&banking.transactions);
14912            let graph = builder.build();
14913
14914            let node_count = graph.node_count();
14915            let edge_count = graph.edge_count();
14916            stats.graph_node_count += node_count;
14917            stats.graph_edge_count += edge_count;
14918
14919            // Export as PyG if configured
14920            for format in &self.config.graph_export.formats {
14921                if matches!(
14922                    format,
14923                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14924                ) {
14925                    let format_dir = graph_dir.join("banking_network").join("pytorch_geometric");
14926                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
14927                        warn!("Failed to create banking graph output dir: {}", e);
14928                        continue;
14929                    }
14930                    let pyg_config = PyGExportConfig::default();
14931                    let exporter = PyGExporter::new(pyg_config);
14932                    if let Err(e) = exporter.export(&graph, &format_dir) {
14933                        warn!("Failed to export banking graph as PyG: {}", e);
14934                    } else {
14935                        info!(
14936                            "Banking network graph exported: {} nodes, {} edges",
14937                            node_count, edge_count
14938                        );
14939                    }
14940                }
14941            }
14942        }
14943
14944        // Approval graph: build from journal entry approval workflows
14945        let approval_entries: Vec<_> = entries
14946            .iter()
14947            .filter(|je| je.header.approval_workflow.is_some())
14948            .collect();
14949
14950        if !approval_entries.is_empty() {
14951            info!(
14952                "Phase 10c: Building approval network graph ({} entries with approvals)",
14953                approval_entries.len()
14954            );
14955            let config = ApprovalGraphConfig::default();
14956            let mut builder = ApprovalGraphBuilder::new(config);
14957
14958            for je in &approval_entries {
14959                if let Some(ref wf) = je.header.approval_workflow {
14960                    for action in &wf.actions {
14961                        let record = datasynth_core::models::ApprovalRecord {
14962                            approval_id: format!(
14963                                "APR-{}-{}",
14964                                je.header.document_id, action.approval_level
14965                            ),
14966                            document_number: je.header.document_id.to_string(),
14967                            document_type: "JE".to_string(),
14968                            company_code: je.company_code().to_string(),
14969                            requester_id: wf.preparer_id.clone(),
14970                            requester_name: Some(wf.preparer_name.clone()),
14971                            approver_id: action.actor_id.clone(),
14972                            approver_name: action.actor_name.clone(),
14973                            approval_date: je.posting_date(),
14974                            action: format!("{:?}", action.action),
14975                            amount: wf.amount,
14976                            approval_limit: None,
14977                            comments: action.comments.clone(),
14978                            delegation_from: None,
14979                            is_auto_approved: false,
14980                        };
14981                        builder.add_approval(&record);
14982                    }
14983                }
14984            }
14985
14986            let graph = builder.build();
14987            let node_count = graph.node_count();
14988            let edge_count = graph.edge_count();
14989            stats.graph_node_count += node_count;
14990            stats.graph_edge_count += edge_count;
14991
14992            // Export as PyG if configured
14993            for format in &self.config.graph_export.formats {
14994                if matches!(
14995                    format,
14996                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
14997                ) {
14998                    let format_dir = graph_dir.join("approval_network").join("pytorch_geometric");
14999                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
15000                        warn!("Failed to create approval graph output dir: {}", e);
15001                        continue;
15002                    }
15003                    let pyg_config = PyGExportConfig::default();
15004                    let exporter = PyGExporter::new(pyg_config);
15005                    if let Err(e) = exporter.export(&graph, &format_dir) {
15006                        warn!("Failed to export approval graph as PyG: {}", e);
15007                    } else {
15008                        info!(
15009                            "Approval network graph exported: {} nodes, {} edges",
15010                            node_count, edge_count
15011                        );
15012                    }
15013                }
15014            }
15015        }
15016
15017        // Entity graph: map CompanyConfig → Company and wire intercompany relationships
15018        if self.config.companies.len() >= 2 {
15019            info!(
15020                "Phase 10c: Building entity relationship graph ({} companies)",
15021                self.config.companies.len()
15022            );
15023
15024            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15025                .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).expect("valid date"));
15026
15027            // Map CompanyConfig → Company objects
15028            let parent_code = &self.config.companies[0].code;
15029            let mut companies: Vec<datasynth_core::models::Company> =
15030                Vec::with_capacity(self.config.companies.len());
15031
15032            // First company is the parent
15033            let first = &self.config.companies[0];
15034            companies.push(datasynth_core::models::Company::parent(
15035                &first.code,
15036                &first.name,
15037                &first.country,
15038                &first.currency,
15039            ));
15040
15041            // Remaining companies are subsidiaries (100% owned by parent)
15042            for cc in self.config.companies.iter().skip(1) {
15043                companies.push(datasynth_core::models::Company::subsidiary(
15044                    &cc.code,
15045                    &cc.name,
15046                    &cc.country,
15047                    &cc.currency,
15048                    parent_code,
15049                    rust_decimal::Decimal::from(100),
15050                ));
15051            }
15052
15053            // Build IntercompanyRelationship records (same logic as phase_intercompany)
15054            let relationships: Vec<datasynth_core::models::intercompany::IntercompanyRelationship> =
15055                self.config
15056                    .companies
15057                    .iter()
15058                    .skip(1)
15059                    .enumerate()
15060                    .map(|(i, cc)| {
15061                        let mut rel =
15062                            datasynth_core::models::intercompany::IntercompanyRelationship::new(
15063                                format!("REL{:03}", i + 1),
15064                                parent_code.clone(),
15065                                cc.code.clone(),
15066                                rust_decimal::Decimal::from(100),
15067                                start_date,
15068                            );
15069                        rel.functional_currency = cc.currency.clone();
15070                        rel
15071                    })
15072                    .collect();
15073
15074            let mut builder = EntityGraphBuilder::new(EntityGraphConfig::default());
15075            builder.add_companies(&companies);
15076            builder.add_ownership_relationships(&relationships);
15077
15078            // Thread IC matched-pair transaction edges into the entity graph
15079            for pair in &intercompany.matched_pairs {
15080                builder.add_intercompany_edge(
15081                    &pair.seller_company,
15082                    &pair.buyer_company,
15083                    pair.amount,
15084                    &format!("{:?}", pair.transaction_type),
15085                );
15086            }
15087
15088            let graph = builder.build();
15089            let node_count = graph.node_count();
15090            let edge_count = graph.edge_count();
15091            stats.graph_node_count += node_count;
15092            stats.graph_edge_count += edge_count;
15093
15094            // Export as PyG if configured
15095            for format in &self.config.graph_export.formats {
15096                if matches!(
15097                    format,
15098                    datasynth_config::schema::GraphExportFormat::PytorchGeometric
15099                ) {
15100                    let format_dir = graph_dir.join("entity_network").join("pytorch_geometric");
15101                    if let Err(e) = std::fs::create_dir_all(&format_dir) {
15102                        warn!("Failed to create entity graph output dir: {}", e);
15103                        continue;
15104                    }
15105                    let pyg_config = PyGExportConfig::default();
15106                    let exporter = PyGExporter::new(pyg_config);
15107                    if let Err(e) = exporter.export(&graph, &format_dir) {
15108                        warn!("Failed to export entity graph as PyG: {}", e);
15109                    } else {
15110                        info!(
15111                            "Entity relationship graph exported: {} nodes, {} edges",
15112                            node_count, edge_count
15113                        );
15114                    }
15115                }
15116            }
15117        } else {
15118            debug!(
15119                "EntityGraphBuilder: skipped (requires 2+ companies, found {})",
15120                self.config.companies.len()
15121            );
15122        }
15123    }
15124
15125    /// Export a multi-layer hypergraph for RustGraph integration.
15126    ///
15127    /// Builds a 3-layer hypergraph:
15128    /// - Layer 1: Governance & Controls (COSO, internal controls, master data)
15129    /// - Layer 2: Process Events (all process family document flows + OCPM events)
15130    /// - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
15131    #[allow(clippy::too_many_arguments)]
15132    fn export_hypergraph(
15133        &self,
15134        coa: &Arc<ChartOfAccounts>,
15135        entries: &[JournalEntry],
15136        document_flows: &DocumentFlowSnapshot,
15137        sourcing: &SourcingSnapshot,
15138        hr: &HrSnapshot,
15139        manufacturing: &ManufacturingSnapshot,
15140        banking: &BankingSnapshot,
15141        audit: &AuditSnapshot,
15142        financial_reporting: &FinancialReportingSnapshot,
15143        ocpm: &OcpmSnapshot,
15144        compliance: &ComplianceRegulationsSnapshot,
15145        stats: &mut EnhancedGenerationStatistics,
15146    ) -> SynthResult<HypergraphExportInfo> {
15147        use datasynth_graph::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
15148        use datasynth_graph::exporters::hypergraph::{HypergraphExportConfig, HypergraphExporter};
15149        use datasynth_graph::exporters::unified::{RustGraphUnifiedExporter, UnifiedExportConfig};
15150        use datasynth_graph::models::hypergraph::AggregationStrategy;
15151
15152        let hg_settings = &self.config.graph_export.hypergraph;
15153
15154        // Parse aggregation strategy from config string
15155        let aggregation_strategy = match hg_settings.aggregation_strategy.as_str() {
15156            "truncate" => AggregationStrategy::Truncate,
15157            "pool_by_counterparty" => AggregationStrategy::PoolByCounterparty,
15158            "pool_by_time_period" => AggregationStrategy::PoolByTimePeriod,
15159            "importance_sample" => AggregationStrategy::ImportanceSample,
15160            _ => AggregationStrategy::PoolByCounterparty,
15161        };
15162
15163        let builder_config = HypergraphConfig {
15164            max_nodes: hg_settings.max_nodes,
15165            aggregation_strategy,
15166            include_coso: hg_settings.governance_layer.include_coso,
15167            include_controls: hg_settings.governance_layer.include_controls,
15168            include_sox: hg_settings.governance_layer.include_sox,
15169            include_vendors: hg_settings.governance_layer.include_vendors,
15170            include_customers: hg_settings.governance_layer.include_customers,
15171            include_employees: hg_settings.governance_layer.include_employees,
15172            include_p2p: hg_settings.process_layer.include_p2p,
15173            include_o2c: hg_settings.process_layer.include_o2c,
15174            include_s2c: hg_settings.process_layer.include_s2c,
15175            include_h2r: hg_settings.process_layer.include_h2r,
15176            include_mfg: hg_settings.process_layer.include_mfg,
15177            include_bank: hg_settings.process_layer.include_bank,
15178            include_audit: hg_settings.process_layer.include_audit,
15179            include_r2r: hg_settings.process_layer.include_r2r,
15180            events_as_hyperedges: hg_settings.process_layer.events_as_hyperedges,
15181            docs_per_counterparty_threshold: hg_settings
15182                .process_layer
15183                .docs_per_counterparty_threshold,
15184            include_accounts: hg_settings.accounting_layer.include_accounts,
15185            je_as_hyperedges: hg_settings.accounting_layer.je_as_hyperedges,
15186            include_cross_layer_edges: hg_settings.cross_layer.enabled,
15187            include_compliance: self.config.compliance_regulations.enabled,
15188            include_tax: true,
15189            include_treasury: true,
15190            include_esg: true,
15191            include_project: true,
15192            include_intercompany: true,
15193            include_temporal_events: true,
15194        };
15195
15196        let mut builder = HypergraphBuilder::new(builder_config);
15197
15198        // Layer 1: Governance & Controls
15199        builder.add_coso_framework();
15200
15201        // Add controls if available (generated during JE generation)
15202        // Controls are generated per-company; we use the standard set
15203        if hg_settings.governance_layer.include_controls && self.config.internal_controls.enabled {
15204            let controls = InternalControl::standard_controls();
15205            builder.add_controls(&controls);
15206        }
15207
15208        // Add master data
15209        builder.add_vendors(&self.master_data.vendors);
15210        builder.add_customers(&self.master_data.customers);
15211        builder.add_employees(&self.master_data.employees);
15212
15213        // Layer 2: Process Events (all process families)
15214        builder.add_p2p_documents(
15215            &document_flows.purchase_orders,
15216            &document_flows.goods_receipts,
15217            &document_flows.vendor_invoices,
15218            &document_flows.payments,
15219        );
15220        builder.add_o2c_documents(
15221            &document_flows.sales_orders,
15222            &document_flows.deliveries,
15223            &document_flows.customer_invoices,
15224        );
15225        builder.add_s2c_documents(
15226            &sourcing.sourcing_projects,
15227            &sourcing.qualifications,
15228            &sourcing.rfx_events,
15229            &sourcing.bids,
15230            &sourcing.bid_evaluations,
15231            &sourcing.contracts,
15232        );
15233        builder.add_h2r_documents(&hr.payroll_runs, &hr.time_entries, &hr.expense_reports);
15234        builder.add_mfg_documents(
15235            &manufacturing.production_orders,
15236            &manufacturing.quality_inspections,
15237            &manufacturing.cycle_counts,
15238        );
15239        builder.add_bank_documents(&banking.customers, &banking.accounts, &banking.transactions);
15240        builder.add_audit_documents(
15241            &audit.engagements,
15242            &audit.workpapers,
15243            &audit.findings,
15244            &audit.evidence,
15245            &audit.risk_assessments,
15246            &audit.judgments,
15247            &audit.materiality_calculations,
15248            &audit.audit_opinions,
15249            &audit.going_concern_assessments,
15250        );
15251        builder.add_bank_recon_documents(&financial_reporting.bank_reconciliations);
15252
15253        // OCPM events as hyperedges
15254        if let Some(ref event_log) = ocpm.event_log {
15255            builder.add_ocpm_events(event_log);
15256        }
15257
15258        // Compliance regulations as cross-layer nodes
15259        if self.config.compliance_regulations.enabled
15260            && hg_settings.governance_layer.include_controls
15261        {
15262            // Reconstruct ComplianceStandard objects from the registry
15263            let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15264            let standards: Vec<datasynth_core::models::compliance::ComplianceStandard> = compliance
15265                .standard_records
15266                .iter()
15267                .filter_map(|r| {
15268                    let sid = datasynth_core::models::compliance::StandardId::parse(&r.standard_id);
15269                    registry.get(&sid).cloned()
15270                })
15271                .collect();
15272
15273            builder.add_compliance_regulations(
15274                &standards,
15275                &compliance.findings,
15276                &compliance.filings,
15277            );
15278        }
15279
15280        // Layer 3: Accounting Network
15281        builder.add_accounts(coa);
15282        builder.add_journal_entries_as_hyperedges(entries);
15283
15284        // Build the hypergraph
15285        let hypergraph = builder.build();
15286
15287        // Export
15288        let output_dir = self
15289            .output_path
15290            .clone()
15291            .unwrap_or_else(|| PathBuf::from(&self.config.output.output_directory));
15292        let hg_dir = output_dir
15293            .join(&self.config.graph_export.output_subdirectory)
15294            .join(&hg_settings.output_subdirectory);
15295
15296        // Branch on output format
15297        let (num_nodes, num_edges, num_hyperedges) = match hg_settings.output_format.as_str() {
15298            "unified" => {
15299                let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15300                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15301                    SynthError::generation(format!("Unified hypergraph export failed: {e}"))
15302                })?;
15303                (
15304                    metadata.num_nodes,
15305                    metadata.num_edges,
15306                    metadata.num_hyperedges,
15307                )
15308            }
15309            _ => {
15310                // "native" or any unrecognized format → use existing exporter
15311                let exporter = HypergraphExporter::new(HypergraphExportConfig::default());
15312                let metadata = exporter.export(&hypergraph, &hg_dir).map_err(|e| {
15313                    SynthError::generation(format!("Hypergraph export failed: {e}"))
15314                })?;
15315                (
15316                    metadata.num_nodes,
15317                    metadata.num_edges,
15318                    metadata.num_hyperedges,
15319                )
15320            }
15321        };
15322
15323        // Stream to RustGraph ingest endpoint if configured
15324        #[cfg(feature = "streaming")]
15325        if let Some(ref target_url) = hg_settings.stream_target {
15326            use crate::stream_client::{StreamClient, StreamConfig};
15327            use std::io::Write as _;
15328
15329            let api_key = std::env::var("RUSTGRAPH_API_KEY").ok();
15330            let stream_config = StreamConfig {
15331                target_url: target_url.clone(),
15332                batch_size: hg_settings.stream_batch_size,
15333                api_key,
15334                ..StreamConfig::default()
15335            };
15336
15337            match StreamClient::new(stream_config) {
15338                Ok(mut client) => {
15339                    let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
15340                    match exporter.export_to_writer(&hypergraph, &mut client) {
15341                        Ok(_) => {
15342                            if let Err(e) = client.flush() {
15343                                warn!("Failed to flush stream client: {}", e);
15344                            } else {
15345                                info!("Streamed {} records to {}", client.total_sent(), target_url);
15346                            }
15347                        }
15348                        Err(e) => {
15349                            warn!("Streaming export failed: {}", e);
15350                        }
15351                    }
15352                }
15353                Err(e) => {
15354                    warn!("Failed to create stream client: {}", e);
15355                }
15356            }
15357        }
15358
15359        // Update stats
15360        stats.graph_node_count += num_nodes;
15361        stats.graph_edge_count += num_edges;
15362        stats.graph_export_count += 1;
15363
15364        Ok(HypergraphExportInfo {
15365            node_count: num_nodes,
15366            edge_count: num_edges,
15367            hyperedge_count: num_hyperedges,
15368            output_path: hg_dir,
15369        })
15370    }
15371
15372    /// Generate banking KYC/AML data.
15373    ///
15374    /// Creates banking customers, accounts, and transactions with AML typology injection.
15375    /// Uses the BankingOrchestrator from synth-banking crate.
15376    fn generate_banking_data(&mut self) -> SynthResult<BankingSnapshot> {
15377        let pb = self.create_progress_bar(100, "Generating Banking Data");
15378
15379        // Build the banking orchestrator from config
15380        let orchestrator = BankingOrchestratorBuilder::new()
15381            .config(self.config.banking.clone())
15382            .seed(self.seed + 9000)
15383            .country_pack(self.primary_pack().clone())
15384            .build();
15385
15386        if let Some(pb) = &pb {
15387            pb.inc(10);
15388        }
15389
15390        // Generate the banking data
15391        let result = orchestrator.generate();
15392
15393        if let Some(pb) = &pb {
15394            pb.inc(90);
15395            pb.finish_with_message(format!(
15396                "Banking: {} customers, {} transactions",
15397                result.customers.len(),
15398                result.transactions.len()
15399            ));
15400        }
15401
15402        // Cross-reference banking customers with core master data so that
15403        // banking customer names align with the enterprise customer list.
15404        // We rotate through core customers, overlaying their name and country
15405        // onto the generated banking customers where possible.
15406        let mut banking_customers = result.customers;
15407        let core_customers = &self.master_data.customers;
15408        if !core_customers.is_empty() {
15409            for (i, bc) in banking_customers.iter_mut().enumerate() {
15410                let core = &core_customers[i % core_customers.len()];
15411                bc.name = CustomerName::business(&core.name);
15412                bc.residence_country = core.country.clone();
15413                bc.enterprise_customer_id = Some(core.customer_id.clone());
15414            }
15415            debug!(
15416                "Cross-referenced {} banking customers with {} core customers",
15417                banking_customers.len(),
15418                core_customers.len()
15419            );
15420        }
15421
15422        Ok(BankingSnapshot {
15423            customers: banking_customers,
15424            accounts: result.accounts,
15425            transactions: result.transactions,
15426            transaction_labels: result.transaction_labels,
15427            customer_labels: result.customer_labels,
15428            account_labels: result.account_labels,
15429            relationship_labels: result.relationship_labels,
15430            narratives: result.narratives,
15431            suspicious_count: result.stats.suspicious_count,
15432            scenario_count: result.scenarios.len(),
15433        })
15434    }
15435
15436    /// Calculate total transactions to generate.
15437    fn calculate_total_transactions(&self) -> u64 {
15438        let months = self.config.global.period_months as f64;
15439        self.config
15440            .companies
15441            .iter()
15442            .map(|c| {
15443                let annual = c.annual_transaction_volume.count() as f64;
15444                let weighted = annual * c.volume_weight;
15445                (weighted * months / 12.0) as u64
15446            })
15447            .sum()
15448    }
15449
15450    /// Create a progress bar if progress display is enabled.
15451    fn create_progress_bar(&self, total: u64, message: &str) -> Option<ProgressBar> {
15452        if !self.phase_config.show_progress {
15453            return None;
15454        }
15455
15456        let pb = if let Some(mp) = &self.multi_progress {
15457            mp.add(ProgressBar::new(total))
15458        } else {
15459            ProgressBar::new(total)
15460        };
15461
15462        pb.set_style(
15463            ProgressStyle::default_bar()
15464                .template(&format!(
15465                    "{{spinner:.green}} {message} [{{elapsed_precise}}] [{{bar:40.cyan/blue}}] {{pos}}/{{len}} ({{per_sec}})"
15466                ))
15467                .expect("Progress bar template should be valid - uses only standard indicatif placeholders")
15468                .progress_chars("#>-"),
15469        );
15470
15471        Some(pb)
15472    }
15473
15474    /// Get the generated chart of accounts.
15475    pub fn get_coa(&self) -> Option<Arc<ChartOfAccounts>> {
15476        self.coa.clone()
15477    }
15478
15479    /// Get the generated master data.
15480    pub fn get_master_data(&self) -> &MasterDataSnapshot {
15481        &self.master_data
15482    }
15483
15484    /// Phase: Generate compliance regulations data (standards, procedures, findings, filings, graph).
15485    fn phase_compliance_regulations(
15486        &mut self,
15487        _stats: &mut EnhancedGenerationStatistics,
15488    ) -> SynthResult<ComplianceRegulationsSnapshot> {
15489        if !self.phase_config.generate_compliance_regulations {
15490            return Ok(ComplianceRegulationsSnapshot::default());
15491        }
15492
15493        info!("Phase: Generating Compliance Regulations Data");
15494
15495        let cr_config = &self.config.compliance_regulations;
15496
15497        // Determine jurisdictions: from config or inferred from companies
15498        let jurisdictions: Vec<String> = if cr_config.jurisdictions.is_empty() {
15499            self.config
15500                .companies
15501                .iter()
15502                .map(|c| c.country.clone())
15503                .collect::<std::collections::HashSet<_>>()
15504                .into_iter()
15505                .collect()
15506        } else {
15507            cr_config.jurisdictions.clone()
15508        };
15509
15510        // Determine reference date
15511        let fallback_date =
15512            NaiveDate::from_ymd_opt(2025, 1, 1).expect("static date is always valid");
15513        let reference_date = cr_config
15514            .reference_date
15515            .as_ref()
15516            .and_then(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
15517            .unwrap_or_else(|| {
15518                NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15519                    .unwrap_or(fallback_date)
15520            });
15521
15522        // Generate standards registry data
15523        let reg_gen = datasynth_generators::compliance::RegulationGenerator::new();
15524        let standard_records = reg_gen.generate_standard_records(&jurisdictions, reference_date);
15525        let cross_reference_records = reg_gen.generate_cross_reference_records();
15526        let jurisdiction_records =
15527            reg_gen.generate_jurisdiction_records(&jurisdictions, reference_date);
15528
15529        info!(
15530            "  Standards: {} records, {} cross-references, {} jurisdictions",
15531            standard_records.len(),
15532            cross_reference_records.len(),
15533            jurisdiction_records.len()
15534        );
15535
15536        // Generate audit procedures (if enabled)
15537        let audit_procedures = if cr_config.audit_procedures.enabled {
15538            let proc_config = datasynth_generators::compliance::ProcedureGeneratorConfig {
15539                procedures_per_standard: cr_config.audit_procedures.procedures_per_standard,
15540                sampling_method: cr_config.audit_procedures.sampling_method.clone(),
15541                confidence_level: cr_config.audit_procedures.confidence_level,
15542                tolerable_misstatement: cr_config.audit_procedures.tolerable_misstatement,
15543            };
15544            let mut proc_gen = datasynth_generators::compliance::ProcedureGenerator::with_config(
15545                self.seed + 9000,
15546                proc_config,
15547            );
15548            let registry = reg_gen.registry();
15549            let mut all_procs = Vec::new();
15550            for jurisdiction in &jurisdictions {
15551                let procs = proc_gen.generate_procedures(registry, jurisdiction, reference_date);
15552                all_procs.extend(procs);
15553            }
15554            info!("  Audit procedures: {}", all_procs.len());
15555            all_procs
15556        } else {
15557            Vec::new()
15558        };
15559
15560        // Generate compliance findings (if enabled)
15561        let findings = if cr_config.findings.enabled && !audit_procedures.is_empty() {
15562            let finding_config =
15563                datasynth_generators::compliance::ComplianceFindingGeneratorConfig {
15564                    finding_rate: cr_config.findings.finding_rate,
15565                    material_weakness_rate: cr_config.findings.material_weakness_rate,
15566                    significant_deficiency_rate: cr_config.findings.significant_deficiency_rate,
15567                    generate_remediation: cr_config.findings.generate_remediation,
15568                };
15569            let mut finding_gen =
15570                datasynth_generators::compliance::ComplianceFindingGenerator::with_config(
15571                    self.seed + 9100,
15572                    finding_config,
15573                );
15574            let mut all_findings = Vec::new();
15575            for company in &self.config.companies {
15576                let company_findings =
15577                    finding_gen.generate_findings(&audit_procedures, &company.code, reference_date);
15578                all_findings.extend(company_findings);
15579            }
15580            info!("  Compliance findings: {}", all_findings.len());
15581            all_findings
15582        } else {
15583            Vec::new()
15584        };
15585
15586        // Generate regulatory filings (if enabled)
15587        let filings = if cr_config.filings.enabled {
15588            let filing_config = datasynth_generators::compliance::FilingGeneratorConfig {
15589                filing_types: cr_config.filings.filing_types.clone(),
15590                generate_status_progression: cr_config.filings.generate_status_progression,
15591            };
15592            let mut filing_gen = datasynth_generators::compliance::FilingGenerator::with_config(
15593                self.seed + 9200,
15594                filing_config,
15595            );
15596            let company_codes: Vec<String> = self
15597                .config
15598                .companies
15599                .iter()
15600                .map(|c| c.code.clone())
15601                .collect();
15602            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
15603                .unwrap_or(fallback_date);
15604            let filings = filing_gen.generate_filings(
15605                &company_codes,
15606                &jurisdictions,
15607                start_date,
15608                self.config.global.period_months,
15609            );
15610            info!("  Regulatory filings: {}", filings.len());
15611            filings
15612        } else {
15613            Vec::new()
15614        };
15615
15616        // Build compliance graph (if enabled)
15617        let compliance_graph = if cr_config.graph.enabled {
15618            let graph_config = datasynth_graph::ComplianceGraphConfig {
15619                include_standard_nodes: cr_config.graph.include_compliance_nodes,
15620                include_jurisdiction_nodes: cr_config.graph.include_compliance_nodes,
15621                include_cross_references: cr_config.graph.include_cross_references,
15622                include_supersession_edges: cr_config.graph.include_supersession_edges,
15623                include_account_links: cr_config.graph.include_account_links,
15624                include_control_links: cr_config.graph.include_control_links,
15625                include_company_links: cr_config.graph.include_company_links,
15626            };
15627            let mut builder = datasynth_graph::ComplianceGraphBuilder::new(graph_config);
15628
15629            // Add standard nodes
15630            let standard_inputs: Vec<datasynth_graph::StandardNodeInput> = standard_records
15631                .iter()
15632                .map(|r| datasynth_graph::StandardNodeInput {
15633                    standard_id: r.standard_id.clone(),
15634                    title: r.title.clone(),
15635                    category: r.category.clone(),
15636                    domain: r.domain.clone(),
15637                    is_active: r.is_active,
15638                    features: vec![if r.is_active { 1.0 } else { 0.0 }],
15639                    applicable_account_types: r.applicable_account_types.clone(),
15640                    applicable_processes: r.applicable_processes.clone(),
15641                })
15642                .collect();
15643            builder.add_standards(&standard_inputs);
15644
15645            // Add jurisdiction nodes
15646            let jurisdiction_inputs: Vec<datasynth_graph::JurisdictionNodeInput> =
15647                jurisdiction_records
15648                    .iter()
15649                    .map(|r| datasynth_graph::JurisdictionNodeInput {
15650                        country_code: r.country_code.clone(),
15651                        country_name: r.country_name.clone(),
15652                        framework: r.accounting_framework.clone(),
15653                        standard_count: r.standard_count,
15654                        tax_rate: r.statutory_tax_rate,
15655                    })
15656                    .collect();
15657            builder.add_jurisdictions(&jurisdiction_inputs);
15658
15659            // Add cross-reference edges
15660            let xref_inputs: Vec<datasynth_graph::CrossReferenceEdgeInput> =
15661                cross_reference_records
15662                    .iter()
15663                    .map(|r| datasynth_graph::CrossReferenceEdgeInput {
15664                        from_standard: r.from_standard.clone(),
15665                        to_standard: r.to_standard.clone(),
15666                        relationship: r.relationship.clone(),
15667                        convergence_level: r.convergence_level,
15668                    })
15669                    .collect();
15670            builder.add_cross_references(&xref_inputs);
15671
15672            // Add jurisdiction→standard mappings
15673            let mapping_inputs: Vec<datasynth_graph::JurisdictionMappingInput> = standard_records
15674                .iter()
15675                .map(|r| datasynth_graph::JurisdictionMappingInput {
15676                    country_code: r.jurisdiction.clone(),
15677                    standard_id: r.standard_id.clone(),
15678                })
15679                .collect();
15680            builder.add_jurisdiction_mappings(&mapping_inputs);
15681
15682            // Add procedure nodes
15683            let proc_inputs: Vec<datasynth_graph::ProcedureNodeInput> = audit_procedures
15684                .iter()
15685                .map(|p| datasynth_graph::ProcedureNodeInput {
15686                    procedure_id: p.procedure_id.clone(),
15687                    standard_id: p.standard_id.clone(),
15688                    procedure_type: p.procedure_type.clone(),
15689                    sample_size: p.sample_size,
15690                    confidence_level: p.confidence_level,
15691                })
15692                .collect();
15693            builder.add_procedures(&proc_inputs);
15694
15695            // Add finding nodes
15696            let finding_inputs: Vec<datasynth_graph::FindingNodeInput> = findings
15697                .iter()
15698                .map(|f| datasynth_graph::FindingNodeInput {
15699                    finding_id: f.finding_id.to_string(),
15700                    standard_id: f
15701                        .related_standards
15702                        .first()
15703                        .map(|s| s.as_str().to_string())
15704                        .unwrap_or_default(),
15705                    severity: f.severity.to_string(),
15706                    deficiency_level: f.deficiency_level.to_string(),
15707                    severity_score: f.deficiency_level.severity_score(),
15708                    control_id: f.control_id.clone(),
15709                    affected_accounts: f.affected_accounts.clone(),
15710                })
15711                .collect();
15712            builder.add_findings(&finding_inputs);
15713
15714            // Cross-domain: link standards to accounts from chart of accounts
15715            if cr_config.graph.include_account_links {
15716                let registry = datasynth_standards::registry::StandardRegistry::with_built_in();
15717                let mut account_links: Vec<datasynth_graph::AccountLinkInput> = Vec::new();
15718                for std_record in &standard_records {
15719                    if let Some(std_obj) =
15720                        registry.get(&datasynth_core::models::compliance::StandardId::parse(
15721                            &std_record.standard_id,
15722                        ))
15723                    {
15724                        for acct_type in &std_obj.applicable_account_types {
15725                            account_links.push(datasynth_graph::AccountLinkInput {
15726                                standard_id: std_record.standard_id.clone(),
15727                                account_code: acct_type.clone(),
15728                                account_name: acct_type.clone(),
15729                            });
15730                        }
15731                    }
15732                }
15733                builder.add_account_links(&account_links);
15734            }
15735
15736            // Cross-domain: link standards to internal controls
15737            if cr_config.graph.include_control_links {
15738                let mut control_links = Vec::new();
15739                // SOX/PCAOB standards link to all controls
15740                let sox_like_ids: Vec<String> = standard_records
15741                    .iter()
15742                    .filter(|r| {
15743                        r.standard_id.starts_with("SOX")
15744                            || r.standard_id.starts_with("PCAOB-AS-2201")
15745                    })
15746                    .map(|r| r.standard_id.clone())
15747                    .collect();
15748                // Get control IDs from config (C001-C060 standard controls)
15749                let control_ids = [
15750                    ("C001", "Cash Controls"),
15751                    ("C002", "Large Transaction Approval"),
15752                    ("C010", "PO Approval"),
15753                    ("C011", "Three-Way Match"),
15754                    ("C020", "Revenue Recognition"),
15755                    ("C021", "Credit Check"),
15756                    ("C030", "Manual JE Approval"),
15757                    ("C031", "Period Close Review"),
15758                    ("C032", "Account Reconciliation"),
15759                    ("C040", "Payroll Processing"),
15760                    ("C050", "Fixed Asset Capitalization"),
15761                    ("C060", "Intercompany Elimination"),
15762                ];
15763                for sox_id in &sox_like_ids {
15764                    for (ctrl_id, ctrl_name) in &control_ids {
15765                        control_links.push(datasynth_graph::ControlLinkInput {
15766                            standard_id: sox_id.clone(),
15767                            control_id: ctrl_id.to_string(),
15768                            control_name: ctrl_name.to_string(),
15769                        });
15770                    }
15771                }
15772                builder.add_control_links(&control_links);
15773            }
15774
15775            // Cross-domain: filing nodes with company links
15776            if cr_config.graph.include_company_links {
15777                let filing_inputs: Vec<datasynth_graph::FilingNodeInput> = filings
15778                    .iter()
15779                    .enumerate()
15780                    .map(|(i, f)| datasynth_graph::FilingNodeInput {
15781                        filing_id: format!("F{:04}", i + 1),
15782                        filing_type: f.filing_type.to_string(),
15783                        company_code: f.company_code.clone(),
15784                        jurisdiction: f.jurisdiction.clone(),
15785                        status: format!("{:?}", f.status),
15786                    })
15787                    .collect();
15788                builder.add_filings(&filing_inputs);
15789            }
15790
15791            let graph = builder.build();
15792            info!(
15793                "  Compliance graph: {} nodes, {} edges",
15794                graph.nodes.len(),
15795                graph.edges.len()
15796            );
15797            Some(graph)
15798        } else {
15799            None
15800        };
15801
15802        self.check_resources_with_log("post-compliance-regulations")?;
15803
15804        Ok(ComplianceRegulationsSnapshot {
15805            standard_records,
15806            cross_reference_records,
15807            jurisdiction_records,
15808            audit_procedures,
15809            findings,
15810            filings,
15811            compliance_graph,
15812        })
15813    }
15814
15815    /// Build a lineage graph describing config → phase → output relationships.
15816    fn build_lineage_graph(&self) -> super::lineage::LineageGraph {
15817        use super::lineage::LineageGraphBuilder;
15818
15819        let mut builder = LineageGraphBuilder::new();
15820
15821        // Config sections
15822        builder.add_config_section("config:global", "Global Config");
15823        builder.add_config_section("config:chart_of_accounts", "Chart of Accounts Config");
15824        builder.add_config_section("config:transactions", "Transaction Config");
15825
15826        // Generator phases
15827        builder.add_generator_phase("phase:coa", "Chart of Accounts Generation");
15828        builder.add_generator_phase("phase:je", "Journal Entry Generation");
15829
15830        // Config → phase edges
15831        builder.configured_by("phase:coa", "config:chart_of_accounts");
15832        builder.configured_by("phase:je", "config:transactions");
15833
15834        // Output files
15835        builder.add_output_file("output:je", "Journal Entries", "sample_entries.json");
15836        builder.produced_by("output:je", "phase:je");
15837
15838        // Optional phases based on config
15839        if self.phase_config.generate_master_data {
15840            builder.add_config_section("config:master_data", "Master Data Config");
15841            builder.add_generator_phase("phase:master_data", "Master Data Generation");
15842            builder.configured_by("phase:master_data", "config:master_data");
15843            builder.input_to("phase:master_data", "phase:je");
15844        }
15845
15846        if self.phase_config.generate_document_flows {
15847            builder.add_config_section("config:document_flows", "Document Flow Config");
15848            builder.add_generator_phase("phase:p2p", "P2P Document Flow");
15849            builder.add_generator_phase("phase:o2c", "O2C Document Flow");
15850            builder.configured_by("phase:p2p", "config:document_flows");
15851            builder.configured_by("phase:o2c", "config:document_flows");
15852
15853            builder.add_output_file("output:po", "Purchase Orders", "purchase_orders.csv");
15854            builder.add_output_file("output:gr", "Goods Receipts", "goods_receipts.csv");
15855            builder.add_output_file("output:vi", "Vendor Invoices", "vendor_invoices.csv");
15856            builder.add_output_file("output:so", "Sales Orders", "sales_orders.csv");
15857            builder.add_output_file("output:ci", "Customer Invoices", "customer_invoices.csv");
15858
15859            builder.produced_by("output:po", "phase:p2p");
15860            builder.produced_by("output:gr", "phase:p2p");
15861            builder.produced_by("output:vi", "phase:p2p");
15862            builder.produced_by("output:so", "phase:o2c");
15863            builder.produced_by("output:ci", "phase:o2c");
15864        }
15865
15866        if self.phase_config.inject_anomalies {
15867            builder.add_config_section("config:fraud", "Fraud/Anomaly Config");
15868            builder.add_generator_phase("phase:anomaly", "Anomaly Injection");
15869            builder.configured_by("phase:anomaly", "config:fraud");
15870            builder.add_output_file(
15871                "output:labels",
15872                "Anomaly Labels",
15873                "labels/anomaly_labels.csv",
15874            );
15875            builder.produced_by("output:labels", "phase:anomaly");
15876        }
15877
15878        if self.phase_config.generate_audit {
15879            builder.add_config_section("config:audit", "Audit Config");
15880            builder.add_generator_phase("phase:audit", "Audit Data Generation");
15881            builder.configured_by("phase:audit", "config:audit");
15882        }
15883
15884        if self.phase_config.generate_banking {
15885            builder.add_config_section("config:banking", "Banking Config");
15886            builder.add_generator_phase("phase:banking", "Banking KYC/AML Generation");
15887            builder.configured_by("phase:banking", "config:banking");
15888        }
15889
15890        if self.config.llm.enabled {
15891            builder.add_config_section("config:llm", "LLM Enrichment Config");
15892            builder.add_generator_phase("phase:llm_enrichment", "LLM Enrichment");
15893            builder.configured_by("phase:llm_enrichment", "config:llm");
15894        }
15895
15896        if self.config.diffusion.enabled {
15897            builder.add_config_section("config:diffusion", "Diffusion Enhancement Config");
15898            builder.add_generator_phase("phase:diffusion", "Diffusion Enhancement");
15899            builder.configured_by("phase:diffusion", "config:diffusion");
15900        }
15901
15902        if self.config.causal.enabled {
15903            builder.add_config_section("config:causal", "Causal Generation Config");
15904            builder.add_generator_phase("phase:causal", "Causal Overlay");
15905            builder.configured_by("phase:causal", "config:causal");
15906        }
15907
15908        builder.build()
15909    }
15910
15911    // -----------------------------------------------------------------------
15912    // Trial-balance helpers used to replace hardcoded proxy values
15913    // -----------------------------------------------------------------------
15914
15915    /// Compute total revenue for a company from its journal entries.
15916    ///
15917    /// Revenue accounts start with "4" and are credit-normal. Returns the sum of
15918    /// net credits on all revenue-account lines filtered to `company_code`.
15919    fn compute_company_revenue(
15920        entries: &[JournalEntry],
15921        company_code: &str,
15922    ) -> rust_decimal::Decimal {
15923        use rust_decimal::Decimal;
15924        let mut revenue = Decimal::ZERO;
15925        for je in entries {
15926            if je.header.company_code != company_code {
15927                continue;
15928            }
15929            for line in &je.lines {
15930                if line.gl_account.starts_with('4') {
15931                    // Revenue is credit-normal
15932                    revenue += line.credit_amount - line.debit_amount;
15933                }
15934            }
15935        }
15936        revenue.max(Decimal::ZERO)
15937    }
15938
15939    /// Compute net assets (assets minus liabilities) for an entity from journal entries.
15940    ///
15941    /// Asset accounts start with "1"; liability accounts start with "2".
15942    fn compute_entity_net_assets(
15943        entries: &[JournalEntry],
15944        entity_code: &str,
15945    ) -> rust_decimal::Decimal {
15946        use rust_decimal::Decimal;
15947        let mut asset_net = Decimal::ZERO;
15948        let mut liability_net = Decimal::ZERO;
15949        for je in entries {
15950            if je.header.company_code != entity_code {
15951                continue;
15952            }
15953            for line in &je.lines {
15954                if line.gl_account.starts_with('1') {
15955                    asset_net += line.debit_amount - line.credit_amount;
15956                } else if line.gl_account.starts_with('2') {
15957                    liability_net += line.credit_amount - line.debit_amount;
15958                }
15959            }
15960        }
15961        asset_net - liability_net
15962    }
15963
15964    /// v3.5.1+: Run the statistical validation suite configured in
15965    /// `distributions.validation.tests` over the final amount
15966    /// distribution.  Collects every non-zero line-level amount (debit +
15967    /// credit) and hands it to the runners in
15968    /// `datasynth_core::distributions::validation`.
15969    ///
15970    /// Returns `Ok(None)` when validation is disabled (the default).
15971    /// When `reporting.fail_on_error = true` and any test fails, returns
15972    /// `Err` with a concise message; otherwise attaches the report to
15973    /// the result and lets callers inspect it.
15974    fn phase_statistical_validation(
15975        &self,
15976        entries: &[JournalEntry],
15977    ) -> SynthResult<Option<datasynth_core::distributions::StatisticalValidationReport>> {
15978        use datasynth_config::schema::StatisticalTestConfig;
15979        use datasynth_core::distributions::{
15980            run_anderson_darling, run_benford_first_digit, run_chi_squared, run_correlation_check,
15981            run_ks_uniform_log, StatisticalTestResult, StatisticalValidationReport, TestOutcome,
15982        };
15983        use rust_decimal::prelude::ToPrimitive;
15984
15985        let cfg = &self.config.distributions.validation;
15986        if !cfg.enabled {
15987            return Ok(None);
15988        }
15989
15990        // Collect per-line positive amounts (debit + credit is zero on the
15991        // non-posting side, so this naturally picks the magnitude).
15992        let amounts: Vec<rust_decimal::Decimal> = entries
15993            .iter()
15994            .flat_map(|je| je.lines.iter().map(|l| l.debit_amount + l.credit_amount))
15995            .filter(|a| *a > rust_decimal::Decimal::ZERO)
15996            .collect();
15997
15998        // v4.1.0+ paired (amount, line_count) per entry for correlation
15999        // checks. Amount per entry is the debit-side total (= credit-side
16000        // total for a balanced entry).
16001        let paired_amount_linecount: Vec<(f64, f64)> = entries
16002            .iter()
16003            .filter_map(|je| {
16004                let amt: rust_decimal::Decimal = je.lines.iter().map(|l| l.debit_amount).sum();
16005                if amt > rust_decimal::Decimal::ZERO {
16006                    amt.to_f64().map(|a| (a, je.lines.len() as f64))
16007                } else {
16008                    None
16009                }
16010            })
16011            .collect();
16012
16013        let mut results: Vec<StatisticalTestResult> = Vec::with_capacity(cfg.tests.len());
16014        for test_cfg in &cfg.tests {
16015            match test_cfg {
16016                StatisticalTestConfig::BenfordFirstDigit {
16017                    threshold_mad,
16018                    warning_mad,
16019                } => {
16020                    results.push(run_benford_first_digit(
16021                        &amounts,
16022                        *threshold_mad,
16023                        *warning_mad,
16024                    ));
16025                }
16026                StatisticalTestConfig::ChiSquared { bins, significance } => {
16027                    results.push(run_chi_squared(&amounts, *bins, *significance));
16028                }
16029                StatisticalTestConfig::DistributionFit {
16030                    target: _,
16031                    ks_significance,
16032                    method: _,
16033                } => {
16034                    // v3.5.1+: log-uniformity KS check. Target-specific
16035                    // fits against Normal / Exponential land in v4.1.1+.
16036                    results.push(run_ks_uniform_log(&amounts, *ks_significance));
16037                }
16038                StatisticalTestConfig::AndersonDarling {
16039                    target: _,
16040                    significance,
16041                } => {
16042                    // v4.1.0+: A*² statistic against log-normal on the
16043                    // log-scale. Other targets follow the same pattern.
16044                    results.push(run_anderson_darling(&amounts, *significance));
16045                }
16046                StatisticalTestConfig::CorrelationCheck {
16047                    expected_correlations,
16048                } => {
16049                    // v4.1.0+: (amount, line_count) is tracked today.
16050                    // Other pairs resolve to Skipped pending richer
16051                    // per-entry attribute collection.
16052                    if expected_correlations.is_empty() {
16053                        results.push(StatisticalTestResult {
16054                            name: "correlation_check".to_string(),
16055                            outcome: TestOutcome::Skipped,
16056                            statistic: 0.0,
16057                            threshold: 0.0,
16058                            message: "no expected correlations declared".to_string(),
16059                        });
16060                    } else {
16061                        for ec in expected_correlations {
16062                            let pair_key = format!("{}_{}", ec.field1, ec.field2);
16063                            let is_amount_linecount = (ec.field1 == "amount"
16064                                && ec.field2 == "line_count")
16065                                || (ec.field1 == "line_count" && ec.field2 == "amount");
16066                            if is_amount_linecount {
16067                                let xs: Vec<f64> =
16068                                    paired_amount_linecount.iter().map(|(a, _)| *a).collect();
16069                                let ys: Vec<f64> =
16070                                    paired_amount_linecount.iter().map(|(_, l)| *l).collect();
16071                                results.push(run_correlation_check(
16072                                    &pair_key,
16073                                    &xs,
16074                                    &ys,
16075                                    ec.expected_r,
16076                                    ec.tolerance,
16077                                ));
16078                            } else {
16079                                results.push(StatisticalTestResult {
16080                                    name: format!("correlation_check_{pair_key}"),
16081                                    outcome: TestOutcome::Skipped,
16082                                    statistic: 0.0,
16083                                    threshold: ec.tolerance,
16084                                    message: format!(
16085                                        "pair ({},{}) not tracked; only (amount, line_count) supported in v4.1.0",
16086                                        ec.field1, ec.field2
16087                                    ),
16088                                });
16089                            }
16090                        }
16091                    }
16092                }
16093            }
16094        }
16095
16096        let report = StatisticalValidationReport {
16097            sample_count: amounts.len(),
16098            results,
16099        };
16100
16101        if cfg.reporting.fail_on_error && !report.all_passed() {
16102            let failed = report.failed_names().join(", ");
16103            return Err(SynthError::validation(format!(
16104                "statistical validation failed: {failed}"
16105            )));
16106        }
16107
16108        Ok(Some(report))
16109    }
16110
16111    /// v3.3.0: analytics-metadata phase.
16112    ///
16113    /// Runs AFTER all JE-adding phases (including Phase 20b's
16114    /// fraud-bias sweep). Four sub-generators fire in sequence, each
16115    /// gated by an individual `analytics_metadata.<flag>` toggle:
16116    ///
16117    /// 1. `PriorYearGenerator` — prior-year comparatives derived from
16118    ///    current-period account balances.
16119    /// 2. `IndustryBenchmarkGenerator` — industry benchmarks for the
16120    ///    configured `global.industry`.
16121    /// 3. `ManagementReportGenerator` — management-report artefacts.
16122    /// 4. `DriftEventGenerator` — post-generation drift-event labels.
16123    fn phase_analytics_metadata(
16124        &mut self,
16125        entries: &[JournalEntry],
16126    ) -> SynthResult<AnalyticsMetadataSnapshot> {
16127        use datasynth_generators::drift_event_generator::DriftEventGenerator;
16128        use datasynth_generators::industry_benchmark_generator::IndustryBenchmarkGenerator;
16129        use datasynth_generators::management_report_generator::ManagementReportGenerator;
16130        use datasynth_generators::prior_year_generator::PriorYearGenerator;
16131        use std::collections::BTreeMap;
16132
16133        let mut snap = AnalyticsMetadataSnapshot::default();
16134
16135        if !self.phase_config.generate_analytics_metadata {
16136            return Ok(snap);
16137        }
16138
16139        let cfg = &self.config.analytics_metadata;
16140        let fiscal_year = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16141            .map(|d| d.year())
16142            .unwrap_or(2025);
16143
16144        // ---- 1. Prior-year comparatives ----
16145        if cfg.prior_year {
16146            let mut gen = PriorYearGenerator::new(self.seed + 9100);
16147            for company in &self.config.companies {
16148                // Aggregate current-period balances per account code +
16149                // account name from the entries slice.
16150                let mut balances: BTreeMap<String, (String, rust_decimal::Decimal)> =
16151                    BTreeMap::new();
16152                for je in entries {
16153                    if je.header.company_code != company.code {
16154                        continue;
16155                    }
16156                    for line in &je.lines {
16157                        let entry = balances.entry(line.gl_account.clone()).or_insert_with(|| {
16158                            (line.gl_account.clone(), rust_decimal::Decimal::ZERO)
16159                        });
16160                        entry.1 += line.debit_amount - line.credit_amount;
16161                    }
16162                }
16163                let current: Vec<(String, String, rust_decimal::Decimal)> = balances
16164                    .into_iter()
16165                    .filter(|(_, (_, bal))| !bal.is_zero())
16166                    .map(|(code, (name, bal))| (code, name, bal))
16167                    .collect();
16168                if !current.is_empty() {
16169                    let comparatives =
16170                        gen.generate_comparatives(&company.code, fiscal_year, &current);
16171                    snap.prior_year_comparatives.extend(comparatives);
16172                }
16173            }
16174            info!(
16175                "v3.3.0 analytics: {} prior-year comparatives across {} companies",
16176                snap.prior_year_comparatives.len(),
16177                self.config.companies.len()
16178            );
16179        }
16180
16181        // ---- 2. Industry benchmarks ----
16182        if cfg.industry_benchmark {
16183            use datasynth_core::models::IndustrySector;
16184            let industry = match self.config.global.industry {
16185                IndustrySector::Manufacturing => "manufacturing",
16186                IndustrySector::Retail => "retail",
16187                IndustrySector::FinancialServices => "financial_services",
16188                IndustrySector::Technology => "technology",
16189                IndustrySector::Healthcare => "healthcare",
16190                _ => "other",
16191            };
16192            let mut gen = IndustryBenchmarkGenerator::new(self.seed + 9200);
16193            let benchmarks = gen.generate(industry, fiscal_year);
16194            info!(
16195                "v3.3.0 analytics: {} industry benchmarks for '{industry}'",
16196                benchmarks.len()
16197            );
16198            snap.industry_benchmarks = benchmarks;
16199        }
16200
16201        // ---- 3. Management reports ----
16202        if cfg.management_reports {
16203            let mut gen = ManagementReportGenerator::new(self.seed + 9300);
16204            let period_months = self.config.global.period_months;
16205            for company in &self.config.companies {
16206                let reports =
16207                    gen.generate_reports(&company.code, fiscal_year as u32, period_months);
16208                snap.management_reports.extend(reports);
16209            }
16210            info!(
16211                "v3.3.0 analytics: {} management reports across {} companies",
16212                snap.management_reports.len(),
16213                self.config.companies.len()
16214            );
16215        }
16216
16217        // ---- 4. Drift-event labels ----
16218        if cfg.drift_events {
16219            let fallback_start = NaiveDate::from_ymd_opt(2025, 1, 1)
16220                .expect("hardcoded NaiveDate 2025-01-01 is valid");
16221            let start_date = NaiveDate::parse_from_str(&self.config.global.start_date, "%Y-%m-%d")
16222                .unwrap_or(fallback_start);
16223            let end_date = start_date + chrono::Months::new(self.config.global.period_months);
16224            let mut gen = DriftEventGenerator::new(self.seed + 9400);
16225            let drifts = gen.generate_standalone_drifts(start_date, end_date);
16226            info!("v3.3.0 analytics: {} drift-event labels", drifts.len());
16227            snap.drift_events = drifts;
16228        }
16229        // `entries` parameter reserved for future JE-aware drift detection
16230        let _ = entries;
16231
16232        Ok(snap)
16233    }
16234}
16235
16236/// Get the directory name for a graph export format.
16237fn format_name(format: datasynth_config::schema::GraphExportFormat) -> &'static str {
16238    match format {
16239        datasynth_config::schema::GraphExportFormat::PytorchGeometric => "pytorch_geometric",
16240        datasynth_config::schema::GraphExportFormat::Neo4j => "neo4j",
16241        datasynth_config::schema::GraphExportFormat::Dgl => "dgl",
16242        datasynth_config::schema::GraphExportFormat::RustGraph => "rustgraph",
16243        datasynth_config::schema::GraphExportFormat::RustGraphHypergraph => "rustgraph_hypergraph",
16244    }
16245}
16246
16247/// Aggregate journal entry lines into per-account trial balance rows.
16248///
16249/// Each unique `account_code` gets one [`TrialBalanceEntry`] with summed
16250/// debit/credit totals and a net balance (debit minus credit).
16251fn compute_trial_balance_entries(
16252    entries: &[JournalEntry],
16253    entity_code: &str,
16254    fiscal_year: i32,
16255    coa: Option<&ChartOfAccounts>,
16256) -> Vec<datasynth_audit_fsm::artifact::TrialBalanceEntry> {
16257    use std::collections::BTreeMap;
16258
16259    let mut balances: BTreeMap<String, (rust_decimal::Decimal, rust_decimal::Decimal)> =
16260        BTreeMap::new();
16261
16262    for je in entries {
16263        for line in &je.lines {
16264            let entry = balances.entry(line.account_code.clone()).or_default();
16265            entry.0 += line.debit_amount;
16266            entry.1 += line.credit_amount;
16267        }
16268    }
16269
16270    balances
16271        .into_iter()
16272        .map(
16273            |(account_code, (debit, credit))| datasynth_audit_fsm::artifact::TrialBalanceEntry {
16274                account_description: coa
16275                    .and_then(|c| c.get_account(&account_code))
16276                    .map(|a| a.description().to_string())
16277                    .unwrap_or_else(|| account_code.clone()),
16278                account_code,
16279                debit_balance: debit,
16280                credit_balance: credit,
16281                net_balance: debit - credit,
16282                entity_code: entity_code.to_string(),
16283                period: format!("FY{}", fiscal_year),
16284            },
16285        )
16286        .collect()
16287}
16288
16289#[cfg(test)]
16290mod tests {
16291    use super::*;
16292    use datasynth_config::schema::*;
16293
16294    fn create_test_config() -> GeneratorConfig {
16295        GeneratorConfig {
16296            global: GlobalConfig {
16297                industry: IndustrySector::Manufacturing,
16298                start_date: "2024-01-01".to_string(),
16299                period_months: 1,
16300                seed: Some(42),
16301                parallel: false,
16302                group_currency: "USD".to_string(),
16303                presentation_currency: None,
16304                worker_threads: 0,
16305                memory_limit_mb: 0,
16306                fiscal_year_months: None,
16307            },
16308            companies: vec![CompanyConfig {
16309                code: "1000".to_string(),
16310                name: "Test Company".to_string(),
16311                currency: "USD".to_string(),
16312                functional_currency: None,
16313                country: "US".to_string(),
16314                annual_transaction_volume: TransactionVolume::TenK,
16315                volume_weight: 1.0,
16316                fiscal_year_variant: "K4".to_string(),
16317            }],
16318            chart_of_accounts: ChartOfAccountsConfig {
16319                complexity: CoAComplexity::Small,
16320                industry_specific: true,
16321                custom_accounts: None,
16322                min_hierarchy_depth: 2,
16323                max_hierarchy_depth: 4,
16324                expand_industry_subaccounts: false,
16325            },
16326            transactions: TransactionConfig::default(),
16327            output: OutputConfig::default(),
16328            fraud: FraudConfig::default(),
16329            internal_controls: InternalControlsConfig::default(),
16330            business_processes: BusinessProcessConfig::default(),
16331            user_personas: UserPersonaConfig::default(),
16332            templates: TemplateConfig::default(),
16333            approval: ApprovalConfig::default(),
16334            departments: DepartmentConfig::default(),
16335            master_data: MasterDataConfig::default(),
16336            document_flows: DocumentFlowConfig::default(),
16337            intercompany: IntercompanyConfig::default(),
16338            balance: BalanceConfig::default(),
16339            ocpm: OcpmConfig::default(),
16340            audit: AuditGenerationConfig::default(),
16341            banking: datasynth_banking::BankingConfig::default(),
16342            data_quality: DataQualitySchemaConfig::default(),
16343            scenario: ScenarioConfig::default(),
16344            temporal: TemporalDriftConfig::default(),
16345            graph_export: GraphExportConfig::default(),
16346            streaming: StreamingSchemaConfig::default(),
16347            rate_limit: RateLimitSchemaConfig::default(),
16348            temporal_attributes: TemporalAttributeSchemaConfig::default(),
16349            relationships: RelationshipSchemaConfig::default(),
16350            accounting_standards: AccountingStandardsConfig::default(),
16351            audit_standards: AuditStandardsConfig::default(),
16352            distributions: Default::default(),
16353            temporal_patterns: Default::default(),
16354            vendor_network: VendorNetworkSchemaConfig::default(),
16355            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
16356            relationship_strength: RelationshipStrengthSchemaConfig::default(),
16357            cross_process_links: CrossProcessLinksSchemaConfig::default(),
16358            organizational_events: OrganizationalEventsSchemaConfig::default(),
16359            behavioral_drift: BehavioralDriftSchemaConfig::default(),
16360            market_drift: MarketDriftSchemaConfig::default(),
16361            drift_labeling: DriftLabelingSchemaConfig::default(),
16362            anomaly_injection: Default::default(),
16363            industry_specific: Default::default(),
16364            fingerprint_privacy: Default::default(),
16365            quality_gates: Default::default(),
16366            compliance: Default::default(),
16367            webhooks: Default::default(),
16368            llm: Default::default(),
16369            diffusion: Default::default(),
16370            causal: Default::default(),
16371            source_to_pay: Default::default(),
16372            financial_reporting: Default::default(),
16373            hr: Default::default(),
16374            manufacturing: Default::default(),
16375            sales_quotes: Default::default(),
16376            tax: Default::default(),
16377            treasury: Default::default(),
16378            project_accounting: Default::default(),
16379            esg: Default::default(),
16380            country_packs: None,
16381            scenarios: Default::default(),
16382            session: Default::default(),
16383            compliance_regulations: Default::default(),
16384            analytics_metadata: Default::default(),
16385            concentration: Default::default(),
16386        }
16387    }
16388
16389    #[test]
16390    fn test_enhanced_orchestrator_creation() {
16391        let config = create_test_config();
16392        let orchestrator = EnhancedOrchestrator::with_defaults(config);
16393        assert!(orchestrator.is_ok());
16394    }
16395
16396    #[test]
16397    fn test_minimal_generation() {
16398        let config = create_test_config();
16399        let phase_config = PhaseConfig {
16400            generate_master_data: false,
16401            generate_document_flows: false,
16402            generate_journal_entries: true,
16403            inject_anomalies: false,
16404            show_progress: false,
16405            ..Default::default()
16406        };
16407
16408        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16409        let result = orchestrator.generate();
16410
16411        assert!(result.is_ok());
16412        let result = result.unwrap();
16413        assert!(!result.journal_entries.is_empty());
16414    }
16415
16416    #[test]
16417    fn test_master_data_generation() {
16418        let config = create_test_config();
16419        let phase_config = PhaseConfig {
16420            generate_master_data: true,
16421            generate_document_flows: false,
16422            generate_journal_entries: false,
16423            inject_anomalies: false,
16424            show_progress: false,
16425            vendors_per_company: 5,
16426            customers_per_company: 5,
16427            materials_per_company: 10,
16428            assets_per_company: 5,
16429            employees_per_company: 10,
16430            ..Default::default()
16431        };
16432
16433        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16434        let result = orchestrator.generate().unwrap();
16435
16436        assert!(!result.master_data.vendors.is_empty());
16437        assert!(!result.master_data.customers.is_empty());
16438        assert!(!result.master_data.materials.is_empty());
16439    }
16440
16441    #[test]
16442    fn test_document_flow_generation() {
16443        let config = create_test_config();
16444        let phase_config = PhaseConfig {
16445            generate_master_data: true,
16446            generate_document_flows: true,
16447            generate_journal_entries: false,
16448            inject_anomalies: false,
16449            inject_data_quality: false,
16450            validate_balances: false,
16451            validate_coa_coverage_strict: false,
16452            generate_ocpm_events: false,
16453            show_progress: false,
16454            vendors_per_company: 5,
16455            customers_per_company: 5,
16456            materials_per_company: 10,
16457            assets_per_company: 5,
16458            employees_per_company: 10,
16459            p2p_chains: 5,
16460            o2c_chains: 5,
16461            ..Default::default()
16462        };
16463
16464        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16465        let result = orchestrator.generate().unwrap();
16466
16467        // Should have generated P2P and O2C chains
16468        assert!(!result.document_flows.p2p_chains.is_empty());
16469        assert!(!result.document_flows.o2c_chains.is_empty());
16470
16471        // Flattened documents should be populated
16472        assert!(!result.document_flows.purchase_orders.is_empty());
16473        assert!(!result.document_flows.sales_orders.is_empty());
16474    }
16475
16476    #[test]
16477    fn test_anomaly_injection() {
16478        let config = create_test_config();
16479        let phase_config = PhaseConfig {
16480            generate_master_data: false,
16481            generate_document_flows: false,
16482            generate_journal_entries: true,
16483            inject_anomalies: true,
16484            show_progress: false,
16485            ..Default::default()
16486        };
16487
16488        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16489        let result = orchestrator.generate().unwrap();
16490
16491        // Should have journal entries
16492        assert!(!result.journal_entries.is_empty());
16493
16494        // With ~833 entries and 2% rate, expect some anomalies
16495        // Note: This is probabilistic, so we just verify the structure exists
16496        assert!(result.anomaly_labels.summary.is_some());
16497    }
16498
16499    #[test]
16500    fn test_full_generation_pipeline() {
16501        let config = create_test_config();
16502        let phase_config = PhaseConfig {
16503            generate_master_data: true,
16504            generate_document_flows: true,
16505            generate_journal_entries: true,
16506            inject_anomalies: false,
16507            inject_data_quality: false,
16508            validate_balances: true,
16509            validate_coa_coverage_strict: false,
16510            generate_ocpm_events: false,
16511            show_progress: false,
16512            vendors_per_company: 3,
16513            customers_per_company: 3,
16514            materials_per_company: 5,
16515            assets_per_company: 3,
16516            employees_per_company: 5,
16517            p2p_chains: 3,
16518            o2c_chains: 3,
16519            ..Default::default()
16520        };
16521
16522        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16523        let result = orchestrator.generate().unwrap();
16524
16525        // All phases should have results
16526        assert!(!result.master_data.vendors.is_empty());
16527        assert!(!result.master_data.customers.is_empty());
16528        assert!(!result.document_flows.p2p_chains.is_empty());
16529        assert!(!result.document_flows.o2c_chains.is_empty());
16530        assert!(!result.journal_entries.is_empty());
16531        assert!(result.statistics.accounts_count > 0);
16532
16533        // Subledger linking should have run
16534        assert!(!result.subledger.ap_invoices.is_empty());
16535        assert!(!result.subledger.ar_invoices.is_empty());
16536
16537        // Balance validation should have run
16538        assert!(result.balance_validation.validated);
16539        assert!(result.balance_validation.entries_processed > 0);
16540    }
16541
16542    #[test]
16543    fn test_subledger_linking() {
16544        let config = create_test_config();
16545        let phase_config = PhaseConfig {
16546            generate_master_data: true,
16547            generate_document_flows: true,
16548            generate_journal_entries: false,
16549            inject_anomalies: false,
16550            inject_data_quality: false,
16551            validate_balances: false,
16552            validate_coa_coverage_strict: false,
16553            generate_ocpm_events: false,
16554            show_progress: false,
16555            vendors_per_company: 5,
16556            customers_per_company: 5,
16557            materials_per_company: 10,
16558            assets_per_company: 3,
16559            employees_per_company: 5,
16560            p2p_chains: 5,
16561            o2c_chains: 5,
16562            ..Default::default()
16563        };
16564
16565        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16566        let result = orchestrator.generate().unwrap();
16567
16568        // Should have document flows
16569        assert!(!result.document_flows.vendor_invoices.is_empty());
16570        assert!(!result.document_flows.customer_invoices.is_empty());
16571
16572        // Subledger should be linked from document flows
16573        assert!(!result.subledger.ap_invoices.is_empty());
16574        assert!(!result.subledger.ar_invoices.is_empty());
16575
16576        // AP invoices count should match vendor invoices count
16577        assert_eq!(
16578            result.subledger.ap_invoices.len(),
16579            result.document_flows.vendor_invoices.len()
16580        );
16581
16582        // AR invoices count should match customer invoices count
16583        assert_eq!(
16584            result.subledger.ar_invoices.len(),
16585            result.document_flows.customer_invoices.len()
16586        );
16587
16588        // Statistics should reflect subledger counts
16589        assert_eq!(
16590            result.statistics.ap_invoice_count,
16591            result.subledger.ap_invoices.len()
16592        );
16593        assert_eq!(
16594            result.statistics.ar_invoice_count,
16595            result.subledger.ar_invoices.len()
16596        );
16597    }
16598
16599    #[test]
16600    fn test_balance_validation() {
16601        let config = create_test_config();
16602        let phase_config = PhaseConfig {
16603            generate_master_data: false,
16604            generate_document_flows: false,
16605            generate_journal_entries: true,
16606            inject_anomalies: false,
16607            validate_balances: true,
16608            validate_coa_coverage_strict: false,
16609            show_progress: false,
16610            ..Default::default()
16611        };
16612
16613        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16614        let result = orchestrator.generate().unwrap();
16615
16616        // Balance validation should run
16617        assert!(result.balance_validation.validated);
16618        assert!(result.balance_validation.entries_processed > 0);
16619
16620        // Generated JEs should be balanced (no unbalanced entries)
16621        assert!(!result.balance_validation.has_unbalanced_entries);
16622
16623        // Total debits should equal total credits
16624        assert_eq!(
16625            result.balance_validation.total_debits,
16626            result.balance_validation.total_credits
16627        );
16628    }
16629
16630    #[test]
16631    fn test_statistics_accuracy() {
16632        let config = create_test_config();
16633        let phase_config = PhaseConfig {
16634            generate_master_data: true,
16635            generate_document_flows: false,
16636            generate_journal_entries: true,
16637            inject_anomalies: false,
16638            show_progress: false,
16639            vendors_per_company: 10,
16640            customers_per_company: 20,
16641            materials_per_company: 15,
16642            assets_per_company: 5,
16643            employees_per_company: 8,
16644            ..Default::default()
16645        };
16646
16647        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16648        let result = orchestrator.generate().unwrap();
16649
16650        // Statistics should match actual data
16651        assert_eq!(
16652            result.statistics.vendor_count,
16653            result.master_data.vendors.len()
16654        );
16655        assert_eq!(
16656            result.statistics.customer_count,
16657            result.master_data.customers.len()
16658        );
16659        assert_eq!(
16660            result.statistics.material_count,
16661            result.master_data.materials.len()
16662        );
16663        assert_eq!(
16664            result.statistics.total_entries as usize,
16665            result.journal_entries.len()
16666        );
16667    }
16668
16669    #[test]
16670    fn test_phase_config_defaults() {
16671        let config = PhaseConfig::default();
16672        assert!(config.generate_master_data);
16673        assert!(config.generate_document_flows);
16674        assert!(config.generate_journal_entries);
16675        assert!(!config.inject_anomalies);
16676        assert!(config.validate_balances);
16677        assert!(config.show_progress);
16678        assert!(config.vendors_per_company > 0);
16679        assert!(config.customers_per_company > 0);
16680    }
16681
16682    #[test]
16683    fn test_get_coa_before_generation() {
16684        let config = create_test_config();
16685        let orchestrator = EnhancedOrchestrator::with_defaults(config).unwrap();
16686
16687        // Before generation, CoA should be None
16688        assert!(orchestrator.get_coa().is_none());
16689    }
16690
16691    #[test]
16692    fn test_get_coa_after_generation() {
16693        let config = create_test_config();
16694        let phase_config = PhaseConfig {
16695            generate_master_data: false,
16696            generate_document_flows: false,
16697            generate_journal_entries: true,
16698            inject_anomalies: false,
16699            show_progress: false,
16700            ..Default::default()
16701        };
16702
16703        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16704        let _ = orchestrator.generate().unwrap();
16705
16706        // After generation, CoA should be available
16707        assert!(orchestrator.get_coa().is_some());
16708    }
16709
16710    #[test]
16711    fn test_get_master_data() {
16712        let config = create_test_config();
16713        let phase_config = PhaseConfig {
16714            generate_master_data: true,
16715            generate_document_flows: false,
16716            generate_journal_entries: false,
16717            inject_anomalies: false,
16718            show_progress: false,
16719            vendors_per_company: 5,
16720            customers_per_company: 5,
16721            materials_per_company: 5,
16722            assets_per_company: 5,
16723            employees_per_company: 5,
16724            ..Default::default()
16725        };
16726
16727        let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
16728        let result = orchestrator.generate().unwrap();
16729
16730        // After generate(), master_data is moved into the result
16731        assert!(!result.master_data.vendors.is_empty());
16732    }
16733
16734    #[test]
16735    fn test_with_progress_builder() {
16736        let config = create_test_config();
16737        let orchestrator = EnhancedOrchestrator::with_defaults(config)
16738            .unwrap()
16739            .with_progress(false);
16740
16741        // Should still work without progress
16742        assert!(!orchestrator.phase_config.show_progress);
16743    }
16744
/// Generating for two companies yields master data for both of them.
///
/// A second company ("2000", EUR / DE) is appended to the test configuration
/// and five records of each master-data kind are requested per company, so
/// the aggregate vendor and customer counts must reach at least ten and the
/// company counter must be exactly two.
#[test]
fn test_multi_company_generation() {
    let mut config = create_test_config();
    config.companies.push(CompanyConfig {
        code: "2000".to_string(),
        name: "Subsidiary".to_string(),
        currency: "EUR".to_string(),
        functional_currency: None,
        country: "DE".to_string(),
        annual_transaction_volume: TransactionVolume::TenK,
        volume_weight: 0.5,
        fiscal_year_variant: "K4".to_string(),
    });

    let phase_config = PhaseConfig {
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        vendors_per_company: 5,
        customers_per_company: 5,
        materials_per_company: 5,
        assets_per_company: 5,
        employees_per_company: 5,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let stats = &result.statistics;

    // Should have master data for both companies (5 per company requested).
    assert!(
        stats.vendor_count >= 10,
        "expected at least 10 vendors across both companies, got {}",
        stats.vendor_count
    );
    assert!(
        stats.customer_count >= 10,
        "expected at least 10 customers across both companies, got {}",
        stats.customer_count
    );
    // assert_eq! (rather than assert!(a == b)) reports the actual count on failure.
    assert_eq!(
        stats.companies_count, 2,
        "both configured companies should be counted"
    );
}
16781
/// Requesting document flows without master data yields no P2P or O2C chains.
#[test]
fn test_empty_master_data_skips_document_flows() {
    let cfg = create_test_config();

    // Master data is switched off while document flows are still requested.
    let phases = PhaseConfig {
        generate_document_flows: true,
        generate_master_data: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let flows = &output.document_flows;

    // Both chain collections must come back empty.
    assert!(flows.p2p_chains.is_empty());
    assert!(flows.o2c_chains.is_empty());
}
16801
/// `statistics.total_line_items` equals the summed `line_count()` of all
/// generated journal entries.
#[test]
fn test_journal_entry_line_item_count() {
    let cfg = create_test_config();
    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // Recompute the total independently by walking every entry.
    let recomputed = output
        .journal_entries
        .iter()
        .fold(0u64, |running, entry| running + entry.line_count() as u64);

    assert_eq!(output.statistics.total_line_items, recomputed);
}
16825
/// The audit phase populates the audit collections and keeps the reported
/// statistics in step with them.
///
/// Two engagements are requested. The test checks that the core collections
/// and the ISA 505 / 330 / 520 / 610 / 550 collections are non-empty, and
/// that every `audit_*_count` statistic equals the length of the collection
/// it describes.
#[test]
fn test_audit_generation() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_audit: true,
        audit_engagements: 2,
        workpapers_per_engagement: 5,
        evidence_per_workpaper: 2,
        risks_per_engagement: 3,
        findings_per_engagement: 2,
        judgments_per_engagement: 2,
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();
    let audit = &result.audit;
    let stats = &result.statistics;

    // Should have generated audit data
    assert_eq!(
        audit.engagements.len(),
        2,
        "two audit engagements were requested"
    );
    assert!(
        !audit.workpapers.is_empty(),
        "workpapers should be generated"
    );
    assert!(!audit.evidence.is_empty(), "evidence should be generated");
    assert!(
        !audit.risk_assessments.is_empty(),
        "risk assessments should be generated"
    );
    assert!(!audit.findings.is_empty(), "findings should be generated");
    assert!(!audit.judgments.is_empty(), "judgments should be generated");

    // New ISA entity collections should also be populated
    assert!(
        !audit.confirmations.is_empty(),
        "ISA 505 confirmations should be generated"
    );
    assert!(
        !audit.confirmation_responses.is_empty(),
        "ISA 505 confirmation responses should be generated"
    );
    assert!(
        !audit.procedure_steps.is_empty(),
        "ISA 330 procedure steps should be generated"
    );
    // Samples may or may not be generated depending on workpaper sampling methods
    assert!(
        !audit.analytical_results.is_empty(),
        "ISA 520 analytical procedures should be generated"
    );
    assert!(
        !audit.ia_functions.is_empty(),
        "ISA 610 IA functions should be generated (one per engagement)"
    );
    assert!(
        !audit.related_parties.is_empty(),
        "ISA 550 related parties should be generated"
    );

    // Statistics should match: (counter name, reported value, collection length).
    // Table-driven so a failure names the counter that drifted.
    let reported_vs_actual = [
        (
            "audit_engagement_count",
            stats.audit_engagement_count,
            audit.engagements.len(),
        ),
        (
            "audit_workpaper_count",
            stats.audit_workpaper_count,
            audit.workpapers.len(),
        ),
        (
            "audit_evidence_count",
            stats.audit_evidence_count,
            audit.evidence.len(),
        ),
        (
            "audit_risk_count",
            stats.audit_risk_count,
            audit.risk_assessments.len(),
        ),
        (
            "audit_finding_count",
            stats.audit_finding_count,
            audit.findings.len(),
        ),
        (
            "audit_judgment_count",
            stats.audit_judgment_count,
            audit.judgments.len(),
        ),
        (
            "audit_confirmation_count",
            stats.audit_confirmation_count,
            audit.confirmations.len(),
        ),
        (
            "audit_confirmation_response_count",
            stats.audit_confirmation_response_count,
            audit.confirmation_responses.len(),
        ),
        (
            "audit_procedure_step_count",
            stats.audit_procedure_step_count,
            audit.procedure_steps.len(),
        ),
        (
            "audit_sample_count",
            stats.audit_sample_count,
            audit.samples.len(),
        ),
        (
            "audit_analytical_result_count",
            stats.audit_analytical_result_count,
            audit.analytical_results.len(),
        ),
        (
            "audit_ia_function_count",
            stats.audit_ia_function_count,
            audit.ia_functions.len(),
        ),
        (
            "audit_ia_report_count",
            stats.audit_ia_report_count,
            audit.ia_reports.len(),
        ),
        (
            "audit_related_party_count",
            stats.audit_related_party_count,
            audit.related_parties.len(),
        ),
        (
            "audit_related_party_transaction_count",
            stats.audit_related_party_transaction_count,
            audit.related_party_transactions.len(),
        ),
    ];
    for (counter, reported, actual) in reported_vs_actual {
        assert_eq!(
            reported, actual,
            "statistics.{} does not match the generated collection",
            counter
        );
    }
}
16945
/// With the LLM, diffusion and causal features left disabled in the test
/// configuration, their statistics stay at zero / `None` and no
/// counterfactual pairs are produced.
#[test]
fn test_new_phases_disabled_by_default() {
    let cfg = create_test_config();

    // The new feature switches must default to "off".
    assert!(!cfg.llm.enabled);
    assert!(!cfg.diffusion.enabled);
    assert!(!cfg.causal.enabled);

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Every statistic belonging to a disabled phase must be untouched.
    assert_eq!(stats.llm_enrichment_ms, 0);
    assert_eq!(stats.llm_vendors_enriched, 0);
    assert_eq!(stats.diffusion_enhancement_ms, 0);
    assert_eq!(stats.diffusion_samples_generated, 0);
    assert_eq!(stats.causal_generation_ms, 0);
    assert_eq!(stats.causal_samples_generated, 0);
    assert!(stats.causal_validation_passed.is_none());
    assert_eq!(stats.counterfactual_pair_count, 0);
    assert!(output.counterfactual_pairs.is_empty());
}
16977
/// With counterfactual generation enabled, one counterfactual pair is
/// produced per journal entry and every pair carries a distinct `pair_id`.
///
/// Period close is disabled so that the journal-entry count matches the
/// number of counterfactual pairs.
#[test]
fn test_counterfactual_generation_enabled() {
    let config = create_test_config();
    let phase_config = PhaseConfig {
        generate_master_data: false,
        generate_document_flows: false,
        generate_journal_entries: true,
        inject_anomalies: false,
        show_progress: false,
        generate_counterfactuals: true,
        generate_period_close: false, // Disable so entry count matches counterfactual pairs
        ..Default::default()
    };

    let mut orchestrator = EnhancedOrchestrator::new(config, phase_config).unwrap();
    let result = orchestrator.generate().unwrap();

    // The checks below are deliberately unconditional: guarding them behind
    // "journal entries are non-empty" would let the test pass without
    // asserting anything. The equalities hold for zero entries as well
    // (no entries => no pairs).
    // NOTE(review): assumes pairs are derived only from journal entries — confirm.
    let entry_count = result.journal_entries.len();
    assert_eq!(
        result.counterfactual_pairs.len(),
        entry_count,
        "expected one counterfactual pair per journal entry"
    );
    assert_eq!(
        result.statistics.counterfactual_pair_count, entry_count,
        "statistics should report one pair per journal entry"
    );

    // Each pair should have a distinct pair_id (borrowed, no clones needed).
    let ids: std::collections::HashSet<_> = result
        .counterfactual_pairs
        .iter()
        .map(|p| &p.pair_id)
        .collect();
    assert_eq!(
        ids.len(),
        result.counterfactual_pairs.len(),
        "pair_id values must be unique"
    );
}
17014
/// Enabling LLM enrichment with a cap of three vendors enriches at least one
/// vendor and never more than the cap.
#[test]
fn test_llm_enrichment_enabled() {
    let mut cfg = create_test_config();
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 3;

    // Master data only: five vendors per company are available for enrichment.
    let phases = PhaseConfig {
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        generate_master_data: true,
        generate_document_flows: false,
        generate_journal_entries: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // The enrichment phase ran and respected the configured cap.
    assert!(stats.llm_vendors_enriched > 0);
    assert!(stats.llm_vendors_enriched <= 3);
}
17042
/// Enabling diffusion with `sample_size = 20` reports exactly twenty
/// generated diffusion samples.
#[test]
fn test_diffusion_enhancement_enabled() {
    let mut cfg = create_test_config();
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 50;
    cfg.diffusion.sample_size = 20;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();

    // The reported sample count must equal the configured sample size.
    let produced = &output.statistics.diffusion_samples_generated;
    assert_eq!(*produced, 20);
}
17065
/// The causal overlay with the `fraud_detection` template produces the
/// configured number of samples and records a validation outcome.
#[test]
fn test_causal_overlay_enabled() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("fraud_detection");
    cfg.causal.sample_size = 100;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Sample count mirrors the configured sample size.
    assert_eq!(stats.causal_samples_generated, 100);
    // Validation was requested, so an outcome (pass or fail) must be present.
    assert!(stats.causal_validation_passed.is_some());
}
17091
/// The causal overlay with the `revenue_cycle` template and validation
/// switched off produces the configured samples and no validation outcome.
#[test]
fn test_causal_overlay_revenue_cycle_template() {
    let mut cfg = create_test_config();
    cfg.causal.enabled = true;
    cfg.causal.template = String::from("revenue_cycle");
    cfg.causal.sample_size = 50;
    cfg.causal.validate = false;

    let phases = PhaseConfig {
        generate_journal_entries: true,
        generate_master_data: false,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Sample count mirrors the configured sample size.
    assert_eq!(stats.causal_samples_generated, 50);
    // Validation was disabled, so no outcome may be recorded.
    assert!(stats.causal_validation_passed.is_none());
}
17117
/// LLM enrichment, diffusion enhancement and the causal overlay can all be
/// enabled in the same run, and each reports its own statistics.
#[test]
fn test_all_new_phases_enabled_together() {
    let mut cfg = create_test_config();

    // LLM enrichment, capped at two vendors.
    cfg.llm.enabled = true;
    cfg.llm.max_vendor_enrichments = 2;

    // Diffusion enhancement: 20 steps, 10 samples.
    cfg.diffusion.enabled = true;
    cfg.diffusion.n_steps = 20;
    cfg.diffusion.sample_size = 10;

    // Causal overlay: 50 samples, validated.
    cfg.causal.enabled = true;
    cfg.causal.sample_size = 50;
    cfg.causal.validate = true;

    let phases = PhaseConfig {
        vendors_per_company: 5,
        customers_per_company: 3,
        materials_per_company: 3,
        assets_per_company: 3,
        employees_per_company: 3,
        generate_master_data: true,
        generate_journal_entries: true,
        generate_document_flows: false,
        inject_anomalies: false,
        show_progress: false,
        ..PhaseConfig::default()
    };

    let mut engine = EnhancedOrchestrator::new(cfg, phases).unwrap();
    let output = engine.generate().unwrap();
    let stats = &output.statistics;

    // Each of the three phases must have left its mark on the statistics.
    assert!(stats.llm_vendors_enriched > 0);
    assert_eq!(stats.diffusion_samples_generated, 10);
    assert_eq!(stats.causal_samples_generated, 50);
    assert!(stats.causal_validation_passed.is_some());
}
17153
/// The LLM / diffusion / causal statistics fields survive a JSON round trip
/// through `serde_json`.
#[test]
fn test_statistics_serialization_with_new_fields() {
    let original = EnhancedGenerationStatistics {
        causal_validation_passed: Some(true),
        causal_samples_generated: 100,
        causal_generation_ms: 200,
        diffusion_samples_generated: 50,
        diffusion_enhancement_ms: 100,
        llm_vendors_enriched: 10,
        llm_enrichment_ms: 42,
        total_line_items: 500,
        total_entries: 100,
        ..EnhancedGenerationStatistics::default()
    };

    // Serialize to JSON text and read it straight back.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: EnhancedGenerationStatistics = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.llm_enrichment_ms, 42);
    assert_eq!(decoded.llm_vendors_enriched, 10);
    assert_eq!(decoded.diffusion_enhancement_ms, 100);
    assert_eq!(decoded.diffusion_samples_generated, 50);
    assert_eq!(decoded.causal_generation_ms, 200);
    assert_eq!(decoded.causal_samples_generated, 100);
    assert_eq!(decoded.causal_validation_passed, Some(true));
}
17180
/// Statistics JSON written before the LLM / diffusion / causal fields existed
/// still deserializes, with the missing fields falling back to `0` / `None`.
#[test]
fn test_statistics_backward_compat_deserialization() {
    // Payload in the old shape: none of the newer fields are present.
    let payload_without_new_fields = r#"{
            "total_entries": 100,
            "total_line_items": 500,
            "accounts_count": 50,
            "companies_count": 1,
            "period_months": 12,
            "vendor_count": 10,
            "customer_count": 20,
            "material_count": 15,
            "asset_count": 5,
            "employee_count": 8,
            "p2p_chain_count": 5,
            "o2c_chain_count": 5,
            "ap_invoice_count": 5,
            "ar_invoice_count": 5,
            "ocpm_event_count": 0,
            "ocpm_object_count": 0,
            "ocpm_case_count": 0,
            "audit_engagement_count": 0,
            "audit_workpaper_count": 0,
            "audit_evidence_count": 0,
            "audit_risk_count": 0,
            "audit_finding_count": 0,
            "audit_judgment_count": 0,
            "anomalies_injected": 0,
            "data_quality_issues": 0,
            "banking_customer_count": 0,
            "banking_account_count": 0,
            "banking_transaction_count": 0,
            "banking_suspicious_count": 0,
            "graph_export_count": 0,
            "graph_node_count": 0,
            "graph_edge_count": 0
        }"#;

    let parsed: EnhancedGenerationStatistics =
        serde_json::from_str(payload_without_new_fields).unwrap();

    // Absent fields must take their defaults.
    assert_eq!(parsed.llm_enrichment_ms, 0);
    assert_eq!(parsed.llm_vendors_enriched, 0);
    assert_eq!(parsed.diffusion_enhancement_ms, 0);
    assert_eq!(parsed.diffusion_samples_generated, 0);
    assert_eq!(parsed.causal_generation_ms, 0);
    assert_eq!(parsed.causal_samples_generated, 0);
    assert!(parsed.causal_validation_passed.is_none());
}
17230
17231    // ── v5.33 #162 — framework-aware TB classification ──────────────────────
17232
/// US-style account numbering keeps the same categories as the pre-v5.33
/// hard-coded table.
#[test]
fn category_from_account_code_us_gaap_unchanged() {
    // Shorthand: classify a code under the US GAAP framework.
    let category =
        |code: &str| EnhancedOrchestrator::category_from_account_code(code, "us_gaap");

    assert_eq!(category("1000"), "Cash");
    assert_eq!(category("1500"), "FixedAssets");
    assert_eq!(category("4000"), "Revenue");
    assert_eq!(category("6000"), "OperatingExpenses");
}
17253
/// SKR04 (German GAAP) account codes are routed to their proper categories.
///
/// SKR04 uses 0xxx for fixed assets, 4xxx for revenue and 8xxx for
/// tax/extraordinary expense. Before v5.33 the US-only table mis-classified
/// 0xxx as OperatingExpenses (default arm), got 4xxx right by accident as
/// Revenue, and mapped 8xxx to OtherExpenses; the framework-aware version
/// routes them correctly.
#[test]
fn category_from_account_code_skr04_german() {
    // Shorthand: classify a code under the German GAAP framework.
    let category =
        |code: &str| EnhancedOrchestrator::category_from_account_code(code, "german_gaap");

    assert_eq!(
        category("0010"),
        "FixedAssets",
        "SKR 0xxx must be classified as fixed assets, not P&L"
    );
    assert_eq!(category("1000"), "Cash");
    assert_eq!(category("1300"), "Receivables");
    assert_eq!(category("2000"), "Equity");
    assert_eq!(category("3000"), "Payables");
    assert_eq!(category("4000"), "Revenue");
    assert_eq!(category("5000"), "CostOfSales");
    assert_eq!(category("8000"), "OtherExpenses");
}
17295
/// PCG (French GAAP) account codes are routed to their proper categories.
///
/// In the PCG, class 2 is fixed assets, class 5 is cash, class 6 is expenses
/// and class 7 is revenue. Before v5.33 these all hit the wrong US-prefix
/// arms.
#[test]
fn category_from_account_code_pcg_french() {
    // Shorthand: classify a code under the French GAAP framework.
    let category =
        |code: &str| EnhancedOrchestrator::category_from_account_code(code, "french_gaap");

    assert_eq!(category("210000"), "FixedAssets");
    assert_eq!(category("411000"), "Receivables");
    assert_eq!(category("401000"), "Payables");
    assert_eq!(category("512000"), "Cash");
    assert_eq!(category("603000"), "OperatingExpenses");
    assert_eq!(category("707000"), "Revenue");
    assert_eq!(category("101000"), "Equity");
}
17329
/// SKR04 codes are split correctly between balance sheet and P&L.
///
/// 0xxx fixed assets, 1xxx current assets, 2xxx equity and 3xxx liabilities
/// are balance-sheet accounts; 4xxx revenue and 5-6 expenses belong to P&L.
#[test]
fn is_balance_sheet_account_routes_skr_correctly() {
    // Shorthand: balance-sheet test under the German GAAP framework.
    let on_balance_sheet =
        |code: &str| EnhancedOrchestrator::is_balance_sheet_account(code, "german_gaap");

    // Balance-sheet side.
    assert!(on_balance_sheet("0010"));
    assert!(on_balance_sheet("1200"));
    assert!(on_balance_sheet("2000"));
    assert!(on_balance_sheet("3000"));

    // P&L side.
    assert!(!on_balance_sheet("4000"));
    assert!(!on_balance_sheet("6000"));
}
17359
/// Defect C regression test: `PeriodTrialBalance::into_canonical` assigns
/// framework-aware account types to its lines.
///
/// Every trial-balance line used to be hard-coded as `account_type: Asset`
/// regardless of the underlying code. With the framework-aware classifier
/// wired in, the SKR codes used here resolve to their proper sides.
#[test]
fn period_trial_balance_into_canonical_account_type_is_framework_aware() {
    use datasynth_generators::TrialBalanceEntry;
    use rust_decimal::Decimal;

    // Builds one trial-balance row from its code, name, category and balances.
    let row = |code: &str, name: &str, category: &str, debit: Decimal, credit: Decimal| {
        TrialBalanceEntry {
            account_code: code.to_string(),
            account_name: name.to_string(),
            category: category.to_string(),
            debit_balance: debit,
            credit_balance: credit,
        }
    };

    let entries = vec![
        // SKR fixed asset
        row(
            "0010",
            "Land",
            "FixedAssets",
            Decimal::new(1_000_000, 0),
            Decimal::ZERO,
        ),
        // SKR liability
        row(
            "3000",
            "Trade payables",
            "Payables",
            Decimal::ZERO,
            Decimal::new(500_000, 0),
        ),
        // SKR revenue
        row(
            "4000",
            "Sales",
            "Revenue",
            Decimal::ZERO,
            Decimal::new(2_000_000, 0),
        ),
        // SKR expense
        row(
            "6000",
            "Personnel cost",
            "OperatingExpenses",
            Decimal::new(800_000, 0),
            Decimal::ZERO,
        ),
    ];

    let period_tb = PeriodTrialBalance {
        fiscal_year: 2024,
        fiscal_period: 12,
        period_start: chrono::NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
        period_end: chrono::NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
        entries,
        framework: "german_gaap".to_string(),
    };
    let canonical = period_tb.into_canonical("ACME_EU", "EUR");

    // Line account types are no longer all-Asset.
    let kinds: Vec<AccountType> = canonical
        .lines
        .iter()
        .map(|line| line.account_type)
        .collect();
    assert_eq!(kinds[0], AccountType::Asset, "0010 → Asset");
    assert_eq!(kinds[1], AccountType::Liability, "3000 → Liability");
    assert_eq!(kinds[2], AccountType::Revenue, "4000 → Revenue");
    assert_eq!(kinds[3], AccountType::Expense, "6000 → Expense");

    // is_balanced is now an unconditional truth claim — the underlying
    // JE-balance invariant is the only one we guarantee.
    assert!(canonical.is_balanced);
    assert!(canonical.is_equation_valid);
    assert_eq!(canonical.out_of_balance, Decimal::ZERO);
    assert_eq!(canonical.equation_difference, Decimal::ZERO);
}
17419
/// A pre-v5.33 snapshot without the `framework` field still deserializes.
///
/// Old in-memory snapshots did not carry the field; serde's
/// `#[serde(default)]` must let them load with a `"us_gaap"` fallback so
/// older saved sessions keep working.
#[test]
fn period_trial_balance_deserialises_legacy_snapshot_without_framework_field() {
    // Snapshot in the old shape: note the absent "framework" key.
    let snapshot_without_framework = r#"{
            "fiscal_year": 2024,
            "fiscal_period": 12,
            "period_start": "2024-01-01",
            "period_end": "2024-12-31",
            "entries": []
        }"#;

    let restored: PeriodTrialBalance =
        serde_json::from_str(snapshot_without_framework).unwrap();

    assert_eq!(restored.framework, "us_gaap");
}
17435}